Java源码学习(JDK 11)——java.lang.String

定义

package java.lang;

/**
 * Declaration of java.lang.String as excerpted here.
 * The class is final (cannot be subclassed) and implements
 * java.io.Serializable, Comparable&lt;String&gt; and CharSequence.
 * The body is elided in this excerpt.
 */
public final class String implements java.io.Serializable, Comparable<String>, CharSequence {
	// ...
}

final 类，不能被继承
实现 Serializable 接口，可序列化
实现 Comparable 接口，可比较大小
实现 CharSequence 接口，StringBuffer和StringBuilder同样实现该接口

属性

@Stable						// never null
private final byte[] value;	// JDK 11 stores the string content internally in a byte array

private final byte coder;	// encoding of value: LATIN1 or UTF16

static final boolean COMPACT_STRINGS;	// whether compact string storage is enabled

static {
	COMPACT_STRINGS = true;	
}

@Native static final byte LATIN1 = 0;
@Native static final byte UTF16  = 1;

private int hash; // caches the hash code

public static final Comparator<String> CASE_INSENSITIVE_ORDER = new CaseInsensitiveComparator();    // instance of the nested comparator class
// ...

hash：缓存hashcode，String经常被比较，将hashcode缓存，提高效率。
value：JDK 8及以前，value用char数组存储，然而很多时候，字符只需要1个字节来表示。因此从JDK 9以后，value使用byte数组存储，并添加了coder，COMPACT_STRINGS字段，帮助压缩字符串存储空间。
coder：LATIN1表示1个字符占用1个byte；UTF16表示1个字符占用2个byte。
COMPACT_STRINGS：默认值为true。当值为false时，字符串必然以UTF16的形式存储。
因此，当COMPACT_STRINGS=true并且每个字符都可用1个字节表示时，coder=LATIN1；否则coder=UTF16

内部类

/**
 * Comparator behind CASE_INSENSITIVE_ORDER. It dispatches to the
 * compareToCI* helper of StringLatin1 or StringUTF16 that matches the
 * coders of the two operands.
 */
private static class CaseInsensitiveComparator implements Comparator<String>, java.io.Serializable {
    // use serialVersionUID from JDK 1.2.2 for interoperability
    private static final long serialVersionUID = 8575799808933029326L;

    @Override
    public int compare(String s1, String s2) {
        final byte[] left = s1.value;
        final byte[] right = s2.value;

        // Both operands share one coder: use the same-encoding comparison.
        if (s1.coder() == s2.coder()) {
            if (s1.isLatin1()) {
                return StringLatin1.compareToCI(left, right);
            }
            return StringUTF16.compareToCI(left, right);
        }

        // Mixed coders: pick the cross-encoding helper by the left operand.
        if (s1.isLatin1()) {
            return StringLatin1.compareToCI_UTF16(left, right);
        }
        return StringUTF16.compareToCI_Latin1(left, right);
    }

    /** Replaces the de-serialized object. */
    private Object readResolve() {
        return CASE_INSENSITIVE_ORDER;
    }
}

实现忽略大小写的字符串比较。
compareToIgnoreCase方法利用该内部类的方法实现。

构造方法

无参（空字符串）

public String() {
	this.value = "".value;
	this.coder = "".coder;
}

char[]

/**
 * Creates a string from the whole char array.
 * Delegates to the four-argument constructor with offset 0 and count
 * value.length; the trailing null fills the Void marker parameter.
 *
 * @param value source characters
 */
public String(char value[]) {
	this(value, 0, value.length, null);
}

/**
 * Creates a string from a sub-range of a char array.
 * rangeCheck is evaluated as an argument, so the bounds are validated
 * before the delegated constructor body runs.
 *
 * @param value  source characters
 * @param offset index of the first character to use
 * @param count  number of characters to use
 */
public String(char value[], int offset, int count) {
	this(value, offset, count, rangeCheck(value, offset, count));
}

/**
 * Validates offset/count against value.length and always returns null,
 * so the call can be used as the Void argument of a constructor.
 */
private static Void rangeCheck(char[] value, int offset, int count) {
	checkBoundsOffCount(offset, count, value.length);	// static helper; throws StringIndexOutOfBoundsException when the range is out of bounds
	return null;
}

String(char[] value, int off, int len, Void sig) {	// sig与public方法区别开
	if (len == 0) {
		this.value = "".value;
		this.coder = "".coder;
		return;
	}
	if (COMPACT_STRINGS) {
		byte[] val = StringUTF16.compress(value, off, len);
		if (val != null) {
			this.value = val;
			this.coder = LATIN1;
			return;
		}
	}
	this.coder = UTF16;
	this.value = StringUTF16.toBytes(value, off, len);
}

// StringUTF16.compress
/**
 * Tries to store len chars starting at val[off] in one byte each.
 *
 * @return a new byte array of length len when the five-argument overload
 *         reports that all len chars were copied, otherwise null
 */
public static byte[] compress(char[] val, int off, int len) {
    final byte[] latin1 = new byte[len];
    // The five-argument overload returns 0 when compression fails.
    final int copied = compress(val, off, latin1, 0, len);
    return (copied == len) ? latin1 : null;
}

// StringUTF16.compress
// compressedCopy char[] -> byte[]
/**
 * Copies len chars from src (starting at srcOff) into dst (starting at
 * dstOff), narrowing each char to a byte.
 *
 * @return len when every char was at most 0xFF; 0 as soon as a char
 *         above 0xFF is met (bytes copied before that point stay in dst)
 */
@HotSpotIntrinsicCandidate
public static int compress(char[] src, int srcOff, byte[] dst, int dstOff, int len) {
    for (int i = 0; i < len; i++) {
        final char c = src[srcOff + i];
        if (c > 0xFF) {
            // Does not fit in one byte: report failure.
            return 0;
        }
        dst[dstOff + i] = (byte) c;	// char truncated to byte
    }
    return len;
}

byte[]

// Similar to the char[] constructors, with an extra charset-decoding step.
// The charset may be given as a String charsetName (this overload) or as a Charset.
/**
 * Creates a string by decoding a sub-range of a byte array.
 *
 * @param bytes       bytes to decode
 * @param offset      index of the first byte to decode
 * @param length      number of bytes to decode
 * @param charsetName name of the charset used for decoding
 * @throws UnsupportedEncodingException declared by this constructor
 * @throws NullPointerException if charsetName is null
 */
public String(byte bytes[], int offset, int length, String charsetName)
        throws UnsupportedEncodingException {
    if (charsetName == null) {
        throw new NullPointerException("charsetName");
    }
    checkBoundsOffCount(offset, length, bytes.length);

    // The decode result carries both the backing bytes and their coder.
    final StringCoding.Result decoded = StringCoding.decode(charsetName, bytes, offset, length);
    this.value = decoded.value;
    this.coder = decoded.coder;
}

StringBuffer/StringBuilder

// The content is copied: later changes to the StringBuffer/StringBuilder do not affect the String.
/**
 * Creates a string from the current content of a StringBuffer by
 * delegating to the String-argument constructor with buffer.toString().
 */
public String(StringBuffer buffer) {
	this(buffer.toString());
}
/**
 * Creates a string from a StringBuilder by delegating to a two-argument
 * constructor; the null second argument presumably serves only to select
 * that overload (it is not shown in this excerpt).
 */
public String(StringBuilder builder) {
	this(builder, null);
}

方法

length：返回长度

// For UTF16 the byte length must be halved: shifting right by coder()
// divides by 2 when the coder is UTF16 (1) and by 1 when it is LATIN1 (0).
public int length() {
	return value.length >> coder();		
}
/**
 * Returns the effective coder of this string (UTF16 = 1, LATIN1 = 0).
 * When compact strings are disabled the answer is always UTF16,
 * whatever the coder field holds.
 */
byte coder() {
    if (COMPACT_STRINGS) {
        return coder;
    }
    return UTF16;
}

isEmpty：长度是否为0
charAt：某位置上的字符
getChars：获取char数组
getBytes：获取byte数组
equals：字符串相等

/**
 * Compares this string with another object for content equality.
 *
 * @param anObject object to compare with
 * @return true if anObject is this instance, or a String with the same
 *         coder whose bytes the matching equals helper reports as equal
 */
public boolean equals(Object anObject) {
    if (this == anObject) {
        return true;
    }
    if (!(anObject instanceof String)) {
        return false;
    }
    final String other = (String) anObject;

    // Strings with different coders can never be equal, because equal
    // content is always stored with the same coder.
    if (coder() != other.coder()) {
        return false;
    }
    if (isLatin1()) {
        return StringLatin1.equals(value, other.value);
    }
    return StringUTF16.equals(value, other.value);
}

contentEquals：内容相同

/**
 * Compares this string with the character content of any CharSequence.
 *
 * @param cs sequence to compare with
 * @return true if cs has the same length and the same characters
 */
public boolean contentEquals(CharSequence cs) {
    // Argument is a StringBuffer or StringBuilder
    if (cs instanceof AbstractStringBuilder) {
        final AbstractStringBuilder builder = (AbstractStringBuilder) cs;
        if (!(cs instanceof StringBuffer)) {
            return nonSyncContentEquals(builder);
        }
        // StringBuffer is thread-safe, so compare while holding its monitor.
        synchronized (cs) {
            return nonSyncContentEquals(builder);
        }
    }

    // Argument is a String
    if (cs instanceof String) {
        return equals(cs);
    }

    // Argument is a generic CharSequence
    final int n = cs.length();
    if (n != length()) {
        return false;
    }
    final byte[] bytes = this.value;
    if (!isLatin1()) {
        return StringUTF16.contentEquals(bytes, cs, n);
    }
    for (int i = 0; i < n; i++) {
        // Mask to read the stored byte as an unsigned Latin-1 code.
        if ((bytes[i] & 0xff) != cs.charAt(i)) {
            return false;
        }
    }
    return true;
}

equalsIgnoreCase：忽略大小写字符串相同
compareTo：字符串比较，按字典序
compareToIgnoreCase：忽略大小写的字符串比较
regionMatches：字符串范围内相等
startsWith：是否以字符串开头
endsWith：是否以字符串结尾

/**
 * Tests whether this string ends with the given suffix by checking
 * startsWith at the offset where the suffix would have to begin.
 *
 * @param suffix suffix to look for
 */
public boolean endsWith(String suffix) {
    final int suffixStart = length() - suffix.length();
    return startsWith(suffix, suffixStart);
}

indexOf：返回第一次出现的下标，未出现返回-1

// StringLatin1.indexOf
/**
 * Finds the first occurrence of str inside value at or after fromIndex.
 *
 * @param value      bytes to search in
 * @param valueCount number of bytes of value to consider
 * @param str        bytes to search for; str[0] is read unconditionally
 * @param strCount   number of bytes of str to match
 * @param fromIndex  first candidate start position
 * @return index of the first match, or -1 if there is none
 */
public static int indexOf(byte[] value, int valueCount, byte[] str, int strCount, int fromIndex) {
    final byte head = str[0];
    // Last position at which a full match could still start.
    final int lastStart = valueCount - strCount;

    int start = fromIndex;
    while (start <= lastStart) {
        if (value[start] == head) {
            // First byte matches: count how many following bytes agree.
            int matched = 1;
            while (matched < strCount && value[start + matched] == str[matched]) {
                matched++;
            }
            if (matched == strCount) {
                // Found whole string.
                return start;
            }
        }
        start++;
    }
    return -1;
}

lastIndexOf：返回最后一次出现的下标，未出现返回-1
substring：子串
subSequence：子CharSequence

/**
 * Returns the characters from beginIndex to endIndex as a CharSequence.
 * Simply delegates to substring, so the result is itself a String.
 */
public CharSequence subSequence(int beginIndex, int endIndex) {
    return this.substring(beginIndex, endIndex);
}

concat：字符串拼接

/**
 * Appends str to the end of this string.
 *
 * @param str string to append
 * @return this instance when str is empty, otherwise a new String
 *         holding this string's content followed by str's content
 */
public String concat(String str) {
    if (str.isEmpty()) {
        return this;
    }

    if (coder() != str.coder()) {
        // Different coders: convert both operands to UTF16.
        final int thisLen = length();
        final int otherLen = str.length();
        final byte[] wide = StringUTF16.newBytesFor(thisLen + otherLen);
        getBytes(wide, 0, UTF16);
        str.getBytes(wide, thisLen, UTF16);
        return new String(wide, UTF16);
    }

    // Same coder: join the two backing arrays and build a new String.
    final byte[] head = this.value;
    final byte[] tail = str.value;
    final byte[] joined = Arrays.copyOf(head, head.length + tail.length);
    System.arraycopy(tail, 0, joined, head.length, tail.length);
    return new String(joined, coder);
}

replace：字符(串)替换，替换所有出现
matches：正则匹配
contains：包含

/**
 * Tests whether this string contains the given character sequence.
 * The sequence is converted to a String and searched for with indexOf.
 *
 * @param s sequence to search for
 */
public boolean contains(CharSequence s) {
    final String needle = s.toString();
    return indexOf(needle) >= 0;
}

replaceFirst：字符串替换，替换第一次出现
replaceAll：字符串正则替换
split：字符串分割，可添加限制数量
join：静态方法，将元素用delimiter连接起来，元素可以是CharSequence，或是迭代器中的元素

public static String join(CharSequence delimiter, CharSequence... elements);
public static String join(CharSequence delimiter, Iterable<? extends CharSequence> elements);

// Varargs form: the elements "ab" and "c" are joined with ","
System.out.println(String.join(",", "ab", "c"));
// Iterable form: the list elements are joined with ","
System.out.println(String.join(",", Arrays.asList("ab", "c")));
// output (printed by each call): ab,c

toLowerCase：转为小写
toUpperCase：转为大写
trim：去掉开头结尾所有码点小于等于U+0020的字符(空格及控制字符)，无法去掉其他Unicode空白字符(如全角空格U+3000)

/**
 * Trims leading and trailing bytes whose unsigned value is at most ' '.
 *
 * @param value bytes of the string to trim
 * @return the result of newString for the trimmed range, or null when
 *         nothing was removed from either end
 */
public static String trim(byte[] value) {
    int end = value.length;
    int start = 0;

    // Skip leading bytes that are <= ' ' when read as unsigned.
    while (start < end && (value[start] & 0xff) <= ' ') {
        start++;
    }
    // Skip trailing bytes that are <= ' ' when read as unsigned.
    while (start < end && (value[end - 1] & 0xff) <= ' ') {
        end--;
    }

    if (start <= 0 && end >= value.length) {
        // Neither end moved: nothing was trimmed.
        return null;
    }
    return newString(value, start, end - start);
}

strip：去掉开头结尾的所有空白字符(包括Unicode空白字符，JDK 11新增)
stripLeading：去掉开头空白
stripTrailing：去掉结尾空白
isBlank：是否只含有空白字符
lines：按行终止符(\n、\r、\r\n)分割字符串，返回由各行组成的Stream<String>

System.out.println("1
2
3
".lines().count());
// output:3

toCharArray：返回char数组
format：静态方法，字符串格式化
valueOf：静态方法，转化为字符串

/**
 * Converts an object to its string form.
 *
 * @param obj object to convert, may be null
 * @return the literal "null" when obj is null, otherwise obj.toString()
 */
public static String valueOf(Object obj) {
    if (obj == null) {
        return "null";
    }
    return obj.toString();
}

copyValueOf：静态方法，将char[]复制为字符串
intern：JDK7之后，可理解为：将首次遇到的字符串加载到常量池中，并返回常量池中的引用
- 常量池中有该字符串的引用，则返回常量池中的引用
- 常量池中没有字符串的引用，则将字符串加载到常量池中，并返回该字符串对象的引用
repeat：重复字符串

/**
 * Returns this string's content repeated count times.
 *
 * @param count number of repetitions
 * @return this instance when count is 1, "" when count is 0 or this string
 *         has no bytes, otherwise a new String with the repeated content
 * @throws IllegalArgumentException if count is negative
 * @throws OutOfMemoryError if the repeated byte length would exceed
 *         Integer.MAX_VALUE
 */
public String repeat(int count) {
    if (count < 0) {
        throw new IllegalArgumentException("count is negative: " + count);
    }
    if (count == 1) {
        return this;
    }
    // len is the length of the backing byte array, not the char count.
    final int len = value.length;
    if (len == 0 || count == 0) {
        return "";
    }
    if (len == 1) {
        // Single backing byte: fill an array of count bytes with it.
        final byte[] single = new byte[count];
        Arrays.fill(single, value[0]);
        return new String(single, coder);
    }
    // Throws when the resulting length would exceed Integer.MAX_VALUE
    if (Integer.MAX_VALUE / count < len) {
        throw new OutOfMemoryError("Repeating " + len + " bytes String " + count +
                " times will produce a String exceeding maximum size.");
    }
    final int limit = len * count;
    final byte[] multiple = new byte[limit];
    // Seed the result with one copy of the original bytes.
    System.arraycopy(value, 0, multiple, 0, len);
    int copied = len;
    // Double the filled prefix on each pass while a full doubling still fits.
    for (; copied < limit - copied; copied <<= 1) {
        System.arraycopy(multiple, 0, multiple, copied, copied);
    }
    // Fill the remaining tail from the start of the already-filled prefix.
    System.arraycopy(multiple, 0, multiple, copied, limit - copied);
    return new String(multiple, coder);
}

注意事项

1. 内存分配

String s = "abc";
- 当常量池中不存在"abc"这个字符串的引用，在堆内存中new一个新的String对象，将这个对象的引用加入常量池。
- 当常量池中存在"abc"这个字符串的引用，s指向这个引用；
String s = new String("abc")：在堆上new一个对象
String s = a + b：在堆上new一个对象
String s = "a" + "b"：编译期常量折叠，相当于String s = "ab"

2. equals

推荐"常量字符串".equals(str)而不是str.equals("常量字符串")。
若str为null，则后者会抛出NullPointerException，而前者是安全的(返回false)。