Java源码学习(JDK 11)——java.lang.String

定义

package java.lang;

/**
 * Declaration of the {@code String} class as quoted in this article: a {@code final}
 * class that implements {@code Serializable}, {@code Comparable<String>} and
 * {@code CharSequence}. The members are elided here and discussed section by section below.
 */
public final class String implements java.io.Serializable, Comparable<String>, CharSequence {
	// ...
}
  • final 类,不能被继承
  • 实现 Serializable 接口,可序列化
  • 实现 Comparable 接口,可比较大小
  • 实现 CharSequence 接口,StringBuffer和StringBuilder同样实现该接口

属性

// Backing storage of the string; JDK 11 stores the contents in a byte array. Never null.
@Stable
private final byte[] value;

// Encoding of the bytes in `value`: either LATIN1 or UTF16.
private final byte coder;

// Whether string compaction is enabled.
static final boolean COMPACT_STRINGS;

static {
	COMPACT_STRINGS = true;
}

// Possible values of `coder`.
@Native static final byte LATIN1 = 0;
@Native static final byte UTF16  = 1;

// Cache for the hash code of this string.
private int hash;

// Comparator backed by the nested class CaseInsensitiveComparator.
public static final Comparator<String> CASE_INSENSITIVE_ORDER = new CaseInsensitiveComparator();
// ...
  • hash:缓存hashcode,String经常被比较,将hashcode缓存,提高效率。
  • value:JDK 8及以前,value用char数组存储,然而很多时候,字符只需要1个字节来表示。因此从JDK 9以后,value使用byte数组存储,并添加了coder,COMPACT_STRINGS字段,帮助压缩字符串存储空间。
  • coder:LATIN1表示1个字符占用1个byte;UTF16表示1个字符占用2个byte。
  • COMPACT_STRINGS:默认值为true。当值为false时,字符串必然以UTF16的形式存储。
    因此,当COMPACT_STRINGS=true并且每个字符都可用1个字节表示时,coder=LATIN1;否则coder=UTF16

内部类

private static class CaseInsensitiveComparator implements Comparator<String>, java.io.Serializable {
    // use serialVersionUID from JDK 1.2.2 for interoperability
    private static final long serialVersionUID = 8575799808933029326L;

    /**
     * Compares two strings ignoring case, dispatching to the helper that matches
     * the coder combination of the two operands.
     *
     * @param s1 left operand
     * @param s2 right operand
     * @return the result of the selected {@code compareToCI*} helper
     */
    public int compare(String s1, String s2) {
        final byte[] left = s1.value;
        final byte[] right = s2.value;

        if (s1.coder() == s2.coder()) {
            // Same encoding on both sides: use the single-encoding comparison.
            if (s1.isLatin1()) {
                return StringLatin1.compareToCI(left, right);
            }
            return StringUTF16.compareToCI(left, right);
        }

        // Mixed encodings: pick the helper by the encoding of the left operand.
        if (s1.isLatin1()) {
            return StringLatin1.compareToCI_UTF16(left, right);
        }
        return StringUTF16.compareToCI_Latin1(left, right);
    }

    /** Replaces the de-serialized object with the shared CASE_INSENSITIVE_ORDER instance. */
    private Object readResolve() {
        return CASE_INSENSITIVE_ORDER;
    }
}

实现忽略大小写的字符串比较。
compareToIgnoreCase方法利用该内部类的方法实现。

构造方法

  • 无参构造方法
public String() {
	this.value = "".value;
	this.coder = "".coder;
}
  • char[]
/**
 * Builds a string from the whole char array by delegating to the four-argument
 * constructor with offset 0, length {@code value.length} and a {@code null} marker argument.
 *
 * @param value source characters
 */
public String(char value[]) {
	this(value, 0, value.length, null);
}

/**
 * Builds a string from a sub-range of a char array.
 * {@code rangeCheck} is evaluated as an argument, so the bounds are validated
 * before the delegated constructor body runs.
 *
 * @param value  source characters
 * @param offset index of the first character to use
 * @param count  number of characters to use
 */
public String(char value[], int offset, int count) {
	this(value, offset, count, rangeCheck(value, offset, count));
}

/**
 * Validates {@code offset}/{@code count} against {@code value.length} and returns
 * {@code null}, so the call can be used as the {@code Void} argument of a constructor.
 */
private static Void rangeCheck(char[] value, int offset, int count) {
	// Static helper; throws StringIndexOutOfBoundsException when the range is out of bounds.
	checkBoundsOffCount(offset, count, value.length);
	return null;
}

String(char[] value, int off, int len, Void sig) {	// sig与public方法区别开
	if (len == 0) {
		this.value = "".value;
		this.coder = "".coder;
		return;
	}
	if (COMPACT_STRINGS) {
		byte[] val = StringUTF16.compress(value, off, len);
		if (val != null) {
			this.value = val;
			this.coder = LATIN1;
			return;
		}
	}
	this.coder = UTF16;
	this.value = StringUTF16.toBytes(value, off, len);
}

// StringUTF16.compress
/**
 * Attempts to compress {@code len} chars of {@code val}, starting at {@code off},
 * into a new byte array holding one byte per char.
 *
 * @return the LATIN1-encoded bytes, or {@code null} when compression fails
 */
public static byte[] compress(char[] val, int off, int len) {
    final byte[] latin1 = new byte[len];
    // The five-argument overload returns 0 when compression fails.
    final int copied = compress(val, off, latin1, 0, len);
    return (copied == len) ? latin1 : null;
}

// StringUTF16.compress
// compressedCopy char[] -> byte[]
/**
 * Copies {@code len} chars from {@code src} (starting at {@code srcOff}) into
 * {@code dst} (starting at {@code dstOff}), narrowing each char to a byte.
 *
 * @return {@code len} if every char was copied, or 0 as soon as a char above 0xFF is seen
 */
@HotSpotIntrinsicCandidate
public static int compress(char[] src, int srcOff, byte[] dst, int dstOff, int len) {
    for (int i = 0; i < len; i++) {
        final char c = src[srcOff + i];
        if (c > 0xFF) {
            // Does not fit into one byte: report failure.
            return 0;
        }
        dst[dstOff + i] = (byte) c;	// narrow the char to a byte
    }
    return len;
}
  • byte[]
// Similar to the char[] constructors, with an additional charset decoding step.
// The charset can be passed either as a String name (charsetName) or as a Charset object.
/**
 * Decodes {@code length} bytes of {@code bytes}, starting at {@code offset},
 * using the charset named by {@code charsetName}.
 *
 * @throws NullPointerException if {@code charsetName} is {@code null}
 * @throws UnsupportedEncodingException declared for the charset lookup performed during decoding
 */
public String(byte bytes[], int offset, int length, String charsetName)
        throws UnsupportedEncodingException {
    if (charsetName == null) {
        throw new NullPointerException("charsetName");
    }
    checkBoundsOffCount(offset, length, bytes.length);

    final StringCoding.Result decoded = StringCoding.decode(charsetName, bytes, offset, length);
    this.coder = decoded.coder;
    this.value = decoded.value;
}
  • StringBuffer/StringBuilder
// The contents are copied: later changes to the StringBuffer/StringBuilder do not affect the String.
/**
 * Builds a string from the current contents of {@code buffer} by delegating to
 * the constructor that takes the result of {@code buffer.toString()}.
 */
public String(StringBuffer buffer) {
	this(buffer.toString());
}
/**
 * Builds a string from {@code builder} by delegating to a two-argument constructor
 * with {@code null} as the second argument.
 * NOTE(review): the delegated constructor is not shown in this excerpt — presumably it
 * copies the builder's contents; confirm against the JDK source.
 */
public String(StringBuilder builder) {
	this(builder, null);
}

方法

  • length:返回长度
// For UTF16 the byte count must be halved: coder() is 1 for UTF16 and 0 for LATIN1,
// so shifting right by coder() does exactly that.
public int length() {
    final int shift = coder();
    return value.length >> shift;
}
/**
 * Returns the effective coder of this string (UTF16 = 1, LATIN1 = 0).
 * When compaction is disabled the answer is always UTF16.
 */
byte coder() {
    if (COMPACT_STRINGS) {
        return coder;
    }
    return UTF16;
}
  • isEmpty:长度是否为0
  • charAt:某位置上的字符
  • getChars:获取char数组
  • getBytes:获取byte数组
  • equals:字符串相等
/**
 * Compares this string with {@code anObject} for content equality.
 *
 * @param anObject the object to compare against
 * @return {@code true} only if {@code anObject} is a String with the same coder and bytes
 */
public boolean equals(Object anObject) {
    if (this == anObject) {
        return true;
    }
    if (!(anObject instanceof String)) {
        return false;
    }
    final String other = (String) anObject;
    // Strings with different coders can never be equal, because strings with the
    // same contents are always stored with the same coder.
    if (coder() != other.coder()) {
        return false;
    }
    if (isLatin1()) {
        return StringLatin1.equals(value, other.value);
    }
    return StringUTF16.equals(value, other.value);
}
  • contentEquals:内容相同
/**
 * Compares this string with an arbitrary {@code CharSequence} by content.
 *
 * @param cs the sequence to compare against
 * @return {@code true} if both hold the same sequence of chars
 */
public boolean contentEquals(CharSequence cs) {
    // Case 1: StringBuffer or StringBuilder.
    if (cs instanceof AbstractStringBuilder) {
        final AbstractStringBuilder builder = (AbstractStringBuilder) cs;
        if (cs instanceof StringBuffer) {
            // StringBuffer is thread-safe, so the comparison runs while holding its monitor.
            synchronized (cs) {
                return nonSyncContentEquals(builder);
            }
        }
        return nonSyncContentEquals(builder);
    }

    // Case 2: another String.
    if (cs instanceof String) {
        return equals(cs);
    }

    // Case 3: a generic CharSequence, compared char by char.
    final int n = cs.length();
    if (n != length()) {
        return false;
    }
    final byte[] val = this.value;
    if (!isLatin1()) {
        return StringUTF16.contentEquals(val, cs, n);
    }
    for (int i = 0; i < n; i++) {
        // Mask to read the stored byte as an unsigned value before comparing with the char.
        if ((val[i] & 0xff) != cs.charAt(i)) {
            return false;
        }
    }
    return true;
}
  • equalsIgnoreCase:忽略大小写字符串相同
  • compareTo:字符串比较,按字典序
  • compareToIgnoreCase:忽略大小写的字符串比较
  • regionMatches:字符串范围内相等
  • startsWith:是否以字符串开头
  • endsWith:是否以字符串结尾
/**
 * Tests whether this string ends with {@code suffix} by checking for the suffix
 * at offset {@code length() - suffix.length()}.
 */
public boolean endsWith(String suffix) {
    final int suffixStart = length() - suffix.length();
    return startsWith(suffix, suffixStart);
}
  • indexOf:返回第一次出现的下标,未出现返回-1
// StringLatin1.indexOf
/**
 * Finds the first occurrence of {@code str} in {@code value} at or after {@code fromIndex}.
 *
 * @param value      bytes to search in
 * @param valueCount number of bytes of {@code value} to consider
 * @param str        bytes to search for; its first byte is read unconditionally
 * @param strCount   number of bytes of {@code str} to match
 * @param fromIndex  index to start searching from
 * @return index of the first match, or -1 if there is none
 */
public static int indexOf(byte[] value, int valueCount, byte[] str, int strCount, int fromIndex) {
    final byte head = str[0];
    // Last index at which a full match could still start.
    final int lastStart = valueCount - strCount;

    int pos = fromIndex;
    while (pos <= lastStart) {
        // Skip ahead to the next candidate whose first byte matches.
        while (pos <= lastStart && value[pos] != head) {
            pos++;
        }
        if (pos > lastStart) {
            break;
        }

        // First byte matched; compare the remaining bytes.
        final int end = pos + strCount;
        int j = pos + 1;
        int k = 1;
        while (j < end && value[j] == str[k]) {
            j++;
            k++;
        }
        if (j == end) {
            // Found the whole string.
            return pos;
        }
        pos++;
    }
    return -1;
}
  • lastIndexOf:返回最后一次出现的下标,未出现返回-1
  • substring:子串
  • subSequence:子CharSequence
/**
 * Returns the given range as a {@code CharSequence}; simply delegates to
 * {@code substring(beginIndex, endIndex)}.
 */
public CharSequence subSequence(int beginIndex, int endIndex) {
    final String sub = substring(beginIndex, endIndex);
    return sub;
}
  • concat:字符串拼接
/**
 * Appends {@code str} to the end of this string.
 *
 * @param str the string to append
 * @return this string when {@code str} is empty, otherwise a new string with both contents
 */
public String concat(String str) {
    if (str.isEmpty()) {
        return this;
    }

    if (coder() != str.coder()) {
        // Different encodings: write both operands into one UTF16 buffer.
        final int thisLen = length();
        final int otherLen = str.length();
        final byte[] utf16 = StringUTF16.newBytesFor(thisLen + otherLen);
        getBytes(utf16, 0, UTF16);
        str.getBytes(utf16, thisLen, UTF16);
        return new String(utf16, UTF16);
    }

    // Same encoding: join the two byte arrays and build the new String from them.
    final byte[] head = this.value;
    final byte[] tail = str.value;
    final int total = head.length + tail.length;
    final byte[] joined = Arrays.copyOf(head, total);
    System.arraycopy(tail, 0, joined, head.length, tail.length);
    return new String(joined, coder);
}
  • replace:字符(串)替换,替换所有出现
  • matches:正则匹配
  • contains:包含
/**
 * Tests whether this string contains {@code s}, implemented as an
 * {@code indexOf} lookup on {@code s.toString()}.
 */
public boolean contains(CharSequence s) {
    final String needle = s.toString();
    return indexOf(needle) >= 0;
}
  • replaceFirst:字符串替换,替换第一次出现
  • replaceAll:字符串正则替换
  • split:字符串分割,可添加限制数量
  • join:静态方法,将元素用delimiter连接起来,元素可以是CharSequence,或是迭代器中的元素
public static String join(CharSequence delimiter, CharSequence... elements);
public static String join(CharSequence delimiter, Iterable<? extends CharSequence> elements);
System.out.println(String.join(",", "ab", "c"));
System.out.println(String.join(",", Arrays.asList("ab", "c")));
// output:ab,c
  • toLowerCase:转为小写
  • toUpperCase:转为大写
  • trim:去掉开头和结尾所有码点小于等于U+0020(空格)的字符;无法去掉U+3000(全角空格)等Unicode空白字符
/**
 * Strips leading and trailing bytes whose unsigned value is at most {@code ' '}.
 *
 * @param value the bytes to trim
 * @return a new string for the trimmed range, or {@code null} when nothing was trimmed
 */
public static String trim(byte[] value) {
    int end = value.length;
    int start = 0;
    // Advance past leading bytes <= ' '.
    while (start < end && (value[start] & 0xff) <= ' ') {
        start++;
    }
    // Retreat past trailing bytes <= ' '.
    while (start < end && (value[end - 1] & 0xff) <= ' ') {
        end--;
    }
    if (start <= 0 && end >= value.length) {
        // Nothing removed on either side.
        return null;
    }
    return newString(value, start, end - start);
}
  • strip:去掉开头结尾的所有空白字符(JDK 11新增,按Character.isWhitespace判断,可去掉Unicode空白字符)
  • stripLeading:去掉开头空白
  • stripTrailing:去掉结尾空白
  • isBlank:是否只含有空白字符
  • lines:按行终止符(\n、\r、\r\n)拆分字符串,返回由各行组成的Stream<String>
System.out.println("1
2
3
".lines().count());
// output:3
  • toCharArray:返回char数组
  • format:静态方法,字符串格式化
  • valueOf:静态方法,转化为字符串
/**
 * Returns the string form of {@code obj}.
 *
 * @param obj any object, possibly {@code null}
 * @return the literal {@code "null"} for a null argument, otherwise {@code obj.toString()}
 */
public static String valueOf(Object obj) {
    if (obj == null) {
        return "null";
    }
    return obj.toString();
}
  • copyValueOf:静态方法,将char[]复制为字符串
  • intern:JDK7之后,可理解为:将首次遇到的字符串加载到常量池中,并返回常量池中的引用
    • 常量池中有该字符串的引用,则返回常量池中的引用
    • 常量池中没有字符串的引用,则将字符串加载到常量池中,并返回该字符串对象的引用
  • repeat:重复字符串
/**
 * Returns this string concatenated with itself {@code count} times.
 *
 * @param count number of repetitions
 * @return this string for {@code count == 1}, {@code ""} for an empty string or
 *         {@code count == 0}, otherwise a new string
 * @throws IllegalArgumentException if {@code count} is negative
 * @throws OutOfMemoryError if the resulting byte length would exceed {@code Integer.MAX_VALUE}
 */
public String repeat(int count) {
    if (count < 0) {
        throw new IllegalArgumentException("count is negative: " + count);
    }
    if (count == 1) {
        return this;
    }
    final int unit = value.length;
    if (unit == 0 || count == 0) {
        return "";
    }
    if (unit == 1) {
        // Single byte: fill the array directly.
        final byte[] filled = new byte[count];
        Arrays.fill(filled, value[0]);
        return new String(filled, coder);
    }
    // A total length beyond Integer.MAX_VALUE raises an error.
    if (Integer.MAX_VALUE / count < unit) {
        throw new OutOfMemoryError("Repeating " + unit + " bytes String " + count +
                " times will produce a String exceeding maximum size.");
    }
    final int total = unit * count;
    final byte[] out = new byte[total];
    System.arraycopy(value, 0, out, 0, unit);
    // Double the already-filled prefix while it still fits into the remaining space.
    int done = unit;
    while (done < total - done) {
        System.arraycopy(out, 0, out, done, done);
        done <<= 1;
    }
    // Copy the remaining tail from the filled prefix.
    System.arraycopy(out, 0, out, done, total - done);
    return new String(out, coder);
}

注意事项

1. 内存分配
  • String s = "abc";
    • 当常量池中不存在"abc"这个字符串的引用,在堆内存中new一个新的String对象,将这个对象的引用加入常量池。
    • 当常量池中存在"abc"这个字符串的引用,s指向这个引用;
  • String s = new String("abc"):在堆上new一个对象
  • String s = a + b:在堆上new一个对象
  • String s = "a" + "b":相当于String s = "ab"(编译期常量表达式会被编译器直接折叠)
2. equals

推荐"常量字符串".equals(str)而不是str.equals("常量字符串")
若str为null,则后者会报异常,而前者是安全的。

原文地址:https://www.cnblogs.com/JL916/p/12435349.html