获取文章的字数或则字符数

package test;

/**
 * 获取文章的字数或则字符数
 * 
 * @author montao
 */
public class StatWordCount {
	public static void main(String[] args) {
		StatWordCount wordCount = new StatWordCount();
		String str = " 扯淡 111啊a 1";

		System.out.println(wordCount.getWordCharacter(str, false, false));
	}

	private final char[] CHS = { ',', ';', '!', '.', '!', '?', ';', '+', ',',
			'?', '!', '/' }; // 符号数组

	private final char[] CHN = { '
', '
' }; // 转义符数组

	private final char[] SPACE = { ' ', ' ' }; // 空格的数组(前半角,后全角)

	/**
	 * 根据指定条件来筛选文章的字数
	 * 
	 * @param wordContent
	 *            文章内容
	 * @param compriseInterpunction
	 *            是否包含指定字符
	 * @param compriseSpace
	 *            是否包含空格
	 * @return 返回文章经过指定筛选后的长度
	 */
	public int getWordCount(String wordContent, boolean compriseInterpunction,
			boolean compriseSpace) {
		if (wordContent == null) {
			return 0;
		} else if (wordContent.length() == 0) {
			return 0;
		} else {
			// 既要包含符号又要包含空格
			if (compriseInterpunction && compriseSpace) {
				// 清除转义符
				String regex = "[" + new String(CHN) + "]";
				wordContent = wordContent.replaceAll(regex, " ");
				return this.getWordCount(wordContent);
			}
			// 不包含符号包含空格
			else if (!compriseInterpunction && compriseSpace) {
				// 使用正则表达式去掉指定的符号和转义符
				String regex1 = "[" + new String(CHN) + "]";
				String regex2 = "[" + new String(CHS) + "]";
				wordContent = wordContent.replaceAll(regex1, " ");
				wordContent = wordContent.replaceAll(regex2, " ");
				return this.getWordCount(wordContent);
			}
			// 包含指定符号不包含空格
			else if (compriseInterpunction && !compriseSpace) {
				// 使用正则表达式去掉空格和转义符
				String regex1 = "[" + new String(CHN) + "]";
				String regex2 = "[" + new String(SPACE) + "]";
				wordContent = wordContent.replaceAll(regex1, " ");
				wordContent = wordContent.replaceAll(regex2, " ");
				return this.getWordCount(wordContent);
			}
			// 空格和指定符号都不包含
			else {
				// 使用正则表达式去掉空格,指定符号和转义符
				String regex1 = "[" + new String(CHN) + "]";
				String regex3 = "[" + new String(CHS) + "]";
				String regex2 = "[" + new String(SPACE) + "]";
				wordContent = wordContent.replaceAll(regex1, " ");
				wordContent = wordContent.replaceAll(regex2, " ");
				wordContent = wordContent.replaceAll(regex3, " ");
				return this.getWordCount(wordContent);
			}
		}
	}

	/**
	 * 返回文章中的字数
	 * 
	 * @param wordCount
	 *            文章内容
	 * @return
	 */
	@SuppressWarnings("unused")
	private int getWordCount(String wordContent) {
		int count = 0;
		if (wordContent == null) { // 判断是否为null,如果为null直接返回0
			count = 0;
		} else if (wordContent.length() == 0) { // 判断是否为空,如果为空直接返回0
			count = 0;
		} else { // 判断获取字数
			wordContent = wordContent.trim(); // 清空空格
			// 临时变量
			String s4 = "";
			String s3 = "";
			String s2 = "";
			String s1 = "";
			boolean bb = false;
			if (wordContent.length() > 0) {
				s4 = String
						.valueOf(wordContent.charAt(wordContent.length() - 1));
			}
			for (int i = 0; i < wordContent.length(); i++) {
				s3 = String.valueOf(wordContent.charAt(i));
				int num = s3.getBytes().length;
				if (s3.hashCode() == 32 || s3.getBytes().length == 2) {
					bb = true;
				}
				if (num == 2) {
					count++;
				} else {
					if (i + 1 < wordContent.length()) {
						s1 = String.valueOf(wordContent.charAt(i + 1));
						if ((s1.hashCode() == 32 && (s3.hashCode() != 32))
								|| ((s1.getBytes().length == 2) && (s3
										.hashCode() != 32))) {
							count++;
						}
					}
				}
			}
			if (!bb) {
				count++;
			} else {
				if (s4.getBytes().length == 1) {
					count++;
				}
			}
		}
		return count;
	}

	/**
	 * 根据条件来获取文章的字符数
	 * 
	 * @param wordContent
	 *            文章内容
	 * @param compriseInterpunction
	 *            是否包含指定符号
	 * @param compriseSpace
	 *            是否包含空格
	 * @return 返回字符长度
	 */
	public int getWordCharacter(String wordContent,
			boolean compriseInterpunction, boolean compriseSpace) {
		// 既要包含符号又要包含空格
		if (compriseInterpunction && compriseSpace) {
			// 清除转义符
			String regex = "[" + new String(CHN) + "]";
			wordContent = wordContent.replaceAll(regex, "");
			// 首部的空格不算
			wordContent = wordContent.replaceAll("^\s+", "");
			return wordContent.length();
		}// 不包含符号包含空格
		else if (!compriseInterpunction && compriseSpace) {
			// 首部的空格不算
			wordContent = wordContent.replaceAll("^\s+", "");
			// 使用正则表达式去掉指定的符号和转义符
			String regex1 = "[" + new String(CHN) + "]";
			String regex2 = "[" + new String(CHS) + "]";
			wordContent = wordContent.replaceAll(regex1, "");
			wordContent = wordContent.replaceAll(regex2, "");
			return wordContent.length();
		}// 包含指定符号不包含空格
		else if (compriseInterpunction && !compriseSpace) {
			// 使用正则表达式去掉空格和转义符
			String regex = "[" + new String(SPACE) + "]";
			wordContent = wordContent.replaceAll(regex, " ");
			return this.getNoSpaceCount(wordContent);
		}// 空格和指定符号都不包含
		else {
			// 使用正则表达式去掉指定符号
			String regex1 = "[" + new String(CHS) + "]";
			String regex2 = "[" + new String(SPACE) + "]";
			wordContent = wordContent.replaceAll(regex1, " ");
			wordContent = wordContent.replaceAll(regex2, " ");
			return this.getNoSpaceCount(wordContent);
		}
	}

	/**
	 * 获取文章中非空格的字符总数
	 * 
	 * @param wordContent
	 *            文章内容
	 * @return
	 */
	private int getNoSpaceCount(String wordContent) {
		int spaceCount = 0;
		if (wordContent == null) {
			spaceCount = 0;
		} else if (wordContent.length() == 0) {
			spaceCount = 0;
		} else {
			// 替换首部的
			wordContent = wordContent.replaceAll("^\s+", "");
			wordContent = wordContent.replaceAll(" ", "");
			// 使用正则替换转义符
			String regex = "[" + new String(CHN) + "]";
			wordContent = wordContent.replaceAll(regex, "");
			spaceCount = wordContent.length();
		}
		return spaceCount;
	}
}

感谢作者,转载自:点击打开链接


原文地址:https://www.cnblogs.com/xiaozhang2014/p/5297280.html