从字符串中提取单词、从字符串中提取汉字的函数

// Required unit: SysUtils (provides SysUtils.CharInSet)

{ Function that extracts words from a string }

{ Extracts the distinct words of a string into a sorted string list.

  str  - text to scan. It is taken by value and the caller's string is left
         untouched.
  List - receives the words. It is created when nil is passed in, otherwise
         it is cleared first. The list is switched to Sorted with
         Duplicates = dupIgnore, so every word is stored only once.

  A word is a run of ASCII letters, digits, apostrophes and hyphens; upper-case
  ASCII letters are converted to lower case. Tokens that begin with a digit,
  a hyphen or an apostrophe are discarded. }
procedure StrToWordList(str: string; var List: TStringList);
var
  p: PChar;
  i: Integer;
  token: string;
begin
  if List = nil then
    List := TStringList.Create;
  List.Clear;

  { Drop duplicates }
  List.Sorted := True;
  List.Duplicates := dupIgnore;

  { The buffer is rewritten in place below. A by-value string parameter still
    shares its buffer with the caller's variable, and a PChar cast does not
    trigger copy-on-write, so without this call the caller's string would be
    modified (and writing into a string literal can raise an access
    violation). }
  UniqueString(str);
  p := PChar(str);

  { Turn every non-word character into a space and lower-case A..Z }
  while p^ <> #0 do
  begin
    case p^ of
      'A'..'Z':
        p^ := Chr(Ord(p^) + 32);
      'a'..'z', '0'..'9', '''', '-':
        ; { word character: keep as is }
    else
      p^ := #32;
    end;
    Inc(p);
  end;

  { Split on spaces into the list }
  List.Delimiter := #32;
  List.DelimitedText := str;

  { A word must start with a letter; remove everything else. Iterate backwards
    so deletions do not shift the indexes still to be visited. The empty-string
    test keeps token[1] from being evaluated on an empty entry. }
  for i := List.Count - 1 downto 0 do
  begin
    token := List[i];
    if (token = '') or CharInSet(token[1], ['0'..'9', '-', '''']) then
      List.Delete(i);
  end;
end;

{ Extracts the distinct Chinese characters of a string into a sorted list.

  str  - text to scan.
  List - receives the characters, one per entry. It is created when nil is
         passed in, otherwise it is cleared first. The list is switched to
         Sorted with Duplicates = dupIgnore.

  Only code units in the range U+4E00..U+9FA5 are collected. }
procedure StrToHanZiList(str: string; var List: TStringList);
var
  p: PWideChar;
begin
  if List = nil then
    List := TStringList.Create;
  List.Clear;

  { Drop duplicates }
  List.Sorted := True;
  List.Duplicates := dupIgnore;

  p := PWideChar(str);
  while p^ <> #0 do
  begin
    if (p^ >= #$4E00) and (p^ <= #$9FA5) then
      List.Add(p^);
    Inc(p);
  end;
end;
原文地址:https://www.cnblogs.com/westsoft/p/10220128.html