Python:将不同语言的字符串连接成文本

python不同语言的字符串连接成文本

 # -*- coding:utf-8 -*- #

import sys
import unicodedata
import six

# Set of one-character text strings for every code point below
# sys.maxunicode whose Unicode general category starts with "L" (letter)
# or "N" (number).
# six.moves.range resolves to xrange on Python 2 and range on Python 3, so
# the table is built lazily on both without a NameError on Python 3.
_ALPHANUMERIC_CHAR_SET = set(
    char
    for char in (six.unichr(i) for i in six.moves.range(sys.maxunicode))
    if unicodedata.category(char).startswith(("L", "N")))


def _join_tokens_to_string(tokens):
  """Join a list of string tokens into a single text string.

  A single space is inserted between two adjacent tokens only when the
  first element of both tokens is found in _ALPHANUMERIC_CHAR_SET; every
  other pair of tokens is concatenated directly.

  Args:
    tokens: sequence of tokens. Byte-string tokens are decoded as UTF-8
      before being joined; text tokens are used unchanged.

  Returns:
    The joined text (an empty string for an empty token list).
  """
  # Slice instead of index so that an empty token is classified as
  # non-alphanumeric rather than raising IndexError. The lookup is done on
  # the token exactly as it was passed in, i.e. before any decoding.
  token_is_alnum = [t[:1] in _ALPHANUMERIC_CHAR_SET for t in tokens]
  ret = []
  for i, token in enumerate(tokens):
    if i > 0 and token_is_alnum[i - 1] and token_is_alnum[i]:
      ret.append(u" ")
    # Only byte strings need decoding. Calling .decode() on a text token
    # raises AttributeError on Python 3 and can raise UnicodeEncodeError on
    # Python 2 (implicit ASCII encode of non-ASCII text).
    if isinstance(token, bytes):
      token = token.decode("utf-8")
    ret.append(token)
  return u"".join(ret)


if __name__ == '__main__':
    # Demo: one token list per language (English, German, Chinese). Each
    # list is joined and the result is printed as UTF-8 encoded bytes.
    samples = [
        ['hello','world'],
        ['mehr', 'Sicherheit', 'für'],
        ["从40万年前","开始"],
    ]
    for sample_tokens in samples:
        joined = _join_tokens_to_string(sample_tokens)
        print(joined.encode("utf-8"))

输出结果(在 Python 2 下运行):

hello world
mehr Sicherheit für
从40万年前开始

原文地址:https://www.cnblogs.com/cydcyd/p/13895854.html