python判断一个字符是否是xml合法字符

项目中碰到的问题,记录如下,期望能对他人有用。

def valid_XML_char_ordinal(c):
    """
    @summary:
            check if a code point is a valid XML 1.0 character
    @param c: the code point to be checked, given either as an integer
              ordinal or as a single-character string (converted with ord())
    @see: http://www.w3.org/TR/2008/REC-xml-20081126/#charsets
    @result: True/False
    @raise TypeError: if c is neither an integer nor a single character
    """
    # Code that iterates over a string hands us characters, not ordinals;
    # the integer range checks below only work on the numeric code point.
    code_point = c if isinstance(c, int) else ord(c)
    return (  # conditions ordered by presumed frequency
        0x20 <= code_point <= 0xD7FF
        or code_point in (0x09, 0x0A, 0x0D)
        or 0xE000 <= code_point <= 0xFFFD
        or 0x10000 <= code_point <= 0x10FFFF
    )

考虑对于其他非法xml字符通过base64编码处理,具体代码如下:

# Re-raise a failed import of the XML helper module with a more
# descriptive message.
try:
    import xml.sax.saxutils
except ImportError:
    raise ImportError("requires xml.sax.saxutils package, pleas check if xml.sax.saxutils is installed!")
import base64
import logging

# Module-level logger named after this module.
logger = logging.getLogger(__name__)

# Names exported by a star-import of this module.
__all__ = ["escape", "unescape"]

def escape(data):
    """
    @summary:
            Prepare a string of data for storage in xml.
            If every character is a valid xml character, escape '&', '<',
            and '>'; otherwise encode the whole data in base64.
    @param data: the data to be processed; None is treated as ""
    @return
        {"base64": True | False,
         "data": data}
        If processing fails, the exception is logged and "data" is "".
    """
    is_base64 = False
    escaped_data = ""
    try:
        if data is None:
            data = ""

        # Iterating text yields characters (and iterating Python 3 bytes
        # yields ints), while valid_XML_char_ordinal works on ordinals.
        is_base64 = not all(
            valid_XML_char_ordinal(c if isinstance(c, int) else ord(c))
            for c in data
        )

        if is_base64:
            logger.debug(
                "%r is not ascii-encoded string, so i will encode it in base64",
                data)
            # b64encode needs bytes: encode text as UTF-8 first.
            raw = data if isinstance(data, bytes) else data.encode("utf-8")
            encoded = base64.b64encode(raw)
            # Return the native string type; base64 output is pure ASCII.
            if isinstance(encoded, str):
                escaped_data = encoded
            else:
                escaped_data = encoded.decode("ascii")
        else:
            # only '&', '<' and '>' need escaping to be stored in xml
            escaped_data = xml.sax.saxutils.escape(data)

    except Exception as e:
        # Best effort: report the failure and fall through to return the
        # values gathered so far instead of propagating the error.
        logger.exception(e)

    return {"base64": is_base64,
            "data": escaped_data}

def unescape(data, is_base64 = False):
    """
    @summary:
            Unescape '&amp;', '&lt;', and '&gt;' in a string of data.
            if base64 is True, then base64 decode will be processed first
    @param data: the data to be processed
    @param base64: specify if the data is encoded by base64
    @result: unescaped data
    """
    # check if base64
    unescaped_data = data
    if is_base64:
        try:
            unescaped_data = base64.b64decode(data)
        except Exception, ex:
            logger.debug("some excpetion occured when invoke b64decode")
            logger.error(ex)
            print ex
    else:
        # unescape it
        unescaped_data = xml.sax.saxutils.unescape(data)

    return unescaped_data
原文地址:https://www.cnblogs.com/Jerryshome/p/2490394.html