CI框架源码学习笔记7——Utf8.php

愉快的清明节假期结束了,继续回到CI框架的学习。这一节我们来看看Utf8.php文件,它主要负责检测当前环境是否支持UTF-8,并提供UTF-8字符串的清洗和编码转换功能。废话不多说,上代码。

/**
 * Utf8 Class
 *
 * Detects whether UTF-8 support can be enabled for the application and
 * offers helpers to clean strings, convert them to UTF-8 and test whether
 * they are plain 7-bit ASCII.
 *
 * Relies on the MB_ENABLED / ICONV_ENABLED constants and on the
 * config_item(), log_message() and remove_invisible_characters() helpers,
 * all of which are defined outside this class.
 */
class CI_Utf8 {

    /**
     * Class constructor
     *
     * Determines if UTF-8 support is to be enabled and records the outcome
     * in the UTF8_ENABLED constant.
     *
     * @return    void
     */
    public function __construct()
    {
        if (
            defined('PREG_BAD_UTF8_ERROR')                // Constant presence is used as the signal that PCRE supports UTF-8
            && (ICONV_ENABLED === TRUE OR MB_ENABLED === TRUE)    // At least one of iconv / mbstring must be available
            && strtoupper(config_item('charset')) === 'UTF-8'    // The configured application charset must be UTF-8
            )
        {
            // All prerequisites met: UTF-8 support is on
            define('UTF8_ENABLED', TRUE);
            log_message('debug', 'UTF-8 Support Enabled');
        }
        else
        {
            // At least one prerequisite is missing: UTF-8 support is off
            define('UTF8_ENABLED', FALSE);
            log_message('debug', 'UTF-8 Support Disabled');
        }

        log_message('info', 'Utf8 Class Initialized');
    }

    // --------------------------------------------------------------------

    /**
     * Clean UTF-8 strings
     *
     * Ensures strings contain only valid UTF-8 characters. Pure ASCII
     * input is returned untouched without consulting any extension.
     *
     * @param    string    $str    String to clean
     * @return    string    The cleaned string; with the iconv branch the result
     *                    may be FALSE if iconv() fails (errors are suppressed)
     */
    public function clean_string($str)
    {
        // Only strings containing bytes outside 7-bit ASCII need cleaning
        if ($this->is_ascii($str) === FALSE)
        {
            // Prefer mbstring when it is available: re-encode UTF-8 to UTF-8
            if (MB_ENABLED)
            {
                $str = mb_convert_encoding($str, 'UTF-8', 'UTF-8');
            }
            // Otherwise fall back to iconv; //IGNORE drops characters that
            // cannot be represented, and @ silences the notice iconv raises
            elseif (ICONV_ENABLED)
            {
                $str = @iconv('UTF-8', 'UTF-8//IGNORE', $str);
            }
        }

        return $str;
    }

    // --------------------------------------------------------------------

    /**
     * Remove ASCII control characters
     *
     * Intended to remove all ASCII control characters except horizontal
     * tabs, line feeds, and carriage returns, as all others can cause
     * problems in XML. The work is delegated to the
     * remove_invisible_characters() helper, called with FALSE as its
     * second argument.
     *
     * @param    string    $str    String to clean
     * @return    string
     */
    public function safe_ascii_for_xml($str)
    {
        return remove_invisible_characters($str, FALSE);
    }

    // --------------------------------------------------------------------

    /**
     * Convert to UTF-8
     *
     * Attempts to convert a string to UTF-8. Unlike clean_string(), the
     * source encoding is supplied by the caller instead of being assumed
     * to already be UTF-8.
     *
     * @param    string    $str        Input string
     * @param    string    $encoding    Input encoding
     * @return    string    $str encoded in UTF-8 or FALSE on failure
     */
    public function convert_to_utf8($str, $encoding)
    {
        if (MB_ENABLED)
        {
            return mb_convert_encoding($str, 'UTF-8', $encoding);
        }
        elseif (ICONV_ENABLED)
        {
            // @ suppresses iconv's notice; the FALSE it returns on failure is passed through
            return @iconv($encoding, 'UTF-8', $str);
        }

        // Neither extension is available, so no conversion is possible
        return FALSE;
    }

    // --------------------------------------------------------------------

    /**
     * Is ASCII?
     *
     * Tests if a string is standard 7-bit ASCII or not.
     *
     * @param    string    $str    String to check
     * @return    bool    TRUE when no byte outside 0x00-0x7F is found
     */
    public function is_ascii($str)
    {
        // The hex escapes need their backslashes: without them the class is
        // built from the literal characters 'x', '0', '7', 'F' and the range
        // '0'-'x', which wrongly flags spaces and punctuation as non-ASCII.
        return (preg_match('/[^\x00-\x7F]/S', $str) === 0);
    }

}
原文地址:https://www.cnblogs.com/isuifeng/p/6675627.html