C#基础实现URL Unicode编码,编码、解码相关整理

【C#基础】实现URL Unicode编码,编码、解码相关整理

1、URL编码  引用系统 System.Web

using System.Web;

// Build the POST body: the SAML request goes through HttpUtility.UrlEncode,
// while RelayState is appended exactly as it is.
// NOTE(review): RelayState is not URL-encoded here — confirm it is already URL-safe.
string postdata = string.Concat("SAMLRequest=", HttpUtility.UrlEncode(SAMLRequest), "&RelayState=", RelayState);

2、URL编码 自己封装的方法

/// <summary>
/// Percent-encodes a string by writing every byte of its UTF-8 form as <c>%xx</c>.
/// </summary>
/// <remarks>
/// Every byte is escaped, including ASCII letters and digits, and the hex digits are
/// lowercase (for example <c>"A"</c> becomes <c>"%41"</c>).
/// </remarks>
/// <param name="str">The text to encode.</param>
/// <returns>The encoded text; an empty string when <paramref name="str"/> is empty.</returns>
public static string UrlEncode(string str)
{
    // UTF-8 is chosen explicitly instead of the machine-dependent Encoding.Default.
    byte[] byStr = System.Text.Encoding.UTF8.GetBytes(str);
    StringBuilder sb = new StringBuilder(byStr.Length * 3);
    for (int i = 0; i < byStr.Length; i++)
    {
        // "x2" always yields two digits; without the padding, bytes below 0x10
        // (such as line feed, 0x0A) would come out as "%a", which is not a valid escape.
        sb.Append('%').Append(byStr[i].ToString("x2"));
    }

    return sb.ToString();
}

3、\u4E2D\u56FD 转换成 "中国"

     /// <summary>
        /// Replaces every <c>\uXXXX</c> escape (backslash, lowercase <c>u</c>, four hex digits)
        /// in <paramref name="input"/> with the UTF-16 code unit it names; for example the text
        /// <c>\u4E2D\u56FD</c> becomes the two characters U+4E2D U+56FD.
        /// </summary>
        /// <remarks>
        /// Anything that is not a complete, valid escape is copied through unchanged. This
        /// includes other backslash sequences such as <c>\n</c>, a trailing backslash, and
        /// truncated or non-hex escapes. Two consecutive escapes that spell a surrogate pair
        /// produce the corresponding supplementary character.
        /// </remarks>
        /// <param name="input">Text that may contain <c>\uXXXX</c> escapes.</param>
        /// <returns>The text with all valid escapes decoded.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="input"/> is null.</exception>
        public static string NormalU2C(string input)
        {
            if (input == null)
            {
                throw new ArgumentNullException("input");
            }

            const string hexDigits = "0123456789ABCDEF";
            StringBuilder decoded = new StringBuilder(input.Length);
            int i = 0;
            while (i < input.Length)
            {
                char ch = input[i];

                // A candidate escape needs '\', 'u' and four more characters (indices i..i+5).
                if (ch == '\\' && i + 5 < input.Length && input[i + 1] == 'u')
                {
                    int codeUnit = 0;
                    bool valid = true;
                    for (int k = 2; k < 6; k++)
                    {
                        int digit = hexDigits.IndexOf(char.ToUpperInvariant(input[i + k]));
                        if (digit < 0)
                        {
                            valid = false;
                            break;
                        }
                        codeUnit = (codeUnit << 4) | digit;
                    }

                    if (valid)
                    {
                        // Append the raw code unit so that high/low surrogate halves written
                        // as two escapes recombine into one character in the result.
                        decoded.Append((char)codeUnit);
                        i += 6;
                        continue;
                    }
                }

                // Not an escape: keep the character and rescan from the next position.
                decoded.Append(ch);
                i++;
            }

            return decoded.ToString();
        }

        /// <summary>
        /// Converts four hexadecimal digit characters into the single UTF-16 code unit they spell,
        /// e.g. <c>{'4','E','2','D'}</c> yields the character U+4E2D.
        /// </summary>
        /// <param name="u4">
        /// An array of at least four characters; only the first four are read, case-insensitively.
        /// </param>
        /// <returns>A one-character string holding the decoded code unit.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="u4"/> is null.</exception>
        /// <exception cref="ArgumentException">
        /// <paramref name="u4"/> has fewer than four characters, or one of the first four is not a
        /// hexadecimal digit.
        /// </exception>
        public static string UnicodeCode2Str(char[] u4)
        {
            if (u4 == null)
            {
                throw new ArgumentNullException("u4");
            }
            if (u4.Length < 4)
            {
                throw new ArgumentException("It's not a unicode code array");
            }

            const string hexDigits = "0123456789ABCDEF";
            int codeUnit = 0;
            for (int i = 0; i < 4; i++)
            {
                int digit = hexDigits.IndexOf(char.ToUpperInvariant(u4[i]));
                if (digit == -1)
                {
                    throw new ArgumentException("It's not a unicode code array");
                }
                codeUnit = (codeUnit << 4) | digit;
            }

            // Cast directly instead of decoding two bytes through an Encoding: the decoder
            // would replace a lone surrogate half with U+FFFD, so callers could never
            // reassemble a surrogate pair from two consecutive results.
            return ((char)codeUnit).ToString();
        }

4、网页ASCII转换成Unicode

    //网页ASCII转换成Unicode
        /// <summary>
        /// Writes <paramref name="htmltext"/> into a new HTML document object and returns the
        /// <c>innerText</c> of that document's body.
        /// </summary>
        /// <param name="htmltext">HTML markup to load into the document.</param>
        /// <returns>The body's <c>innerText</c> after the markup has been written and the document closed.</returns>
        // NOTE(review): IHTMLDocument2 / HTMLDocumentClass look like the MSHTML COM interop
        // types — confirm the project references that assembly.
        public string HtmlEncoding(string htmltext)
        {
            IHTMLDocument2 document = new HTMLDocumentClass();
            object[] markup = new object[] { htmltext };
            document.write(markup);
            document.close();
            return document.body.innerText;
        }

5、解析html的NCR编码方法

//解析html的NCR编码方法
        /// <summary>
        /// Extracts and decodes the hexadecimal character references found in an HTML page body.
        /// </summary>
        /// <remarks>
        /// Two forms are recognised: HTML numeric character references <c>&amp;#xH…;</c>
        /// (one to six hex digits, trailing semicolon optional) and <c>\uXXXX</c> escapes
        /// (exactly four hex digits). Only the decoded characters are returned; any other
        /// text in the body is discarded, as are all tags and all whitespace.
        /// </remarks>
        /// <param name="htmltext">The HTML source to scan.</param>
        /// <returns>
        /// The decoded characters in document order (empty when none are found), or the fixed
        /// message <c>"解析html的NCR编码方法异常"</c> if any step throws.
        /// </returns>
        public string NCRtoString(string htmltext)
        {
            try
            {
                // NOTE(review): presumably stores the first capture group (the <body> content)
                // back into htmltext — confirm against RegexHelper.GetMatchStr.
                RegexHelper.GetMatchStr(htmltext, "<body>(.*?)</body>", out htmltext);
                htmltext = htmltext.Replace("\t", "").Replace("\r", "").Replace("\n", "").Replace(" ", "");
                htmltext = Regex.Replace(htmltext, "<[^>]*>", "");

                // Match the references directly rather than splitting on 'u': a split treats
                // every literal 'u' in the text as a delimiter and then fails to parse the
                // fragment that follows it.
                System.Text.StringBuilder result = new System.Text.StringBuilder();
                MatchCollection references = Regex.Matches(htmltext, @"&#x([0-9A-Fa-f]{1,6});?|\\u([0-9A-Fa-f]{4})");
                foreach (Match reference in references)
                {
                    if (reference.Groups[1].Success)
                    {
                        int codePoint = int.Parse(reference.Groups[1].Value, System.Globalization.NumberStyles.HexNumber);
                        if (codePoint > 0xFFFF)
                        {
                            // Supplementary code points need a surrogate pair; values above
                            // U+10FFFF make ConvertFromUtf32 throw, which the catch below
                            // turns into the error message.
                            result.Append(char.ConvertFromUtf32(codePoint));
                        }
                        else
                        {
                            result.Append((char)codePoint);
                        }
                    }
                    else
                    {
                        // \uXXXX names one UTF-16 code unit; appending it as-is lets two
                        // consecutive escapes form a surrogate pair.
                        result.Append((char)int.Parse(reference.Groups[2].Value, System.Globalization.NumberStyles.HexNumber));
                    }
                }

                return result.ToString();
            }
            catch (Exception)
            {
                // Kept for compatibility: callers receive this message instead of an exception.
                return "解析html的NCR编码方法异常";
            }
        }
 

6、C#实现escape编码

     //C#实现escape编码
        /// <summary>
        /// Encodes every UTF-16 code unit of <paramref name="s"/> as <c>%25uXXXX</c>, where
        /// <c>XXXX</c> is the code unit in four uppercase hex digits.
        /// </summary>
        /// <remarks>
        /// <c>%25</c> is the percent-encoding of <c>%</c>, so the output is the URL-encoded form
        /// of the <c>%uXXXX</c> notation. All characters are encoded, ASCII included.
        /// </remarks>
        /// <param name="s">The text to encode.</param>
        /// <returns>The encoded text; an empty string when <paramref name="s"/> is empty.</returns>
        public static string UrlEncode(string s)
        {
            // Encoding.Unicode is little-endian UTF-16: low byte first, then high byte.
            byte[] utf16 = System.Text.Encoding.Unicode.GetBytes(s);
            StringBuilder escaped = new StringBuilder();
            int offset = 0;
            while (offset < utf16.Length)
            {
                byte low = utf16[offset];
                byte high = utf16[offset + 1];
                escaped.Append("%25u").Append(high.ToString("X2")).Append(low.ToString("X2"));
                offset += 2;
            }
            return escaped.ToString();
        }

7、汉字与Unicode编码(\uXXXX)字符串的相互转换

        /// <summary>
        /// Rewrites every UTF-16 code unit of a string as a <c>\uxxxx</c> escape with four
        /// lowercase hex digits.
        /// </summary>
        /// <param name="str">The text to encode (typically Chinese characters).</param>
        /// <returns>The escaped text; an empty string when <paramref name="str"/> is empty.</returns>
        public static string ToUnicode(string str)
        {
            // Little-endian UTF-16: each code unit is stored low byte first, high byte second.
            byte[] utf16 = Encoding.Unicode.GetBytes(str);
            StringBuilder escaped = new StringBuilder(utf16.Length * 3);
            int offset = 0;
            while (offset < utf16.Length)
            {
                string high = utf16[offset + 1].ToString("x2");
                string low = utf16[offset].ToString("x2");
                escaped.Append("\\u").Append(high).Append(low);
                offset += 2;
            }
            return escaped.ToString();
        }

        /// <summary>
        /// Collects every <c>\uXXXX</c> escape (four hex digits, any case) found in
        /// <paramref name="str"/> and returns the characters they name.
        /// </summary>
        /// <remarks>
        /// Text that is not part of a valid escape is not copied to the result. Despite the
        /// method name, the result is an ordinary .NET (UTF-16) string; no GB2312 conversion
        /// takes place. Two consecutive escapes that spell a surrogate pair produce the
        /// corresponding supplementary character.
        /// </remarks>
        /// <param name="str">Text containing <c>\uXXXX</c> escapes.</param>
        /// <returns>The decoded characters in order of appearance; empty when none are found.</returns>
        public static string ToGB2312(string str)
        {
            StringBuilder decoded = new StringBuilder();

            // Match hex digits only: the broader \w class also accepts letters such as 'z'
            // and '_', which then fail hex parsing with a FormatException.
            MatchCollection escapes = Regex.Matches(str, @"\\[uU]([0-9A-Fa-f]{4})");
            foreach (Match escape in escapes)
            {
                // Append the code unit itself; decoding each unit through an Encoding would
                // turn both halves of a surrogate pair into U+FFFD.
                decoded.Append((char)int.Parse(escape.Groups[1].Value, NumberStyles.HexNumber));
            }

            return decoded.ToString();
        }
原文地址:https://www.cnblogs.com/grj001/p/12223798.html