纯文本中识别URI地址并转换成HTML

问题

有一段纯文本text, 欲将其插入DOM节点div中. text中可能包含超链接, 邮件地址等. 如果有, 需要将其识别出来并转换成可点击的链接.

分析

  1. 如果只是纯文本, 要插入div中, 只需将div.innerText设置为text即可.
  2. text中的URI地址可以用正则识别, 并将其替换为由<a/>标签组成的字符串. 此时text变成了HTML字符串html.
  3. HTML字符串html可以赋值给div.innerHTML. 但如果原text中本身就含有具有HTML语义的字符串(例如 <script>)呢?

    因此, 在识别URI之前, 需要先对原text作HTML转义, 否则这些字符串会被浏览器当作标签解析.

解决

uri-recognition.js

(function (root) {
    // Whitespace stripped by trim(): regular whitespace (\s), tab, the
    // non-breaking space U+00A0 and the full-width (ideographic) space U+3000.
    // They are listed explicitly because \s in very old engines does not
    // cover the last two.
    var LEADING_SPACE_REG = /^[\s\t\xA0\u3000]+/,
        TRAILING_SPACE_REG = /[\s\t\xA0\u3000]+$/,
        // A scheme (or a bare "www.") followed by a run of URI characters.
        URI_REG = /(?:https?:\/\/|www\.|ssh:\/\/|ftp:\/\/)[a-z0-9&_+\-?\/.=#]+/i,
        // A deliberately loose "local@domain" pattern.
        MAIL_REG = /[a-z0-9_+\-.]+@[a-z0-9_+\-.]+/i,
        HTML_ESCAPES = {
            '&': '&amp;',
            '<': '&lt;',
            '>': '&gt;',
            '"': '&quot;',
            '\'': '&#39;'
        },
        /**
         * Removes leading and trailing whitespace (including U+00A0 and U+3000).
         * @param {*} s value to trim; null, undefined and '' all yield ''
         * @returns {String}
         */
        trim = function (s) {
            /*jslint eqeq:true*/
            if (s == null || s === '') {
                return '';
            }
            return String(s).replace(LEADING_SPACE_REG, '').
                replace(TRAILING_SPACE_REG, '');
        },
        /**
         * @param {*} s
         * @param {String} sub
         * @returns {Boolean} true when String(s) begins with sub
         */
        startsWith = function (s, sub) {
            return String(s).indexOf(sub) === 0;
        },
        /**
         * Escapes the characters that have a meaning in HTML text or in a
         * double-quoted attribute value.
         * @param {String} s
         * @returns {String}
         */
        escapeHtml = function (s) {
            return String(s).replace(/[&<>"']/g, function (ch) {
                return HTML_ESCAPES[ch];
            });
        },
        /**
         * Builds the markup of a link; both the href and the label are escaped.
         * @param {String} href
         * @param {String} label
         * @returns {String}
         */
        anchor = function (href, label) {
            return '<a href="' + escapeHtml(href) + '">' + escapeHtml(label) + '</a>';
        },
        /**
         * @param {String} str
         * @returns {Boolean} true when str contains a URI or a mail address
         */
        containsLink = function (str) {
            return URI_REG.test(str) || MAIL_REG.test(str);
        },
        /**
         * Walks str from left to right, handing every match of pattern to
         * onMatch and every stretch of text between matches to onText, and
         * concatenates the results.
         * @param {String} str
         * @param {RegExp} pattern non-global pattern that never matches ''
         * @param {Function} onText called with each unmatched stretch
         * @param {Function} onMatch called with each matched substring
         * @returns {String}
         */
        mapSegments = function (str, pattern, onText, onMatch) {
            // A fresh global copy per call keeps lastIndex private to this
            // loop, so nested calls cannot disturb each other.
            var scanner = new RegExp(pattern.source, 'gi'),
                out = '',
                last = 0,
                found = scanner.exec(str);
            while (found !== null) {
                out += onText(str.slice(last, found.index)) + onMatch(found[0]);
                last = found.index + found[0].length;
                found = scanner.exec(str);
            }
            return out + onText(str.slice(last));
        },
        /**
         * Converts plain text to HTML: URIs and mail addresses become links,
         * everything else is HTML-escaped. URIs take precedence; mail
         * addresses are only looked for in the text left between URIs.
         *
         * Escaping is done per segment on the raw text rather than through
         * placeholder tokens, so markup in the input can never reach the
         * output unescaped and no input sequence can collide with a token.
         * @param {String} text
         * @returns {String}
         */
        toHtml = function (text) {
            return mapSegments(text, URI_REG, function (plain) {
                return mapSegments(plain, MAIL_REG, escapeHtml, function (mail) {
                    return anchor('mailto:' + mail, mail);
                });
            }, function (uri) {
                // A bare "www." host has no scheme; default it to http.
                var href = startsWith(uri.toLowerCase(), 'www.') ? ('http://' + uri) : uri;
                return anchor(href, uri);
            });
        },
        /**
         * Recognizes URIs and mail addresses in a piece of plain text.
         * @param {*} text plain text; null and undefined are treated as ''
         * @returns {{content: String, isPlainText: Boolean}} when nothing was
         *     recognized, content is the trimmed text and isPlainText is true;
         *     otherwise content is safe-to-insert HTML and isPlainText is false
         */
        uriRecognition = function (text) {
            text = trim(text);
            if (containsLink(text)) {
                return {
                    content: toHtml(text),
                    isPlainText: false
                };
            }
            return {
                content: text,
                isPlainText: true
            };
        },
        /**
         * Puts text into el, turning URIs and mail addresses into links.
         * @param {Element} el target element
         * @param {*} text plain text
         */
        setContentWithURIRecognition = function (el, text) {
            var result = uriRecognition(text);
            if (!result.isPlainText) {
                el.innerHTML = result.content;
                return;
            }
            /*jslint eqeq: true*/
            if (el.innerText != null) {
                el.innerText = result.content;
            } else if (el.textContent != null) {
                el.textContent = result.content;
            }
        };

    root.uriRecognition = uriRecognition;
    root.setContentWithURIRecognition = setContentWithURIRecognition;
})(typeof window !== 'undefined' ? window : globalThis);



test.html

<!DOCTYPE HTML>
<html>
    <head>
        <meta http-equiv="content-type" content="text/html; charset=utf-8">
        <title>uri regcognition</title>
    </head>
    <body>
        <!-- The helper script publishes setContentWithURIRecognition on window. -->
        <script src="./uri-recognition.js" type="text/javascript"></script>
        <script type="text/javascript">
            // Sample input: a string that itself looks like an <a> tag,
            // followed by a mail address. It is passed as plain text.
            var text = [
                '<a href="http://china.haiwainet.cn/n/2014/0509/c232587-20619235.html" ',
                'mon="ct=1&a=2&c=top&pn=8" target="_blank">',
                '纽约时报:阿里巴巴IPO将风险推向全新水平</a>',
                ' send to example@example.com xxxx'
            ].join('');
            var div = document.createElement('div');

            // Fill the detached div first, then attach it to the page.
            window.setContentWithURIRecognition(div, text);
            document.body.appendChild(div);
        </script>
    </body>
</html>


Chrome下测试OK.

原文地址:https://www.cnblogs.com/gccbuaa/p/6914157.html