// Markdown.Sanitizer.js
(function() {
    // Resolve the object the public API is attached to (`output`) and the
    // Converter constructor, depending on the module system in use.
    var output, Converter;
    if (typeof exports === 'object' && typeof require === 'function') {
        // CommonJS (e.g. Node.js) module: publish on `exports` and load the
        // converter from its sibling module.
        output = exports;
        Converter = require('./Markdown.Converter').Converter;
    } else {
        // Otherwise use the `window.Markdown` namespace. It is read directly,
        // so it has to be defined (with `Converter` on it) before this runs.
        output = window.Markdown;
        Converter = output.Converter;
    }
    /**
     * Create a Converter whose HTML output is sanitized.
     *
     * Two `postConversion` hooks are chained, in this order: `sanitizeHtml`
     * first, then `balanceTags`. `balanceTags` assumes its input has already
     * been sanitized, so the order matters.
     *
     * @returns {Converter} a new converter with both hooks chained.
     */
    output.getSanitizingConverter = function() {
        var converter = new Converter();
        converter.hooks.chain('postConversion', sanitizeHtml);
        converter.hooks.chain('postConversion', balanceTags);
        return converter;
    };
    /**
     * Pass every tag-like fragment of `html` through `sanitizeTag` and
     * substitute whatever it returns; text outside those fragments is kept.
     *
     * @param {string} html - HTML to filter.
     * @returns {string} the filtered HTML.
     */
    function sanitizeHtml(html) {
        // A fragment is "<", any run of characters other than ">", and an
        // optional closing ">" (so an unterminated "<..." is also examined).
        var tagLike = /<[^>]*>?/gi;
        var filtered = html.replace(tagLike, sanitizeTag);
        return filtered;
    }

    // Whitelist patterns. Each one is anchored (^...$) and case-insensitive,
    // so it has to match one complete tag and nothing else.

    // (tags that can be opened/closed) | (tags that stand alone)
    // No attributes are accepted on these tags.
    var basic_tag_whitelist = /^(<\/?(b|blockquote|code|del|dd|dl|dt|em|h1|h2|h3|i|kbd|li|ol|p|pre|s|sup|sub|strong|strike|ul)>|<(br|hr)\s?\/?>)$/i;

    // <a href="url..." optional title>|</a>
    // The href must start with http://, https://, ftp:// or "/".
    var a_white = /^(<a\shref="((https?|ftp):\/\/|\/)[-A-Za-z0-9+&@#\/%?=~_|!:,.;\(\)]+"(\stitle="[^"<>]+")?\s?>|<\/a>)$/i;

    // <img src="url..." optional width  optional height  optional alt  optional title
    // The src must start with http://, https:// or "/"; the optional
    // attributes are only accepted in exactly this order.
    var img_white = /^(<img\ssrc="(https?:\/\/|\/)[-A-Za-z0-9+&@#\/%?=~_|!:,.;\(\)]+"(\swidth="\d{1,3}")?(\sheight="\d{1,3}")?(\salt="[^"<>]*")?(\stitle="[^"<>]*")?\s?\/?>)$/i;

    /**
     * Decide whether a single tag survives sanitization.
     *
     * @param {string} tag - one tag-like fragment, e.g. `<b>` or `<a href="...">`.
     * @returns {string} `tag` unchanged when it matches one of the whitelists,
     *     otherwise the empty string (the tag is dropped).
     */
    function sanitizeTag(tag) {
        // The patterns carry no `g` flag, so test() keeps no state between calls.
        if (basic_tag_whitelist.test(tag) || a_white.test(tag) || img_white.test(tag)) {
            return tag;
        }
        return '';
    }
    /**
     * Attempt to balance the HTML tags in `html` by removing any unmatched
     * opening or closing tags.
     *
     * IMPORTANT: the HTML is *assumed* to have *already* been sanitized and
     * to be safe/sane before balancing!
     *
     * Adapted from CODESNIPPET: A8591DBA-D1D3-11DE-947C-BA5556D89593
     *
     * @param {string} html - sanitized HTML.
     * @returns {string} `html` with orphaned tags deleted. The input is
     *     returned as-is when it contains no tags or nothing needs removing.
     */
    function balanceTags(html) {
        // Loose `==` is kept from the original so that falsy non-string
        // values such as 0 still short-circuit to ''.
        if (html == '')
            return '';

        var re = /<\/?\w+[^>]*(\s|$|>)/g;
        var tagNameRe = /^<\/?(\w+)/;

        // Collect the tags from the string that is edited below, not from a
        // lower-cased copy: toLowerCase() can turn non-ASCII characters into
        // ASCII word characters (U+212A KELVIN SIGN becomes "k"), which would
        // make the two passes see different tags and shift removal indices.
        var found = html.match(re);

        // no HTML tags present? nothing to do; exit now
        if (!found)
            return html;

        var tagcount = found.length;

        // lower-case each tag on its own; this makes our case insensitive
        // comparisons easier while keeping indices aligned with `html`
        var tags = [];
        for (var i = 0; i < tagcount; i++)
            tags.push(found[i].toLowerCase());

        // tags we never try to pair; assume they're self-closed
        var ignoredtags = '<p><img><br><li><hr>';
        var tagpaired = [];   // tagpaired[i]: closing tag i was claimed by an opener
        var tagremove = [];   // tagremove[i]: tag i is orphaned and gets deleted
        var needsRemoval = false;
        var tagname, tag, match;

        // loop through matched tags in forward order
        for (var ctag = 0; ctag < tagcount; ctag++) {
            tag = tags[ctag];
            // Read only the leading name. Stripping "the rest" with `.*`
            // stops at a newline and leaves attribute text in the name when
            // a tag spans several lines.
            tagname = tagNameRe.exec(tag)[1];

            // skip any already paired tags and the tags in our ignore list
            if (tagpaired[ctag] || ignoredtags.indexOf('<' + tagname + '>') > -1)
                continue;

            match = -1;

            if (!/^<\//.test(tag)) {
                // this is an opening tag
                // search forwards (next tags), look for closing tags
                for (var ntag = ctag + 1; ntag < tagcount; ntag++) {
                    if (!tagpaired[ntag] && tags[ntag] === '</' + tagname + '>') {
                        match = ntag;
                        break;
                    }
                }
            }

            // A closing tag that reaches this point was not claimed by any
            // earlier opener, so it is orphaned as well.
            if (match === -1) {
                tagremove[ctag] = true; // mark for removal
                needsRemoval = true;
            } else {
                tagpaired[match] = true; // mark paired
            }
        }

        if (!needsRemoval)
            return html;

        // delete all orphaned tags from the string; `re` visits the tags in
        // the same order as the match() call above, so a counter is enough
        var index = 0;
        return html.replace(re, function(whole) {
            var res = tagremove[index] ? '' : whole;
            index++;
            return res;
        });
    }
})();