/*jshint curly:true, eqeqeq:true, laxbreak:true, noempty:false */
/*

  The MIT License (MIT)

  Copyright (c) 2007-2013 Einar Lielmanis and contributors.

  Permission is hereby granted, free of charge, to any person obtaining a copy
  of this software and associated documentation files (the "Software"), to deal
  in the Software without restriction, including without limitation the rights
  to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  copies of the Software, and to permit persons to whom the Software is
  furnished to do so, subject to the following conditions:

  The above copyright notice and this permission notice shall be included in
  all copies or substantial portions of the Software.

  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  SOFTWARE.


 Style HTML
---------------

  Written by Nochum Sossonko, (nsossonko@hotmail.com)

  Based on code initially developed by: Einar Lielmanis,
    http://jsbeautifier.org/

  Usage:
    style_html(html_source);

    style_html(html_source, options);

  The options are:
    indent_inner_html (default false) — indent <head> and <body> sections,
    indent_size (default 4) — indentation size,
    indent_char (default space) — character to indent with,
    wrap_line_length (default 250) - maximum amount of characters per line (0 = disable)
    brace_style (default "collapse") - "collapse" | "expand" | "end-expand"
            put braces on the same line as control statements (default), or put braces on own line (Allman / ANSI style), or just put end braces on own line.
unformatted (defaults to inline tags) - list of tags, that shouldn't be reformatted indent_scripts (default normal) - "keep"|"separate"|"normal" preserve_newlines (default true) - whether existing line breaks before elements should be preserved Only works before elements, not inside tags or for text. max_preserve_newlines (default unlimited) - maximum number of line breaks to be preserved in one chunk indent_handlebars (default false) - format and indent {{#foo}} and {{/foo}} e.g. style_html(html_source, { 'indent_inner_html': false, 'indent_size': 2, 'indent_char': ' ', 'wrap_line_length': 78, 'brace_style': 'expand', 'unformatted': ['a', 'sub', 'sup', 'b', 'i', 'u'], 'preserve_newlines': true, 'max_preserve_newlines': 5, 'indent_handlebars': false }); */ (function() { function trim(s) { return s.replace(/^\s+|\s+$/g, ''); } function ltrim(s) { return s.replace(/^\s+/g, ''); } function style_html(html_source, options, js_beautify, css_beautify) { //Wrapper function to invoke all the necessary constructors and deal with the output. var multi_parser, indent_inner_html, indent_size, indent_character, wrap_line_length, brace_style, unformatted, preserve_newlines, max_preserve_newlines, indent_handlebars; options = options || {}; // backwards compatibility to 1.3.4 if ((options.wrap_line_length === undefined || parseInt(options.wrap_line_length, 10) === 0) && (options.max_char !== undefined && parseInt(options.max_char, 10) !== 0)) { options.wrap_line_length = options.max_char; } indent_inner_html = (options.indent_inner_html === undefined) ? false : options.indent_inner_html; indent_size = (options.indent_size === undefined) ? 4 : parseInt(options.indent_size, 10); indent_character = (options.indent_char === undefined) ? ' ' : options.indent_char; brace_style = (options.brace_style === undefined) ? 'collapse' : options.brace_style; wrap_line_length = parseInt(options.wrap_line_length, 10) === 0 ? 
32786 : parseInt(options.wrap_line_length || 250, 10); unformatted = options.unformatted || ['a', 'span', 'bdo', 'em', 'strong', 'dfn', 'code', 'samp', 'kbd', 'var', 'cite', 'abbr', 'acronym', 'q', 'sub', 'sup', 'tt', 'i', 'b', 'big', 'small', 'u', 's', 'strike', 'font', 'ins', 'del', 'pre', 'address', 'dt', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6']; preserve_newlines = (options.preserve_newlines === undefined) ? true : options.preserve_newlines; max_preserve_newlines = preserve_newlines ? (isNaN(parseInt(options.max_preserve_newlines, 10)) ? 32786 : parseInt(options.max_preserve_newlines, 10)) : 0; indent_handlebars = (options.indent_handlebars === undefined) ? false : options.indent_handlebars; function Parser() { this.pos = 0; //Parser position this.token = ''; this.current_mode = 'CONTENT'; //reflects the current Parser mode: TAG/CONTENT this.tags = { //An object to hold tags, their position, and their parent-tags, initiated with default values parent: 'parent1', parentcount: 1, parent1: '' }; this.tag_type = ''; this.token_text = this.last_token = this.last_text = this.token_type = ''; this.newlines = 0; this.indent_content = indent_inner_html; this.Utils = { //Uilities made available to the various functions whitespace: "\n\r\t ".split(''), single_token: 'br,input,link,meta,!doctype,basefont,base,area,hr,wbr,param,img,isindex,?xml,embed,?php,?,?='.split(','), //all the single tags for HTML extra_liners: 'head,body,/html'.split(','), //for tags that need a line of whitespace before them in_array: function(what, arr) { for (var i = 0; i < arr.length; i++) { if (what === arr[i]) { return true; } } return false; } }; this.traverse_whitespace = function() { var input_char = ''; input_char = this.input.charAt(this.pos); if (this.Utils.in_array(input_char, this.Utils.whitespace)) { this.newlines = 0; while (this.Utils.in_array(input_char, this.Utils.whitespace)) { if (preserve_newlines && input_char === '\n' && this.newlines <= max_preserve_newlines) { this.newlines += 1; 
} this.pos++; input_char = this.input.charAt(this.pos); } return true; } return false; }; this.get_content = function() { //function to capture regular content between tags var input_char = '', content = [], space = false; //if a space is needed while (this.input.charAt(this.pos) !== '<') { if (this.pos >= this.input.length) { return content.length ? content.join('') : ['', 'TK_EOF']; } if (this.traverse_whitespace()) { if (content.length) { space = true; } continue; //don't want to insert unnecessary space } if (indent_handlebars) { // Handlebars parsing is complicated. // {{#foo}} and {{/foo}} are formatted tags. // {{something}} should get treated as content, except: // {{else}} specifically behaves like {{#if}} and {{/if}} var peek3 = this.input.substr(this.pos, 3); if (peek3 === '{{#' || peek3 === '{{/') { // These are tags and not content. break; } else if (this.input.substr(this.pos, 2) === '{{') { if (this.get_tag(true) === '{{else}}') { break; } } } input_char = this.input.charAt(this.pos); this.pos++; if (space) { if (this.line_char_count >= this.wrap_line_length) { //insert a line when the wrap_line_length is reached this.print_newline(false, content); this.print_indentation(content); } else { this.line_char_count++; content.push(' '); } space = false; } this.line_char_count++; content.push(input_char); //letter at-a-time (or string) inserted to an array } return content.length ? content.join('') : ''; }; this.get_contents_to = function(name) { //get the full content of a script or style to pass to js_beautify if (this.pos === this.input.length) { return ['', 'TK_EOF']; } var input_char = ''; var content = ''; var reg_match = new RegExp('', 'igm'); reg_match.lastIndex = this.pos; var reg_array = reg_match.exec(this.input); var end_script = reg_array ? 
reg_array.index : this.input.length; //absolute end of script if (this.pos < end_script) { //get everything in between the script tags content = this.input.substring(this.pos, end_script); this.pos = end_script; } return content; }; this.record_tag = function(tag) { //function to record a tag and its parent in this.tags Object if (this.tags[tag + 'count']) { //check for the existence of this tag type this.tags[tag + 'count']++; this.tags[tag + this.tags[tag + 'count']] = this.indent_level; //and record the present indent level } else { //otherwise initialize this tag type this.tags[tag + 'count'] = 1; this.tags[tag + this.tags[tag + 'count']] = this.indent_level; //and record the present indent level } this.tags[tag + this.tags[tag + 'count'] + 'parent'] = this.tags.parent; //set the parent (i.e. in the case of a div this.tags.div1parent) this.tags.parent = tag + this.tags[tag + 'count']; //and make this the current parent (i.e. in the case of a div 'div1') }; this.retrieve_tag = function(tag) { //function to retrieve the opening tag to the corresponding closer if (this.tags[tag + 'count']) { //if the openener is not in the Object we ignore it var temp_parent = this.tags.parent; //check to see if it's a closable tag. while (temp_parent) { //till we reach '' (the initial value); if (tag + this.tags[tag + 'count'] === temp_parent) { //if this is it use it break; } temp_parent = this.tags[temp_parent + 'parent']; //otherwise keep on climbing up the DOM Tree } if (temp_parent) { //if we caught something this.indent_level = this.tags[tag + this.tags[tag + 'count']]; //set the indent_level accordingly this.tags.parent = this.tags[temp_parent + 'parent']; //and set the current parent } delete this.tags[tag + this.tags[tag + 'count'] + 'parent']; //delete the closed tags parent reference... 
delete this.tags[tag + this.tags[tag + 'count']]; //...and the tag itself if (this.tags[tag + 'count'] === 1) { delete this.tags[tag + 'count']; } else { this.tags[tag + 'count']--; } } }; this.indent_to_tag = function(tag) { // Match the indentation level to the last use of this tag, but don't remove it. if (!this.tags[tag + 'count']) { return; } var temp_parent = this.tags.parent; while (temp_parent) { if (tag + this.tags[tag + 'count'] === temp_parent) { break; } temp_parent = this.tags[temp_parent + 'parent']; } if (temp_parent) { this.indent_level = this.tags[tag + this.tags[tag + 'count']]; } }; this.get_tag = function(peek) { //function to get a full tag and parse its type var input_char = '', content = [], comment = '', space = false, tag_start, tag_end, tag_start_char, orig_pos = this.pos, orig_line_char_count = this.line_char_count; peek = peek !== undefined ? peek : false; do { if (this.pos >= this.input.length) { if (peek) { this.pos = orig_pos; this.line_char_count = orig_line_char_count; } return content.length ? content.join('') : ['', 'TK_EOF']; } input_char = this.input.charAt(this.pos); this.pos++; if (this.Utils.in_array(input_char, this.Utils.whitespace)) { //don't want to insert unnecessary space space = true; continue; } if (input_char === "'" || input_char === '"') { input_char += this.get_unformatted(input_char); space = true; } if (input_char === '=') { //no space before = space = false; } if (content.length && content[content.length - 1] !== '=' && input_char !== '>' && space) { //no space after = or before > if (this.line_char_count >= this.wrap_line_length) { this.print_newline(false, content); this.print_indentation(content); } else { content.push(' '); this.line_char_count++; } space = false; } if (indent_handlebars && tag_start_char === '<') { // When inside an angle-bracket tag, put spaces around // handlebars not inside of strings. 
if ((input_char + this.input.charAt(this.pos)) === '{{') { input_char += this.get_unformatted('}}'); if (content.length && content[content.length - 1] !== ' ' && content[content.length - 1] !== '<') { input_char = ' ' + input_char; } space = true; } } if (input_char === '<' && !tag_start_char) { tag_start = this.pos - 1; tag_start_char = '<'; } if (indent_handlebars && !tag_start_char) { if (content.length >= 2 && content[content.length - 1] === '{' && content[content.length - 2] == '{') { if (input_char === '#' || input_char === '/') { tag_start = this.pos - 3; } else { tag_start = this.pos - 2; } tag_start_char = '{'; } } this.line_char_count++; content.push(input_char); //inserts character at-a-time (or string) if (content[1] && content[1] === '!') { //if we're in a comment, do something special // We treat all comments as literals, even more than preformatted tags // we just look for the appropriate close tag content = [this.get_comment(tag_start)]; break; } if (indent_handlebars && tag_start_char === '{' && content.length > 2 && content[content.length - 2] === '}' && content[content.length - 1] === '}') { break; } } while (input_char !== '>'); var tag_complete = content.join(''); var tag_index; var tag_offset; if (tag_complete.indexOf(' ') !== -1) { //if there's whitespace, thats where the tag name ends tag_index = tag_complete.indexOf(' '); } else if (tag_complete[0] === '{') { tag_index = tag_complete.indexOf('}'); } else { //otherwise go with the tag ending tag_index = tag_complete.indexOf('>'); } if (tag_complete[0] === '<' || !indent_handlebars) { tag_offset = 1; } else { tag_offset = tag_complete[2] === '#' ? 
3 : 2; } var tag_check = tag_complete.substring(tag_offset, tag_index).toLowerCase(); if (tag_complete.charAt(tag_complete.length - 2) === '/' || this.Utils.in_array(tag_check, this.Utils.single_token)) { //if this tag name is a single tag type (either in the list or has a closing /) if (!peek) { this.tag_type = 'SINGLE'; } } else if (indent_handlebars && tag_complete[0] === '{' && tag_check === 'else') { if (!peek) { this.indent_to_tag('if'); this.tag_type = 'HANDLEBARS_ELSE'; this.indent_content = true; this.traverse_whitespace(); } } else if (tag_check === 'script' && (tag_complete.search('type') === -1 || (tag_complete.search('type') > -1 && tag_complete.search(/\b(text|application)\/(x-)?(javascript|ecmascript|jscript|livescript)/) > -1))) { if (!peek) { this.record_tag(tag_check); this.tag_type = 'SCRIPT'; } } else if (tag_check === 'style' && (tag_complete.search('type') === -1 || (tag_complete.search('type') > -1 && tag_complete.search('text/css') > -1))) { if (!peek) { this.record_tag(tag_check); this.tag_type = 'STYLE'; } } else if (this.is_unformatted(tag_check, unformatted)) { // do not reformat the "unformatted" tags comment = this.get_unformatted('', tag_complete); //...delegate to get_unformatted function content.push(comment); // Preserve collapsed whitespace either before or after this tag. 
if (tag_start > 0 && this.Utils.in_array(this.input.charAt(tag_start - 1), this.Utils.whitespace)) { content.splice(0, 0, this.input.charAt(tag_start - 1)); } tag_end = this.pos - 1; if (this.Utils.in_array(this.input.charAt(tag_end + 1), this.Utils.whitespace)) { content.push(this.input.charAt(tag_end + 1)); } this.tag_type = 'SINGLE'; } else if (tag_check.charAt(0) === '!') { //peek for ', matched = false; this.pos = start_pos; input_char = this.input.charAt(this.pos); this.pos++; while (this.pos <= this.input.length) { comment += input_char; // only need to check for the delimiter if the last chars match if (comment[comment.length - 1] === delimiter[delimiter.length - 1] && comment.indexOf(delimiter) !== -1) { break; } // only need to search for custom delimiter for the first few characters if (!matched && comment.length < 10) { if (comment.indexOf(''; matched = true; } else if (comment.indexOf(''; matched = true; } else if (comment.indexOf(''; matched = true; } else if (comment.indexOf(''; matched = true; } } input_char = this.input.charAt(this.pos); this.pos++; } return comment; }; this.get_unformatted = function(delimiter, orig_tag) { //function to return unformatted content in its entirety if (orig_tag && orig_tag.toLowerCase().indexOf(delimiter) !== -1) { return ''; } var input_char = ''; var content = ''; var min_index = 0; var space = true; do { if (this.pos >= this.input.length) { return content; } input_char = this.input.charAt(this.pos); this.pos++; if (this.Utils.in_array(input_char, this.Utils.whitespace)) { if (!space) { this.line_char_count--; continue; } if (input_char === '\n' || input_char === '\r') { content += '\n'; /* Don't change tab indention for unformatted blocks. If using code for html editing, this will greatly affect
 tags if they are specified in the 'unformatted array'
                for (var i=0; i]*>\s*$/);

                // if next_tag comes back but is not an isolated tag, then
                // let's treat the 'a' tag as having content
                // and respect the unformatted option
                if (!tag || this.Utils.in_array(tag, unformatted)) {
                    return true;
                } else {
                    return false;
                }
            };

            /**
             * Initializes the input/output state on this Parser instance and attaches
             * the printing helpers used while tokens are emitted.
             *
             * @param {string} js_source - text to parse; a falsy value is stored as ''.
             * @param {string} indent_character - character repeated to build one indent unit.
             * @param {number} indent_size - number of indent_character in one indent unit.
             * @param {number} wrap_line_length - stored as this.wrap_line_length.
             * @param {string} brace_style - stored as this.brace_style.
             */
            this.printer = function(js_source, indent_character, indent_size, wrap_line_length, brace_style) { //handles input/output and some other printing functions

                this.input = js_source || ''; //gets the input for the Parser
                this.output = [];
                this.indent_character = indent_character;
                this.indent_string = '';
                this.indent_size = indent_size;
                this.brace_style = brace_style;
                this.indent_level = 0;
                this.wrap_line_length = wrap_line_length;
                this.line_char_count = 0; //count to see if wrap_line_length was exceeded

                // Build the string that represents a single level of indentation.
                for (var i = 0; i < this.indent_size; i++) {
                    this.indent_string += this.indent_character;
                }

                /**
                 * Resets the line character counter and appends '\n' to `arr`.
                 * Nothing is appended when `arr` is missing or empty, or when `arr`
                 * already ends with '\n' and `force` is falsy.
                 *
                 * @param {boolean} force - append even if the last element is '\n'.
                 * @param {Array} arr - array of output fragments to append to.
                 */
                this.print_newline = function(force, arr) {
                    this.line_char_count = 0;
                    if (!arr || !arr.length) {
                        return;
                    }
                    if (force || (arr[arr.length - 1] !== '\n')) { //we might want the extra line
                        arr.push('\n');
                    }
                };

                /**
                 * Appends one indent_string per current indent level to `arr` and
                 * adds the appended length to the line character counter.
                 *
                 * @param {Array} arr - array of output fragments to append to.
                 */
                this.print_indentation = function(arr) {
                    for (var i = 0; i < this.indent_level; i++) {
                        arr.push(this.indent_string);
                        this.line_char_count += this.indent_string.length;
                    }
                };

                /**
                 * Prints `text`; when the output currently ends with a newline the
                 * indentation is emitted first and leading whitespace is stripped
                 * from `text`.
                 *
                 * @param {string} text - token text to print.
                 */
                this.print_token = function(text) {
                    // Plain truthiness check: the previous `text || text !== ''` test was
                    // also true for undefined/null, which then crashed inside ltrim().
                    if (text) {
                        if (this.output.length && this.output[this.output.length - 1] === '\n') {
                            this.print_indentation(this.output);
                            text = ltrim(text);
                        }
                    }
                    this.print_token_raw(text);
                };

                /**
                 * Pushes `text` to the output without indenting it, then emits the
                 * newlines counted in this.newlines and resets that counter.
                 *
                 * @param {string} text - text to append; falsy values append nothing.
                 */
                this.print_token_raw = function(text) {
                    if (text) {
                        if (text.length > 1 && text[text.length - 1] === '\n') {
                            // unformatted tags can grab newlines as their last character
                            this.output.push(text.slice(0, -1));
                            this.print_newline(false, this.output);
                        } else {
                            this.output.push(text);
                        }
                    }

                    // The first pending newline is not forced (it is skipped if the output
                    // already ends with '\n'); every further one is forced.
                    for (var n = 0; n < this.newlines; n++) {
                        this.print_newline(n > 0, this.output);
                    }
                    this.newlines = 0;
                };

                /** Increases the current indent level by one. */
                this.indent = function() {
                    this.indent_level++;
                };

                /** Decreases the current indent level by one, never going below zero. */
                this.unindent = function() {
                    if (this.indent_level > 0) {
                        this.indent_level--;
                    }
                };
            };
            return this;
        }

        /*_____________________--------------------_____________________*/

        multi_parser = new Parser(); //wrapping functions Parser
        multi_parser.printer(html_source, indent_character, indent_size, wrap_line_length, brace_style); //initialize starting values

        while (true) {
            var t = multi_parser.get_token();
            multi_parser.token_text = t[0];
            multi_parser.token_type = t[1];

            if (multi_parser.token_type === 'TK_EOF') {
                break;
            }

            switch (multi_parser.token_type) {
                case 'TK_TAG_START':
                    multi_parser.print_newline(false, multi_parser.output);
                    multi_parser.print_token(multi_parser.token_text);
                    if (multi_parser.indent_content) {
                        multi_parser.indent();
                        multi_parser.indent_content = false;
                    }
                    multi_parser.current_mode = 'CONTENT';
                    break;
                case 'TK_TAG_STYLE':
                case 'TK_TAG_SCRIPT':
                    multi_parser.print_newline(false, multi_parser.output);
                    multi_parser.print_token(multi_parser.token_text);
                    multi_parser.current_mode = 'CONTENT';
                    break;
                case 'TK_TAG_END':
                    //Print new line only if the tag has no content and has child
                    if (multi_parser.last_token === 'TK_CONTENT' && multi_parser.last_text === '') {
                        var tag_name = multi_parser.token_text.match(/\w+/)[0];
                        var tag_extracted_from_last_output = null;
                        if (multi_parser.output.length) {
                            tag_extracted_from_last_output = multi_parser.output[multi_parser.output.length - 1].match(/(?:<|{{#)\s*(\w+)/);
                        }
                        if (tag_extracted_from_last_output === null ||
                            tag_extracted_from_last_output[1] !== tag_name) {
                            multi_parser.print_newline(false, multi_parser.output);
                        }
                    }
                    multi_parser.print_token(multi_parser.token_text);
                    multi_parser.current_mode = 'CONTENT';
                    break;
                case 'TK_TAG_SINGLE':
                    // Don't add a newline before elements that should remain unformatted.
                    var tag_check = multi_parser.token_text.match(/^\s*<([a-z]+)/i);
                    if (!tag_check || !multi_parser.Utils.in_array(tag_check[1], unformatted)) {
                        multi_parser.print_newline(false, multi_parser.output);
                    }
                    multi_parser.print_token(multi_parser.token_text);
                    multi_parser.current_mode = 'CONTENT';
                    break;
                case 'TK_TAG_HANDLEBARS_ELSE':
                    multi_parser.print_token(multi_parser.token_text);
                    if (multi_parser.indent_content) {
                        multi_parser.indent();
                        multi_parser.indent_content = false;
                    }
                    multi_parser.current_mode = 'CONTENT';
                    break;
                case 'TK_CONTENT':
                    multi_parser.print_token(multi_parser.token_text);
                    multi_parser.current_mode = 'TAG';
                    break;
                case 'TK_STYLE':
                case 'TK_SCRIPT':
                    if (multi_parser.token_text !== '') {
                        multi_parser.print_newline(false, multi_parser.output);
                        var text = multi_parser.token_text,
                            _beautifier,
                            script_indent_level = 1;
                        if (multi_parser.token_type === 'TK_SCRIPT') {
                            _beautifier = typeof js_beautify === 'function' && js_beautify;
                        } else if (multi_parser.token_type === 'TK_STYLE') {
                            _beautifier = typeof css_beautify === 'function' && css_beautify;
                        }

                        if (options.indent_scripts === "keep") {
                            script_indent_level = 0;
                        } else if (options.indent_scripts === "separate") {
                            script_indent_level = -multi_parser.indent_level;
                        }

                        var indentation = multi_parser.get_full_indent(script_indent_level);
                        if (_beautifier) {
                            // call the Beautifier if avaliable
                            text = _beautifier(text.replace(/^\s*/, indentation), options);
                        } else {
                            // simply indent the string otherwise
                            var white = text.match(/^\s*/)[0];
                            var _level = white.match(/[^\n\r]*$/)[0].split(multi_parser.indent_string).length - 1;
                            var reindent = multi_parser.get_full_indent(script_indent_level - _level);
                            text = text.replace(/^\s*/, indentation)
                                .replace(/\r\n|\r|\n/g, '\n' + reindent)
                                .replace(/\s+$/, '');
                        }
                        if (text) {
                            multi_parser.print_token_raw(indentation + trim(text));
                            multi_parser.print_newline(false, multi_parser.output);
                        }
                    }
                    multi_parser.current_mode = 'TAG';
                    break;
            }
            multi_parser.last_token = multi_parser.token_type;
            multi_parser.last_text = multi_parser.token_text;
        }
        return multi_parser.output.join('');
    }

    var beautify = require('devtools/shared/jsbeautify/beautify');

    exports.htmlBeautify = function(html_source, options) {
        return style_html(html_source, options, beautify.js, beautify.css);
    };
}());