/*
 * htmldiff.js is a library that compares HTML content. It creates a diff between two
 * HTML documents by combining the two documents and wrapping the differences with
 * <ins> and <del> tags. Here is a high-level overview of how the diff works.
 *
 * 1. Tokenize the before and after HTML with html_to_tokens.
 * 2. Generate a list of operations that convert the before list of tokens to the after
 *    list of tokens with calculate_operations, which does the following:
 *      a. Find all the matching blocks of tokens between the before and after lists of
 *         tokens with find_matching_blocks. This is done by finding the single longest
 *         matching block with find_match, then recursively finding the next longest
 *         matching blocks that precede and follow the longest matching block with
 *         recursively_find_matching_blocks.
 *      b. Determine insertions, deletions, and replacements from the matching blocks.
 *         This is done in calculate_operations.
 * 3. Render the list of operations by wrapping tokens with <ins> and <del> tags where
 *    appropriate with render_operations.
 *
 * Example usage:
 *
 *   var htmldiff = require('htmldiff.js');
 *
 *   htmldiff('<p>this is some text</p>', '<p>this is some more text</p>')
 *   == '<p>this is some <ins>more </ins>text</p>'
 *
 *   htmldiff('<p>this is some text</p>', '<p>this is some more text</p>', 'diff-class')
 *   == '<p>this is some <ins class="diff-class">more </ins>text</p>
' */ (function () { function is_end_of_tag(char) { return char === '>'; } function is_start_of_tag(char) { return char === '<'; } function is_close_tag(tag) { return /^\s*<\s*\/[^>]+>\s*$/.test(tag); } function is_whitespace(char) { return /^\s+$/.test(char); } function is_tag(token) { return /^\s*<[^>]+>\s*$/.test(token); } function isnt_tag(token) { return !is_tag(token); } /* * Checks if the current word is the beginning of an atomic tag. An atomic tag is one whose * child nodes should not be compared - the entire tag should be treated as one token. This * is useful for tags where it does not make sense to insert and tags. * * @param {string} word The characters of the current token read so far. * * @return {string|null} The name of the atomic tag if the word will be an atomic tag, * null otherwise */ function is_start_of_atomic_tag(word) { var result = /^<(iframe|object|math|svg|script)/.exec(word); if (result){ result = result[1]; } return result; } /* * Checks if the current word is the end of an atomic tag (i.e. it has all the characters, * except for the end bracket of the closing tag, such as '