/**
 * Matches a CSS URL type value.
 *
 * The single capture group holds the URL itself, without the `url(` / `)`
 * wrapper and without any surrounding quotes or whitespace.
 *
 * @type {RegExp}
 */
var REGEXP_STYLE_URL_TYPE = /^url\s*\(['"\s]*(.*?)['"\s]*\)$/;
/**
 * Boolean attributes are attributes whose presence as being assigned is
 * meaningful, even if only empty.
 *
 * See: https://html.spec.whatwg.org/multipage/common-microsyntaxes.html#boolean-attributes
 * Extracted from: https://html.spec.whatwg.org/multipage/indices.html#attributes-3
 *
 * Object.keys( Array.from( document.querySelectorAll( '#attributes-1 > tbody > tr' ) )
 *     .filter( ( tr ) => tr.lastChild.textContent.indexOf( 'Boolean attribute' ) !== -1 )
 *     .reduce( ( result, tr ) => Object.assign( result, {
 *         [ tr.firstChild.textContent.trim() ]: true
 *     } ), {} ) ).sort();
 *
 * @type {string[]}
 */
var BOOLEAN_ATTRIBUTES = ['allowfullscreen', 'allowpaymentrequest', 'allowusermedia', 'async', 'autofocus', 'autoplay', 'checked', 'controls', 'default', 'defer', 'disabled', 'download', 'formnovalidate', 'hidden', 'ismap', 'itemscope', 'loop', 'multiple', 'muted', 'nomodule', 'novalidate', 'open', 'playsinline', 'readonly', 'required', 'reversed', 'selected', 'typemustmatch'];
/**
 * Enumerated attributes are attributes which must be of a specific value form.
 * Like boolean attributes, these are meaningful if specified, even if not of a
 * valid enumerated value.
 *
 * See: https://html.spec.whatwg.org/multipage/common-microsyntaxes.html#enumerated-attribute
 * Extracted from: https://html.spec.whatwg.org/multipage/indices.html#attributes-3
 *
 * Object.keys( Array.from( document.querySelectorAll( '#attributes-1 > tbody > tr' ) )
 *     .filter( ( tr ) => /^("(.+?)";?\s*)+/.test( tr.lastChild.textContent.trim() ) )
 *     .reduce( ( result, tr ) => Object.assign( result, {
 *         [ tr.firstChild.textContent.trim() ]: true
 *     } ), {} ) ).sort();
 *
 * @type {string[]}
 */
var ENUMERATED_ATTRIBUTES = ['autocapitalize', 'autocomplete', 'charset', 'contenteditable', 'crossorigin', 'decoding', 'dir', 'draggable', 'enctype', 'formenctype', 'formmethod', 'http-equiv', 'inputmode', 'kind', 'method', 'preload', 'scope', 'shape', 'spellcheck', 'translate', 'type', 'wrap'];
/**
 * Meaningful attributes are those which cannot be safely ignored when omitted
 * in one HTML markup string and not another.
 *
 * Built as the boolean attributes followed by the enumerated attributes.
 *
 * @type {string[]}
 */
var MEANINGFUL_ATTRIBUTES = [].concat(BOOLEAN_ATTRIBUTES, ENUMERATED_ATTRIBUTES);
/**
 * Array of functions which receive a text string on which to apply normalizing
 * behavior for consideration in text token equivalence, carefully ordered from
 * least-to-most expensive operations.
 *
 * The first entry is the identity function, so the first comparison is made
 * on the untouched text.
 *
 * @type {Function[]}
 */
var TEXT_NORMALIZATIONS = [external_lodash_["identity"], getTextWithCollapsedWhitespace];
/**
 * Regular expression matching a named character reference. In lieu of bundling
 * a full set of references, the pattern covers the minimal necessary to test
 * positively against the full set.
 *
 * "The ampersand must be followed by one of the names given in the named
 * character references section, using the same case."
 *
 * Tested against "12.5 Named character references":
 *
 * const references = Array.from( document.querySelectorAll(
 *     '#named-character-references-table tr[id^=entity-] td:first-child'
 * ) ).map( ( code ) => code.textContent )
 * references.every( ( reference ) => /^[\da-z]+$/i.test( reference ) )
 *
 * @see https://html.spec.whatwg.org/multipage/syntax.html#character-references
 * @see https://html.spec.whatwg.org/multipage/named-characters.html#named-character-references
 *
 * @type {RegExp}
 */
var REGEXP_NAMED_CHARACTER_REFERENCE = /^[\da-z]+$/i;
/**
 * Regular expression matching a decimal character reference.
 *
 * "The ampersand must be followed by a U+0023 NUMBER SIGN character (#),
 * followed by one or more ASCII digits, representing a base-ten integer"
 *
 * @see https://html.spec.whatwg.org/multipage/syntax.html#character-references
 *
 * @type {RegExp}
 */
var REGEXP_DECIMAL_CHARACTER_REFERENCE = /^#\d+$/;
/**
 * Regular expression matching a hexadecimal character reference.
 *
 * "The ampersand must be followed by a U+0023 NUMBER SIGN character (#), which
 * must be followed by either a U+0078 LATIN SMALL LETTER X character (x) or a
 * U+0058 LATIN CAPITAL LETTER X character (X), which must then be followed by
 * one or more ASCII hex digits, representing a hexadecimal integer"
 *
 * @see https://html.spec.whatwg.org/multipage/syntax.html#character-references
 *
 * @type {RegExp}
 */
var REGEXP_HEXADECIMAL_CHARACTER_REFERENCE = /^#x[\da-f]+$/i;
/**
 * Returns true if the given string is a valid character reference segment, or
 * false otherwise. The text should be stripped of `&` and `;` demarcations.
 *
 * A segment is accepted when it matches any one of the named, decimal or
 * hexadecimal character reference patterns.
 *
 * @param {string} text Text to test.
 *
 * @return {boolean} Whether text is valid character reference.
 */
function isValidCharacterReference(text) {
  return REGEXP_NAMED_CHARACTER_REFERENCE.test(text) || REGEXP_DECIMAL_CHARACTER_REFERENCE.test(text) || REGEXP_HEXADECIMAL_CHARACTER_REFERENCE.test(text);
}
/**
 * Substitute EntityParser class for `simple-html-tokenizer` which uses the
 * implementation of `decodeEntities` from `html-entities`, in order to avoid
 * bundling a massive named character reference.
 *
 * @see https://github.com/tildeio/simple-html-tokenizer/tree/HEAD/src/entity-parser.ts
 */
var validation_DecodeEntityParser = /*#__PURE__*/function () {
  function DecodeEntityParser() {
    Object(classCallCheck["a" /* default */])(this, DecodeEntityParser);
  }

  Object(createClass["a" /* default */])(DecodeEntityParser, [{
    key: "parse",

    /**
     * Returns a substitute string for an entity string sequence between `&`
     * and `;`, or undefined if no substitution should occur.
     *
     * @param {string} entity Entity fragment discovered in HTML.
     *
     * @return {?string} Entity substitute value.
     */
    value: function parse(entity) {
      // Only well-formed references are decoded; anything else falls
      // through and yields undefined, meaning "no substitution".
      if (isValidCharacterReference(entity)) {
        return Object(external_wp_htmlEntities_["decodeEntities"])('&' + entity + ';');
      }
    }
  }]);

  return DecodeEntityParser;
}();
/**
 * Given a specified string, returns an array of strings split by consecutive
 * whitespace, ignoring leading or trailing whitespace.
 *
 * @param {string} text Original text.
 *
 * @return {string[]} Text pieces split on whitespace.
 */
function getTextPiecesSplitOnWhitespace(text) {
  // Trim first so that leading/trailing whitespace does not produce empty
  // pieces at either end of the result.
  return text.trim().split(REGEXP_WHITESPACE);
}
/**
 * Given a specified string, returns a new trimmed string where all consecutive
 * whitespace is collapsed to a single space.
 *
 * @param {string} text Original text.
 *
 * @return {string} Trimmed text with consecutive whitespace collapsed.
 */
function getTextWithCollapsedWhitespace(text) {
  // This is an overly simplified whitespace comparison. The specification is
  // more prescriptive of whitespace behavior in inline and block contexts.
  //
  // See: https://medium.com/@patrickbrosset/when-does-white-space-matter-in-html-b90e8a7cdd33
  return getTextPiecesSplitOnWhitespace(text).join(' ');
}
/**
 * Returns attribute pairs of the given StartTag token, including only pairs
 * where the value is non-empty or the attribute is a boolean attribute, an
 * enumerated attribute, or a custom data- attribute.
 *
 * @see MEANINGFUL_ATTRIBUTES
 *
 * @param {Object} token StartTag token.
 *
 * @return {Array[]} Attribute pairs.
 */
function getMeaningfulAttributePairs(token) {
  return token.attributes.filter(function (pair) {
    // Each pair is a tuple whose first two members are the attribute name
    // and its value.
    var _pair = Object(slicedToArray["a" /* default */])(pair, 2),
        key = _pair[0],
        value = _pair[1];

    return value || key.indexOf('data-') === 0 || Object(external_lodash_["includes"])(MEANINGFUL_ATTRIBUTES, key);
  });
}
/**
 * Returns true if two text tokens (with `chars` property) are equivalent, or
 * false otherwise.
 *
 * @param {Object} actual   Actual token.
 * @param {Object} expected Expected token.
 * @param {Object} logger   Validation logger object.
 *
 * @return {boolean} Whether two text tokens are equivalent.
 */
function isEquivalentTextTokens(actual, expected) {
  var logger = arguments.length > 2 && arguments[2] !== undefined ? arguments[2] : createLogger();
  // This function is intentionally written as syntactically "ugly" as a hot
  // path optimization. Text is progressively normalized in order from least-
  // to-most operationally expensive, until the earliest point at which text
  // can be confidently inferred as being equal.
  var actualChars = actual.chars;
  var expectedChars = expected.chars;

  for (var i = 0; i < TEXT_NORMALIZATIONS.length; i++) {
    var normalize = TEXT_NORMALIZATIONS[i];
    // Normalizations are cumulative: each one is applied to the output of
    // the previous one.
    actualChars = normalize(actualChars);
    expectedChars = normalize(expectedChars);

    if (actualChars === expectedChars) {
      return true;
    }
  }

  // The warning reports the original, un-normalized text of both tokens.
  logger.warning('Expected text `%s`, saw `%s`.', expected.chars, actual.chars);
  return false;
}
/**
 * Given a CSS length value, returns a normalized CSS length value for strict
 * equality comparison.
 *
 * Any value whose leading number parses to zero (`0px`, `0.0em`, `0`) is
 * normalized to the string `'0'`; every other value is returned unchanged.
 *
 * @param {string} value CSS length value.
 *
 * @return {string} Normalized CSS length value.
 */
function getNormalizedLength(value) {
  // `parseFloat` yields NaN for non-numeric values such as `auto`, which
  // never strictly equals 0 and therefore passes through untouched.
  return 0 === parseFloat(value) ? '0' : value;
}
/**
 * Given a style value, returns a normalized style value for strict equality
 * comparison.
 *
 * The value is split on whitespace, each piece is length-normalized, the
 * pieces are re-joined with single spaces, and finally a value consisting of
 * a single `url(...)` has its inner quotes and whitespace removed.
 *
 * @param {string} value Style value.
 *
 * @return {string} Normalized style value.
 */
function getNormalizedStyleValue(value) {
  var textPieces = getTextPiecesSplitOnWhitespace(value);
  var normalizedPieces = textPieces.map(getNormalizedLength);
  var result = normalizedPieces.join(' ');
  return result // Normalize URL type to omit whitespace or quotes
  .replace(REGEXP_STYLE_URL_TYPE, 'url($1)');
}
/**
 * Given a style attribute string, returns an object of style properties.
 *
 * Keys are the trimmed property names; values are the trimmed property values
 * after style value normalization.
 *
 * @param {string} text Style attribute.
 *
 * @return {Object} Style properties.
 */
function getStyleProperties(text) {
  var pairs = text // Trim ending semicolon (avoid including in split)
  .replace(/;?\s*$/, '') // Split on property assignment
  .split(';') // For each property assignment...
  .map(function (style) {
    // ...split further into key-value pairs
    var _style$split = style.split(':'),
        _style$split2 = Object(toArray["a" /* default */])(_style$split),
        key = _style$split2[0],
        valueParts = _style$split2.slice(1);

    // Only the first colon separates name from value; re-join the rest so
    // values which themselves contain colons (e.g. URLs) stay intact.
    var value = valueParts.join(':');
    return [key.trim(), getNormalizedStyleValue(value.trim())];
  });
  return Object(external_lodash_["fromPairs"])(pairs);
}
/**
 * Attribute-specific equality handlers.
 *
 * Maps a lower-cased attribute name to a function receiving the actual and
 * expected attribute values and returning whether they are equivalent. Every
 * boolean attribute is mapped to a handler which always returns true.
 *
 * @type {Object}
 */
var isEqualAttributesOfName = validation_objectSpread({
  class: function _class(actual, expected) {
    // Class matches if members are the same, even if out of order or
    // superfluous whitespace between.
    return !external_lodash_["xor"].apply(void 0, Object(toConsumableArray["a" /* default */])([actual, expected].map(getTextPiecesSplitOnWhitespace))).length;
  },
  style: function style(actual, expected) {
    // Styles match if their parsed, normalized property maps are deeply
    // equal.
    return external_lodash_["isEqual"].apply(void 0, Object(toConsumableArray["a" /* default */])([actual, expected].map(getStyleProperties)));
  }
}, Object(external_lodash_["fromPairs"])(BOOLEAN_ATTRIBUTES.map(function (attribute) {
  return [attribute, external_lodash_["stubTrue"]];
})));
/**
 * Given two sets of attribute tuples, returns true if the attribute sets are
 * equivalent.
 *
 * Attribute names are compared case-insensitively. A name with a dedicated
 * handler in `isEqualAttributesOfName` is compared through that handler; any
 * other name requires strictly equal values.
 *
 * @param {Array[]} actual   Actual attributes tuples.
 * @param {Array[]} expected Expected attributes tuples.
 * @param {Object}  logger   Validation logger object.
 *
 * @return {boolean} Whether attributes are equivalent.
 */
function isEqualTagAttributePairs(actual, expected) {
  var logger = arguments.length > 2 && arguments[2] !== undefined ? arguments[2] : createLogger();
  var hasOwn = Object.prototype.hasOwnProperty;

  // Attributes is tokenized as tuples. Their lengths should match. This also
  // avoids us needing to check both attributes sets, since if A has any keys
  // which do not exist in B, we know the sets to be different.
  if (actual.length !== expected.length) {
    logger.warning('Expected attributes %o, instead saw %o.', expected, actual);
    return false;
  } // Attributes are not guaranteed to occur in the same order. For validating
  // actual attributes, first convert the set of expected attribute values to
  // an object, for lookup by key.


  // A prototype-less object is used so that attribute names which collide
  // with `Object.prototype` members (e.g. `__proto__`) are stored and looked
  // up like any other key.
  var expectedAttributes = Object.create(null);

  for (var i = 0; i < expected.length; i++) {
    expectedAttributes[expected[i][0].toLowerCase()] = expected[i][1];
  }

  for (var _i = 0; _i < actual.length; _i++) {
    var _actual$_i = Object(slicedToArray["a" /* default */])(actual[_i], 2),
        name = _actual$_i[0],
        actualValue = _actual$_i[1];

    var nameLower = name.toLowerCase(); // As noted above, if missing member in B, assume different

    if (!hasOwn.call(expectedAttributes, nameLower)) {
      logger.warning('Encountered unexpected attribute `%s`.', name);
      return false;
    }

    var expectedValue = expectedAttributes[nameLower];
    // Only own handlers count: an inherited member such as `constructor`
    // must not be mistaken for a custom equality handler.
    var isEqualAttributes = hasOwn.call(isEqualAttributesOfName, nameLower) ? isEqualAttributesOfName[nameLower] : undefined;

    if (isEqualAttributes) {
      // Defer custom attribute equality handling
      if (!isEqualAttributes(actualValue, expectedValue)) {
        logger.warning('Expected attribute `%s` of value `%s`, saw `%s`.', name, expectedValue, actualValue);
        return false;
      }
    } else if (actualValue !== expectedValue) {
      // Otherwise strict inequality should bail
      logger.warning('Expected attribute `%s` of value `%s`, saw `%s`.', name, expectedValue, actualValue);
      return false;
    }
  }

  return true;
}
/**
 * Token-type-specific equality handlers.
 *
 * Maps a token type to a function receiving the actual token, the expected
 * token and an optional logger, and returning whether the tokens are
 * equivalent.
 *
 * @type {Object}
 */
var isEqualTokensOfType = {
  StartTag: function StartTag(actual, expected) {
    var logger = arguments.length > 2 && arguments[2] !== undefined ? arguments[2] : createLogger();

    if (actual.tagName !== expected.tagName && // Optimization: Use short-circuit evaluation to defer case-
    // insensitive check on the assumption that the majority case will
    // have exactly equal tag names.
    actual.tagName.toLowerCase() !== expected.tagName.toLowerCase()) {
      logger.warning('Expected tag name `%s`, instead saw `%s`.', expected.tagName, actual.tagName);
      return false;
    }

    // Tag names agree; compare only the meaningful attributes of each tag.
    return isEqualTagAttributePairs.apply(void 0, Object(toConsumableArray["a" /* default */])([actual, expected].map(getMeaningfulAttributePairs)).concat([logger]));
  },
  Chars: isEquivalentTextTokens,
  Comment: isEquivalentTextTokens
};
/**
 * Given an array of tokens, returns the first token which is not purely
 * whitespace.
 *
 * Mutates the tokens array: every token up to and including the returned one
 * is removed from the front of the array.
 *
 * @param {Object[]} tokens Set of tokens to search.
 *
 * @return {Object} Next non-whitespace token, or undefined once the array is
 *                  exhausted.
 */
function getNextNonWhitespaceToken(tokens) {
  var token;

  while (token = tokens.shift()) {
    // Only `Chars` tokens can be whitespace-only; any other type is returned
    // immediately.
    if (token.type !== 'Chars') {
      return token;
    }

    if (!REGEXP_ONLY_WHITESPACE.test(token.chars)) {
      return token;
    }
  }
}
/**
 * Tokenize an HTML string, gracefully handling any errors thrown during
 * underlying tokenization.
 *
 * @param {string} html   HTML string to tokenize.
 * @param {Object} logger Validation logger object.
 *
 * @return {Object[]|null} Array of valid tokenized HTML elements, or null on error
 */
function getHTMLTokens(html) {
  var logger = arguments.length > 1 && arguments[1] !== undefined ? arguments[1] : createLogger();

  try {
    return new Tokenizer(new validation_DecodeEntityParser()).tokenize(html);
  } catch (e) {
    // A tokenizer failure is reported as malformed markup rather than
    // propagated to the caller.
    logger.warning('Malformed HTML detected: %s', html);
  }

  return null;
}
/**
 * Returns true if the next HTML token closes the current token.
 *
 * Only a self-closing current token can be closed this way, and only by an
 * `EndTag` token carrying the same tag name.
 *
 * @param {Object}           currentToken Current token to compare with.
 * @param {Object|undefined} nextToken    Next token to compare against.
 *
 * @return {boolean} true if `nextToken` closes `currentToken`, false otherwise
 */
function isClosedByToken(currentToken, nextToken) {
  // Ensure this is a self closed token
  if (!currentToken.selfClosing) {
    return false;
  } // Check token names and determine if nextToken is the closing tag for currentToken


  if (nextToken && nextToken.tagName === currentToken.tagName && nextToken.type === 'EndTag') {
    return true;
  }

  return false;
}
* Returns true if the given HTML strings are effectively equivalent, or
* false otherwise. Invalid HTML is not considered equivalent, even if the
* strings directly match.
* @param {string} actual Actual HTML string.
* @param {string} expected Expected HTML string.
* @param {Object} logger Validation logger object.
* @return {boolean} Whether HTML strings are equivalent.
function isEquivalentHTML(actual, expected) {
var logger = arguments.length > 2 && arguments[2] !== undefined ? arguments[2] : createLogger();
// Short-circuit if markup is identical.
if (actual === expected) {
} // Tokenize input content and reserialized save content
var _map = [actual, expected].map(function (html) {
return getHTMLTokens(html, logger);
_map2 = Object(slicedToArray["a" /* default */])(_map, 2),
expectedTokens = _map2[1]; // If either is malformed then stop comparing - the strings are not equivalent
if (!actualTokens || !expectedTokens) {
var actualToken, expectedToken;
while (actualToken = getNextNonWhitespaceToken(actualTokens)) {
expectedToken = getNextNonWhitespaceToken(expectedTokens); // Inequal if exhausted all expected tokens
logger.warning('Expected end of content, instead saw %o.', actualToken);
} // Inequal if next non-whitespace token of each set are not same type