if ( strpos( $pee, '<option' ) !== false ) {
$pee = preg_replace( '|\s*<option|', '<option', $pee );
$pee = preg_replace( '|</option>\s*|', '</option>', $pee );
* Collapse line breaks inside <object> elements, before <param> and <embed> elements
* so they don't get autop'd.
if ( strpos( $pee, '</object>' ) !== false ) {
$pee = preg_replace( '|(<object[^>]*>)\s*|', '$1', $pee );
$pee = preg_replace( '|\s*</object>|', '</object>', $pee );
$pee = preg_replace( '%\s*(</?(?:param|embed)[^>]*>)\s*%', '$1', $pee );
* Collapse line breaks inside <audio> and <video> elements,
* before and after <source> and <track> elements.
if ( strpos( $pee, '<source' ) !== false || strpos( $pee, '<track' ) !== false ) {
$pee = preg_replace( '%([<\[](?:audio|video)[^>\]]*[>\]])\s*%', '$1', $pee );
$pee = preg_replace( '%\s*([<\[]/(?:audio|video)[>\]])%', '$1', $pee );
$pee = preg_replace( '%\s*(<(?:source|track)[^>]*>)\s*%', '$1', $pee );
// Collapse line breaks before and after <figcaption> elements.
if ( strpos( $pee, '<figcaption' ) !== false ) {
$pee = preg_replace( '|\s*(<figcaption[^>]*>)|', '$1', $pee );
$pee = preg_replace( '|</figcaption>\s*|', '</figcaption>', $pee );
// Remove more than two contiguous line breaks.
$pee = preg_replace( "/\n\n+/", "\n\n", $pee );
// Split up the contents into an array of strings, separated by double line breaks.
$pees = preg_split( '/\n\s*\n/', $pee, -1, PREG_SPLIT_NO_EMPTY );
// Reset $pee prior to rebuilding.
// Rebuild the content as a string, wrapping every bit with a <p>.
foreach ( $pees as $tinkle ) {
$pee .= '<p>' . trim( $tinkle, "\n" ) . "</p>\n";
// Under certain strange conditions it could create a P of entirely whitespace.
$pee = preg_replace( '|<p>\s*</p>|', '', $pee );
// Add a closing <p> inside <div>, <address>, or <form> tag if missing.
$pee = preg_replace( '!<p>([^<]+)</(div|address|form)>!', '<p>$1</p></$2>', $pee );
// If an opening or closing block element tag is wrapped in a <p>, unwrap it.
$pee = preg_replace( '!<p>\s*(</?' . $allblocks . '[^>]*>)\s*</p>!', '$1', $pee );
// In some cases <li> may get wrapped in <p>, fix them.
$pee = preg_replace( '|<p>(<li.+?)</p>|', '$1', $pee );
// If a <blockquote> is wrapped with a <p>, move it inside the <blockquote>.
$pee = preg_replace( '|<p><blockquote([^>]*)>|i', '<blockquote$1><p>', $pee );
$pee = str_replace( '</blockquote></p>', '</p></blockquote>', $pee );
// If an opening or closing block element tag is preceded by an opening <p> tag, remove it.
$pee = preg_replace( '!<p>\s*(</?' . $allblocks . '[^>]*>)!', '$1', $pee );
// If an opening or closing block element tag is followed by a closing <p> tag, remove it.
$pee = preg_replace( '!(</?' . $allblocks . '[^>]*>)\s*</p>!', '$1', $pee );
// Optionally insert line breaks.
// Replace newlines that shouldn't be touched with a placeholder.
$pee = preg_replace_callback( '/<(script|style|svg).*?<\/\\1>/s', '_autop_newline_preservation_helper', $pee );
$pee = str_replace( array( '<br>', '<br/>' ), '<br />', $pee );
// Replace any new line characters that aren't preceded by a <br /> with a <br />.
$pee = preg_replace( '|(?<!<br />)\s*\n|', "<br />\n", $pee );
// Replace newline placeholders with newlines.
$pee = str_replace( '<WPPreserveNewline />', "\n", $pee );
// If a <br /> tag is after an opening or closing block tag, remove it.
$pee = preg_replace( '!(</?' . $allblocks . '[^>]*>)\s*<br />!', '$1', $pee );
// If a <br /> tag is before a subset of opening or closing block tags, remove it.
$pee = preg_replace( '!<br />(\s*</?(?:p|li|div|dl|dd|dt|th|pre|td|ul|ol)[^>]*>)!', '$1', $pee );
$pee = preg_replace( "|\n</p>$|", '</p>', $pee );
// Replace placeholder <pre> tags with their original content.
if ( ! empty( $pre_tags ) ) {
$pee = str_replace( array_keys( $pre_tags ), array_values( $pre_tags ), $pee );
// Restore newlines in all elements.
if ( false !== strpos( $pee, '<!-- wpnl -->' ) ) {
$pee = str_replace( array( ' <!-- wpnl --> ', '<!-- wpnl -->' ), "\n", $pee );
/**
 * Separate HTML elements and comments from the text.
 *
 * The input is split on the pattern returned by get_html_split_regex() and the
 * matched delimiters are kept, so the result alternates between plain text
 * (even indexes) and HTML elements/comments (odd indexes).
 *
 * @param string $input The text which has to be formatted.
 * @return string[] Array of the formatted text.
 */
function wp_html_split( $input ) {
	return preg_split( get_html_split_regex(), $input, -1, PREG_SPLIT_DELIM_CAPTURE );
}
/**
 * Retrieve the regular expression for an HTML element.
 *
 * The pattern matches, as one captured group, either an HTML comment, a CDATA
 * section, or an ordinary element. Unterminated constructs match to the end
 * of the input. The pattern is built once and cached in a static variable.
 *
 * @return string The regular expression
 */
function get_html_split_regex() {
	static $regex;

	if ( ! isset( $regex ) ) {
		// phpcs:disable Squiz.Strings.ConcatenationSpacing.PaddingFound -- don't remove regex indentation
		$comments =
			'!'             // Start of comment, after the <.
			. '(?:'         // Unroll the loop: Consume everything until --> is found.
			.     '-(?!->)' // Dash not followed by end of comment.
			.     '[^\-]*+' // Consume non-dashes.
			. ')*+'         // Loop possessively.
			. '(?:-->)?';   // End of comment. If not found, match all input.

		$cdata =
			'!\[CDATA\['    // Start of comment, after the <.
			. '[^\]]*+'     // Consume non-].
			. '(?:'         // Unroll the loop: Consume everything until ]]> is found.
			.     '](?!]>)' // One ] not followed by end of comment.
			.     '[^\]]*+' // Consume non-].
			. ')*+'         // Loop possessively.
			. '(?:]]>)?';   // End of comment. If not found, match all input.

		$escaped =
			'(?='             // Is the element escaped?
			.    '!--'
			. '|'
			.    '!\[CDATA\['
			. ')'
			. '(?(?=!-)'      // If yes, which type?
			.     $comments
			. '|'
			.     $cdata
			. ')';

		$regex =
			'/('                // Capture the entire match.
			.     '<'           // Find start of element.
			.     '(?'          // Conditional expression follows.
			.         $escaped  // Find end of escaped element.
			.     '|'           // ...else...
			.         '[^>]*>?' // Find end of normal element.
			.     ')'
			. ')/';
		// phpcs:enable
	}

	return $regex;
}
/**
 * Retrieve the combined regular expression for HTML and shortcodes.
 *
 * The HTML part (comments and ordinary elements) is built once and cached in
 * a static variable. When a shortcode pattern is supplied it is appended as
 * an alternative inside the same capturing group.
 *
 * @internal This function will be removed in 4.5.0 per Shortcode API Roadmap.
 *
 * @param string $shortcode_regex The result from _get_wptexturize_shortcode_regex(). Optional.
 * @return string The regular expression
 */
function _get_wptexturize_split_regex( $shortcode_regex = '' ) {
	static $html_regex;

	if ( ! isset( $html_regex ) ) {
		// phpcs:disable Squiz.Strings.ConcatenationSpacing.PaddingFound -- don't remove regex indentation
		$comment_regex =
			'!'             // Start of comment, after the <.
			. '(?:'         // Unroll the loop: Consume everything until --> is found.
			.     '-(?!->)' // Dash not followed by end of comment.
			.     '[^\-]*+' // Consume non-dashes.
			. ')*+'         // Loop possessively.
			. '(?:-->)?';   // End of comment. If not found, match all input.

		$html_regex = // Needs replaced with wp_html_split() per Shortcode API Roadmap.
			'<'                  // Find start of element.
			. '(?(?=!--)'        // Is this a comment?
			.     $comment_regex // Find end of comment.
			. '|'
			.     '[^>]*>?'      // Find end of element. If not found, match all input.
			. ')';
		// phpcs:enable
	}

	if ( empty( $shortcode_regex ) ) {
		$regex = '/(' . $html_regex . ')/';
	} else {
		$regex = '/(' . $html_regex . '|' . $shortcode_regex . ')/';
	}

	return $regex;
}
/**
 * Retrieve the regular expression for shortcodes.
 *
 * The returned fragment has no delimiters; it is meant to be embedded in a
 * pattern that uses "/" as its delimiter, so tag names are quoted with that
 * delimiter in mind.
 *
 * @param string[] $tagnames Array of shortcodes to find.
 * @return string The regular expression
 */
function _get_wptexturize_shortcode_regex( $tagnames ) {
	$quoted = array();
	foreach ( $tagnames as $tagname ) {
		// Escape the "/" delimiter too, so a tag name can never terminate the pattern early.
		$quoted[] = preg_quote( $tagname, '/' );
	}

	$tagregexp = implode( '|', $quoted );
	$tagregexp = "(?:$tagregexp)(?=[\\s\\]\\/])"; // Excerpt of get_shortcode_regex().
	// phpcs:disable Squiz.Strings.ConcatenationSpacing.PaddingFound -- don't remove regex indentation
	$regex =
		'\['                // Find start of shortcode.
		. '[\/\[]?'         // Shortcodes may begin with [/ or [[.
		. $tagregexp        // Only match registered shortcodes, because performance.
		. '(?:'
		.     '[^\[\]<>]+'  // Shortcodes do not contain other shortcodes. Quantifier critical.
		. '|'
		.     '<[^\[\]>]*>' // HTML elements permitted. Prevents matching ] before >.
		. ')*+'             // Possessive critical.
		. '\]'              // Find end of shortcode.
		. '\]?';            // Shortcodes may end with ]].
	// phpcs:enable

	return $regex;
}
/**
 * Replace characters or phrases within HTML elements only.
 *
 * The text is split with wp_html_split(); only the odd-indexed pieces (the
 * HTML elements and comments) are searched, so text between tags is never
 * altered.
 *
 * @param string $haystack      The text which has to be formatted.
 * @param array  $replace_pairs In the form array('from' => 'to', ...).
 * @return string The formatted text.
 */
function wp_replace_in_html_tags( $haystack, $replace_pairs ) {
	// Find all elements.
	$textarr = wp_html_split( $haystack );
	$changed = false;

	// Optimize when searching for one item.
	if ( 1 === count( $replace_pairs ) ) {
		// Extract $needle and $replace.
		$replace = reset( $replace_pairs );
		// Array keys that look numeric are stored as integers; search for them as text.
		$needle = (string) key( $replace_pairs );

		// Loop through delimiters (elements) only.
		for ( $i = 1, $c = count( $textarr ); $i < $c; $i += 2 ) {
			if ( false !== strpos( $textarr[ $i ], $needle ) ) {
				$textarr[ $i ] = str_replace( $needle, $replace, $textarr[ $i ] );
				$changed       = true;
			}
		}
	} else {
		// Extract all $needles.
		$needles = array_keys( $replace_pairs );

		// Loop through delimiters (elements) only.
		for ( $i = 1, $c = count( $textarr ); $i < $c; $i += 2 ) {
			foreach ( $needles as $needle ) {
				if ( false !== strpos( $textarr[ $i ], (string) $needle ) ) {
					$textarr[ $i ] = strtr( $textarr[ $i ], $replace_pairs );
					$changed       = true;
					// After one strtr() break out of the foreach loop and look at next element.
					break;
				}
			}
		}
	}

	// Only rebuild the string when at least one element was modified.
	if ( $changed ) {
		$haystack = implode( $textarr );
	}

	return $haystack;
}
/**
 * Newline preservation help function for wpautop
 *
 * Swaps every newline in the full match for the `<WPPreserveNewline />`
 * placeholder so later newline processing leaves the matched text alone.
 *
 * @param array $matches preg_replace_callback matches array
 * @return string The full match with newlines replaced by the placeholder.
 */
function _autop_newline_preservation_helper( $matches ) {
	return str_replace( "\n", '<WPPreserveNewline />', $matches[0] );
}
/**
 * Don't auto-p wrap shortcodes that stand alone
 *
 * Ensures that shortcodes are not wrapped in `<p>...</p>`.
 *
 * @global array $shortcode_tags
 *
 * @param string $pee The content.
 * @return string The filtered content.
 */
function shortcode_unautop( $pee ) {
	global $shortcode_tags;

	// Nothing to unwrap when no shortcodes are registered.
	if ( empty( $shortcode_tags ) || ! is_array( $shortcode_tags ) ) {
		return $pee;
	}

	$quoted = array();
	foreach ( array_keys( $shortcode_tags ) as $tagname ) {
		// Escape the "/" delimiter too, so a tag name can never terminate the pattern early.
		$quoted[] = preg_quote( $tagname, '/' );
	}

	$tagregexp = implode( '|', $quoted );
	$spaces    = wp_spaces_regexp();

	// phpcs:disable Squiz.Strings.ConcatenationSpacing.PaddingFound,WordPress.WhiteSpace.PrecisionAlignment.Found -- don't remove regex indentation
	$pattern =
		'/'
		. '<p>'                              // Opening paragraph.
		. '(?:' . $spaces . ')*+'            // Optional leading whitespace.
		. '('                                // 1: The shortcode.
		.     '\\['                          // Opening bracket.
		.     "($tagregexp)"                 // 2: Shortcode name.
		.     '(?![\\w-])'                   // Not followed by word character or hyphen.
											// Unroll the loop: Inside the opening shortcode tag.
		.     '[^\\]\\/]*'                   // Not a closing bracket or forward slash.
		.     '(?:'
		.         '\\/(?!\\])'               // A forward slash not followed by a closing bracket.
		.         '[^\\]\\/]*'               // Not a closing bracket or forward slash.
		.     ')*?'
		.     '(?:'
		.         '\\/\\]'                   // Self closing tag and closing bracket.
		.     '|'
		.         '\\]'                      // Closing bracket.
		.         '(?:'                      // Unroll the loop: Optionally, anything between the opening and closing shortcode tags.
		.             '[^\\[]*+'             // Not an opening bracket.
		.             '(?:'
		.                 '\\[(?!\\/\\2\\])' // An opening bracket not followed by the closing shortcode tag.
		.                 '[^\\[]*+'         // Not an opening bracket.
		.             ')*+'
		.             '\\[\\/\\2\\]'         // Closing shortcode tag.
		.         ')?'
		.     ')'
		. ')'
		. '(?:' . $spaces . ')*+'            // Optional trailing whitespace.
		. '<\\/p>'                           // Closing paragraph.
		. '/';
	// phpcs:enable

	return preg_replace( $pattern, '$1', $pee );
}
/**
 * Checks to see if a string is utf8 encoded.
 *
 * NOTE: This function checks for 5-Byte sequences, UTF8
 *       has Bytes Sequences with a maximum length of 4.
 *
 * Each lead byte is classified by its high bits to determine how many
 * continuation bytes (10bbbbbb) must follow; the string fails as soon as a
 * lead byte fits no model or a continuation byte is missing or malformed.
 *
 * @author bmorel at ssi dot fr (modified)
 *
 * @param string $str The string to be checked
 * @return bool True if $str fits a UTF-8 model, false otherwise.
 */
function seems_utf8( $str ) {
	// Measure the length in bytes, regardless of any mbstring function overloading.
	mbstring_binary_safe_encoding();
	$length = strlen( $str );
	reset_mbstring_encoding();

	for ( $i = 0; $i < $length; $i++ ) {
		$c = ord( $str[ $i ] );

		if ( $c < 0x80 ) {
			$n = 0; // 0bbbbbbb
		} elseif ( ( $c & 0xE0 ) === 0xC0 ) {
			$n = 1; // 110bbbbb
		} elseif ( ( $c & 0xF0 ) === 0xE0 ) {
			$n = 2; // 1110bbbb
		} elseif ( ( $c & 0xF8 ) === 0xF0 ) {
			$n = 3; // 11110bbb
		} elseif ( ( $c & 0xFC ) === 0xF8 ) {
			$n = 4; // 111110bb
		} elseif ( ( $c & 0xFE ) === 0xFC ) {
			$n = 5; // 1111110b
		} else {
			return false; // Does not match any model.
		}

		for ( $j = 0; $j < $n; $j++ ) { // n bytes matching 10bbbbbb follow ?
			if ( ( ++$i === $length ) || ( ( ord( $str[ $i ] ) & 0xC0 ) !== 0x80 ) ) {
				return false;
			}
		}
	}

	return true;
}
/**
 * Converts a number of special characters into their HTML entities.
 *
 * Specifically deals with: &, <, >, ", and '.
 *
 * $quote_style can be set to ENT_COMPAT to encode " to
 * &quot;, or ENT_QUOTES to do both. Default is ENT_NOQUOTES where no quotes are encoded.
 *
 * @since 5.5.0 `$quote_style` also accepts `ENT_XML1`.
 *
 * @param string       $string        The text which is to be encoded.
 * @param int|string   $quote_style   Optional. Converts double quotes if set to ENT_COMPAT,
 *                                    both single and double if set to ENT_QUOTES or none if set to ENT_NOQUOTES.
 *                                    Converts single and double quotes, as well as converting HTML
 *                                    named entities (that are not also XML named entities) to their
 *                                    code points if set to ENT_XML1. Also compatible with old values;
 *                                    converting single quotes if set to 'single',
 *                                    double if set to 'double' or both if otherwise set.
 *                                    Default is ENT_NOQUOTES.
 * @param false|string $charset       Optional. The character encoding of the string. Default false.
 * @param bool         $double_encode Optional. Whether to encode existing HTML entities. Default false.
 * @return string The encoded text with HTML entities.
 */
function _wp_specialchars( $string, $quote_style = ENT_NOQUOTES, $charset = false, $double_encode = false ) {
	$string = (string) $string;

	if ( 0 === strlen( $string ) ) {
		return '';
	}

	// Don't bother if there are no specialchars - saves some processing.
	if ( ! preg_match( '/[&<>"\']/', $string ) ) {
		return $string;
	}

	// Account for the previous behaviour of the function when the $quote_style is not an accepted value.
	if ( empty( $quote_style ) ) {
		$quote_style = ENT_NOQUOTES;
	} elseif ( ENT_XML1 === $quote_style ) {
		$quote_style = ENT_QUOTES | ENT_XML1;
	} elseif ( ! in_array( $quote_style, array( ENT_NOQUOTES, ENT_COMPAT, ENT_QUOTES, 'single', 'double' ), true ) ) {
		$quote_style = ENT_QUOTES;
	}

	// Store the site charset as a static to avoid multiple calls to wp_load_alloptions().
	if ( ! $charset ) {
		static $_charset = null;
		if ( ! isset( $_charset ) ) {
			$alloptions = wp_load_alloptions();
			$_charset   = isset( $alloptions['blog_charset'] ) ? $alloptions['blog_charset'] : '';
		}
		$charset = $_charset;
	}

	// Normalise common spellings to the name htmlspecialchars() expects.
	if ( in_array( $charset, array( 'utf8', 'utf-8', 'UTF8' ), true ) ) {
		$charset = 'UTF-8';
	}

	// Remember the caller's choice: the legacy 'single' value is handled after encoding.
	$_quote_style = $quote_style;

	if ( 'double' === $quote_style ) {
		$quote_style  = ENT_COMPAT;
		$_quote_style = ENT_COMPAT;
	} elseif ( 'single' === $quote_style ) {
		$quote_style = ENT_NOQUOTES;
	}

	if ( ! $double_encode ) {
		// Guarantee every &entity; is valid, convert &garbage; into &amp;garbage;
		// This is required for PHP < 5.4.0 because ENT_HTML401 flag is unavailable.
		$string = wp_kses_normalize_entities( $string, ( $quote_style & ENT_XML1 ) ? 'xml' : 'html' );
	}

	$string = htmlspecialchars( $string, $quote_style, $charset, $double_encode );

	// Back-compat: 'single' encodes only single quotes, which htmlspecialchars() cannot do alone.
	if ( 'single' === $_quote_style ) {
		$string = str_replace( "'", '&#039;', $string );
	}

	return $string;
}
* Converts a number of HTML entities into their special characters.
* Specifically deals with: &, <, >, ", and '.
* $quote_style can be set to ENT_COMPAT to decode " entities,
* or ENT_QUOTES to do both " and '. Default is ENT_NOQUOTES where no quotes are decoded.