* | U+1ED0 | Ố | O | Latin capital letter O with circumflex and acute |
* | U+1ED1 | ố | o | Latin small letter o with circumflex and acute |
* | U+1ED2 | Ồ | O | Latin capital letter O with circumflex and grave |
* | U+1ED3 | ồ | o | Latin small letter o with circumflex and grave |
* | U+1ED4 | Ổ | O | Latin capital letter O with circumflex and hook above |
* | U+1ED5 | ổ | o | Latin small letter o with circumflex and hook above |
* | U+1ED6 | Ỗ | O | Latin capital letter O with circumflex and tilde |
* | U+1ED7 | ỗ | o | Latin small letter o with circumflex and tilde |
* | U+1ED8 | Ộ | O | Latin capital letter O with circumflex and dot below |
* | U+1ED9 | ộ | o | Latin small letter o with circumflex and dot below |
* | U+1EDA | Ớ | O | Latin capital letter O with horn and acute |
* | U+1EDB | ớ | o | Latin small letter o with horn and acute |
* | U+1EDC | Ờ | O | Latin capital letter O with horn and grave |
* | U+1EDD | ờ | o | Latin small letter o with horn and grave |
* | U+1EDE | Ở | O | Latin capital letter O with horn and hook above |
* | U+1EDF | ở | o | Latin small letter o with horn and hook above |
* | U+1EE0 | Ỡ | O | Latin capital letter O with horn and tilde |
* | U+1EE1 | ỡ | o | Latin small letter o with horn and tilde |
* | U+1EE2 | Ợ | O | Latin capital letter O with horn and dot below |
* | U+1EE3 | ợ | o | Latin small letter o with horn and dot below |
* | U+1EE4 | Ụ | U | Latin capital letter U with dot below |
* | U+1EE5 | ụ | u | Latin small letter u with dot below |
* | U+1EE6 | Ủ | U | Latin capital letter U with hook above |
* | U+1EE7 | ủ | u | Latin small letter u with hook above |
* | U+1EE8 | Ứ | U | Latin capital letter U with horn and acute |
* | U+1EE9 | ứ | u | Latin small letter u with horn and acute |
* | U+1EEA | Ừ | U | Latin capital letter U with horn and grave |
* | U+1EEB | ừ | u | Latin small letter u with horn and grave |
* | U+1EEC | Ử | U | Latin capital letter U with horn and hook above |
* | U+1EED | ử | u | Latin small letter u with horn and hook above |
* | U+1EEE | Ữ | U | Latin capital letter U with horn and tilde |
* | U+1EEF | ữ | u | Latin small letter u with horn and tilde |
* | U+1EF0 | Ự | U | Latin capital letter U with horn and dot below |
* | U+1EF1 | ự | u | Latin small letter u with horn and dot below |
* | U+1EF2 | Ỳ | Y | Latin capital letter Y with grave |
* | U+1EF3 | ỳ | y | Latin small letter y with grave |
* | U+1EF4 | Ỵ | Y | Latin capital letter Y with dot below |
* | U+1EF5 | ỵ | y | Latin small letter y with dot below |
* | U+1EF6 | Ỷ | Y | Latin capital letter Y with hook above |
* | U+1EF7 | ỷ | y | Latin small letter y with hook above |
* | U+1EF8 | Ỹ | Y | Latin capital letter Y with tilde |
* | U+1EF9 | ỹ | y | Latin small letter y with tilde |
* German (`de_DE`), German formal (`de_DE_formal`), German (Switzerland) formal (`de_CH`),
* German (Switzerland) informal (`de_CH_informal`), and German (Austria) (`de_AT`) locales:
* | Code | Glyph | Replacement | Description |
* | -------- | ----- | ----------- | --------------------------------------- |
* | U+00C4 | Ä | Ae | Latin capital letter A with diaeresis |
* | U+00E4 | ä | ae | Latin small letter a with diaeresis |
* | U+00D6 | Ö | Oe | Latin capital letter O with diaeresis |
* | U+00F6 | ö | oe | Latin small letter o with diaeresis |
* | U+00DC | Ü | Ue | Latin capital letter U with diaeresis |
* | U+00FC | ü | ue | Latin small letter u with diaeresis |
* | U+00DF | ß | ss | Latin small letter sharp s |
* Danish (`da_DK`) locale:
* | Code | Glyph | Replacement | Description |
* | -------- | ----- | ----------- | --------------------------------------- |
* | U+00C6 | Æ | Ae | Latin capital letter AE |
* | U+00E6 | æ | ae | Latin small letter ae |
* | U+00D8 | Ø | Oe | Latin capital letter O with stroke |
* | U+00F8 | ø | oe | Latin small letter o with stroke |
* | U+00C5 | Å | Aa | Latin capital letter A with ring above |
* | U+00E5 | å | aa | Latin small letter a with ring above |
* Catalan (`ca`) locale:
* | Code | Glyph | Replacement | Description |
* | -------- | ----- | ----------- | --------------------------------------- |
* | U+00B7 | l·l | ll | Flown dot (between two Ls) |
* Serbian (`sr_RS`) and Bosnian (`bs_BA`) locales:
* | Code | Glyph | Replacement | Description |
* | -------- | ----- | ----------- | --------------------------------------- |
* | U+0110 | Đ | DJ | Latin capital letter D with stroke |
* | U+0111 | đ | dj | Latin small letter d with stroke |
* @since 4.6.0 Added locale support for `de_CH`, `de_CH_informal`, and `ca`.
* @since 4.7.0 Added locale support for `sr_RS`.
* @since 4.8.0 Added locale support for `bs_BA`.
* @since 5.7.0 Added locale support for `de_AT`.
* @param string $string Text that might have accent characters.
* @return string Filtered string with replaced "nice" characters.
function remove_accents( $string ) {
if ( ! preg_match( '/[\x80-\xff]/', $string ) ) {
if ( seems_utf8( $string ) ) {
// Decompositions for Latin-1 Supplement.
// Decompositions for Latin Extended-A.
// Decompositions for Latin Extended-B.
// Vowels with diacritic (Vietnamese).
// Vowels with diacritic (Chinese, Hanyu Pinyin).
// Used for locale-specific rules.
if ( in_array( $locale, array( 'de_DE', 'de_DE_formal', 'de_CH', 'de_CH_informal', 'de_AT' ), true ) ) {
} elseif ( 'da_DK' === $locale ) {
} elseif ( 'ca' === $locale ) {
} elseif ( 'sr_RS' === $locale || 'bs_BA' === $locale ) {
$string = strtr( $string, $chars );
// Assume ISO-8859-1 if not UTF-8.
$chars['in'] = "\x80\x83\x8a\x8e\x9a\x9e"
. "\x9f\xa2\xa5\xb5\xc0\xc1\xc2"
. "\xc3\xc4\xc5\xc7\xc8\xc9\xca"
. "\xcb\xcc\xcd\xce\xcf\xd1\xd2"
. "\xd3\xd4\xd5\xd6\xd8\xd9\xda"
. "\xdb\xdc\xdd\xe0\xe1\xe2\xe3"
. "\xe4\xe5\xe7\xe8\xe9\xea\xeb"
. "\xec\xed\xee\xef\xf1\xf2\xf3"
. "\xf4\xf5\xf6\xf8\xf9\xfa\xfb"
$chars['out'] = 'EfSZszYcYuAAAAAACEEEEIIIINOOOOOOUUUUYaaaaaaceeeeiiiinoooooouuuuyy';
$string = strtr( $string, $chars['in'], $chars['out'] );
$double_chars['in'] = array( "\x8c", "\x9c", "\xc6", "\xd0", "\xde", "\xdf", "\xe6", "\xf0", "\xfe" );
$double_chars['out'] = array( 'OE', 'oe', 'AE', 'DH', 'TH', 'ss', 'ae', 'dh', 'th' );
$string = str_replace( $double_chars['in'], $double_chars['out'], $string );
* Sanitizes a filename, replacing whitespace with dashes.
* Removes special characters that are illegal in filenames on certain
* operating systems and special characters requiring special escaping
* to manipulate at the command line. Replaces spaces and consecutive
* dashes with a single dash. Trims periods, dashes, and underscores from the beginning
* and end of the filename. It is not guaranteed that this function will return a
* filename that is allowed to be uploaded.
* @param string $filename The filename to be sanitized.
* @return string The sanitized filename.
function sanitize_file_name( $filename ) {
$filename_raw = $filename;
$filename = remove_accents( $filename );
$special_chars = array( '?', '[', ']', '/', '\\', '=', '<', '>', ':', ';', ',', "'", '"', '&', '$', '#', '*', '(', ')', '|', '~', '`', '!', '{', '}', '%', '+', '’', '«', '»', '”', '“', chr( 0 ) );
// Check for support for utf8 in the installed PCRE library once and store the result in a static.
static $utf8_pcre = null;
if ( ! isset( $utf8_pcre ) ) {