Edit File by line
/home/barbar84/public_h.../wp-inclu.../Requests
File: IDNAEncoder.php
<?php

/**
 * IDNA URL encoder
 *
 * Note: Not fully compliant, as nameprep does nothing yet.
 *
 * @package Requests
 * @subpackage Utilities
 * @see https://tools.ietf.org/html/rfc3490 IDNA specification
 * @see https://tools.ietf.org/html/rfc3492 Punycode/Bootstrap specification
 */
class Requests_IDNAEncoder {
	/**
	 * ACE prefix used for IDNA
	 *
	 * @see https://tools.ietf.org/html/rfc3490#section-5
	 * @var string
	 */
	const ACE_PREFIX = 'xn--';

	/**#@+
	 * Bootstrap constant for Punycode
	 *
	 * @see https://tools.ietf.org/html/rfc3492#section-5
	 * @var int
	 */
	const BOOTSTRAP_BASE = 36;
	const BOOTSTRAP_TMIN = 1;
	const BOOTSTRAP_TMAX = 26;
	const BOOTSTRAP_SKEW = 38;
	const BOOTSTRAP_DAMP = 700;
	const BOOTSTRAP_INITIAL_BIAS = 72;
	const BOOTSTRAP_INITIAL_N = 128;
	/**#@-*/

	/**
	 * Encode a hostname using Punycode
	 *
	 * The hostname is split on "." and every label is converted on its own
	 * with {@see Requests_IDNAEncoder::to_ascii()}; the labels are then joined
	 * back together with ".".
	 *
	 * @throws Requests_Exception Any exception raised by to_ascii() for one of the labels
	 *
	 * @param string $string Hostname
	 * @return string Punycode-encoded hostname
	 */
	public static function encode($string) {
		$parts = explode('.', $string);

		// Index-based assignment instead of iterating by reference, so no
		// dangling reference to the last element is left behind.
		foreach ($parts as $index => $part) {
			$parts[$index] = self::to_ascii($part);
		}

		return implode('.', $parts);
	}

	/**
	 * Convert a UTF-8 string to an ASCII string using Punycode
	 *
	 * Follows the ToASCII steps of RFC 3490, section 4.1. The length limit
	 * enforced here is 63 bytes (`strlen() < 64`). Empty strings are passed
	 * through unchanged.
	 *
	 * @throws Requests_Exception Provided string longer than 63 ASCII characters (`idna.provided_too_long`)
	 * @throws Requests_Exception Prepared string longer than 63 ASCII characters (`idna.prepared_too_long`)
	 * @throws Requests_Exception Provided string already begins with xn-- (`idna.provided_is_prefixed`)
	 * @throws Requests_Exception Encoded string longer than 63 ASCII characters (`idna.encoded_too_long`)
	 *
	 * @param string $string ASCII or UTF-8 string (max length 63 bytes)
	 * @return string ASCII string
	 */
	public static function to_ascii($string) {
		// Step 1: Check if the string is already ASCII
		if (self::is_ascii($string)) {
			// Nothing to encode: go straight to the length check (step 8)
			if (strlen($string) < 64) {
				return $string;
			}

			throw new Requests_Exception('Provided string is too long', 'idna.provided_too_long', $string);
		}

		// Step 2: nameprep
		$string = self::nameprep($string);

		// Step 3: UseSTD3ASCIIRules is false, continue
		// Step 4: Check if it's ASCII now
		if (self::is_ascii($string)) {
			// Nothing to encode: go straight to the length check (step 8)
			if (strlen($string) < 64) {
				return $string;
			}

			throw new Requests_Exception('Prepared string is too long', 'idna.prepared_too_long', $string);
		}

		// Step 5: Check ACE prefix (only reached for non-ASCII input)
		if (strpos($string, self::ACE_PREFIX) === 0) {
			throw new Requests_Exception('Provided string begins with ACE prefix', 'idna.provided_is_prefixed', $string);
		}

		// Step 6: Encode with Punycode
		$string = self::punycode_encode($string);

		// Step 7: Prepend ACE prefix
		$string = self::ACE_PREFIX . $string;

		// Step 8: Check size
		if (strlen($string) < 64) {
			return $string;
		}

		throw new Requests_Exception('Encoded string is too long', 'idna.encoded_too_long', $string);
	}

	/**
	 * Check whether a given string contains only ASCII characters
	 *
	 * @internal (Testing found regex was the fastest implementation)
	 *
	 * @param string $string
	 * @return bool Is the string ASCII-only?
	 */
	protected static function is_ascii($string) {
		// The pattern matches any single byte outside 0x00-0x7F.
		return (preg_match('/(?:[^\x00-\x7F])/', $string) !== 1);
	}

	/**
	 * Prepare a string for use as an IDNA name
	 *
	 * Currently a no-op: the string is returned unchanged.
	 *
	 * @todo Implement this based on RFC 3491 and the newer 5891
	 * @param string $string
	 * @return string Prepared string
	 */
	protected static function nameprep($string) {
		return $string;
	}

	/**
	 * Convert a UTF-8 string to a UCS-4 codepoint array
	 *
	 * Based on Requests_IRI::replace_invalid_with_pct_encoding()
	 *
	 * @throws Requests_Exception Invalid UTF-8 codepoint (`idna.invalidcodepoint`)
	 * @param string $input
	 * @return array Unicode code points
	 */
	protected static function utf8_to_codepoints($input) {
		$codepoints = array();

		// Get number of bytes
		$strlen = strlen($input);

		for ($position = 0; $position < $strlen; $position++) {
			$value = ord($input[$position]);

			// One byte sequence:
			if ((~$value & 0x80) === 0x80) {
				$character = $value;
				$length = 1;
				$remaining = 0;
			}
			// Two byte sequence:
			elseif (($value & 0xE0) === 0xC0) {
				$character = ($value & 0x1F) << 6;
				$length = 2;
				$remaining = 1;
			}
			// Three byte sequence:
			elseif (($value & 0xF0) === 0xE0) {
				$character = ($value & 0x0F) << 12;
				$length = 3;
				$remaining = 2;
			}
			// Four byte sequence:
			elseif (($value & 0xF8) === 0xF0) {
				$character = ($value & 0x07) << 18;
				$length = 4;
				$remaining = 3;
			}
			// Invalid byte:
			else {
				throw new Requests_Exception('Invalid Unicode codepoint', 'idna.invalidcodepoint', $value);
			}

			if ($remaining > 0) {
				// The sequence must not run past the end of the input
				if ($position + $length > $strlen) {
					throw new Requests_Exception('Invalid Unicode codepoint', 'idna.invalidcodepoint', $character);
				}
				for ($position++; $remaining > 0; $position++) {
					$value = ord($input[$position]);

					// Every trailing byte must be a continuation byte (10xxxxxx);
					// anything else makes the whole sequence invalid.
					if (($value & 0xC0) !== 0x80) {
						throw new Requests_Exception('Invalid Unicode codepoint', 'idna.invalidcodepoint', $character);
					}

					$character |= ($value & 0x3F) << (--$remaining * 6);
				}
				// Step back one byte: the outer loop increments again.
				$position--;
			}

			if (
				// Non-shortest form sequences are invalid
				($length > 1 && $character <= 0x7F)
				|| ($length > 2 && $character <= 0x7FF)
				|| ($length > 3 && $character <= 0xFFFF)
				// Outside of range of ucschar codepoints
				// Noncharacters
				|| ($character & 0xFFFE) === 0xFFFE
				|| ($character >= 0xFDD0 && $character <= 0xFDEF)
				|| (
					// Everything else not in ucschar
					($character > 0xD7FF && $character < 0xF900)
					|| $character < 0x20
					|| ($character > 0x7E && $character < 0xA0)
					|| $character > 0xEFFFD
				)
			) {
				throw new Requests_Exception('Invalid Unicode codepoint', 'idna.invalidcodepoint', $character);
			}

			$codepoints[] = $character;
		}

		return $codepoints;
	}

	/**
	 * RFC3492-compliant encoder
	 *
	 * All intermediate values are kept as integers; divisions are floored
	 * and cast back to int so no float reaches digit_to_char().
	 *
	 * @internal Pseudo-code from Section 6.3 is commented with "#" next to relevant code
	 * @throws Requests_Exception On character outside of the domain (never happens with Punycode) (`idna.character_outside_domain`)
	 * @throws Requests_Exception On invalid UTF-8 input (`idna.invalidcodepoint`)
	 *
	 * @param string $input UTF-8 encoded string to encode
	 * @return string Punycode-encoded string
	 */
	public static function punycode_encode($input) {
		$output = '';
		# let n = initial_n
		$n = self::BOOTSTRAP_INITIAL_N;
		# let delta = 0
		$delta = 0;
		# let bias = initial_bias
		$bias = self::BOOTSTRAP_INITIAL_BIAS;
		# let h = b = the number of basic code points in the input
		$h = 0; // counted in the loop below; b is set once the loop is done
		# copy them to the output in order
		$codepoints = self::utf8_to_codepoints($input);
		// The number of code points never changes, so count it once.
		$total = count($codepoints);
		$extended = array();

		foreach ($codepoints as $char) {
			if ($char < 128) {
				// Character is valid ASCII
				// TODO: this should also check if it's valid for a URL
				$output .= chr($char);
				$h++;
			}
			// Check if the character is non-ASCII, but below initial n
			// This never occurs for Punycode, so ignore in coverage
			// @codeCoverageIgnoreStart
			elseif ($char < $n) {
				throw new Requests_Exception('Invalid character', 'idna.character_outside_domain', $char);
			}
			// @codeCoverageIgnoreEnd
			else {
				// Array keys de-duplicate the non-basic code points
				$extended[$char] = true;
			}
		}
		$extended = array_keys($extended);
		sort($extended);
		$b = $h;
		# [copy them] followed by a delimiter if b > 0
		if (strlen($output) > 0) {
			$output .= '-';
		}
		# {if the input contains a non-basic code point < n then fail}
		# while h < length(input) do begin
		while ($h < $total) {
			# let m = the minimum code point >= n in the input
			$m = array_shift($extended);
			# let delta = delta + (m - n) * (h + 1), fail on overflow
			$delta += ($m - $n) * ($h + 1);
			# let n = m
			$n = $m;
			# for each code point c in the input (in order) do begin
			foreach ($codepoints as $c) {
				# if c < n then increment delta, fail on overflow
				if ($c < $n) {
					$delta++;
				}
				# if c == n then begin
				elseif ($c === $n) {
					# let q = delta
					$q = $delta;
					# for k = base to infinity in steps of base do begin
					for ($k = self::BOOTSTRAP_BASE; ; $k += self::BOOTSTRAP_BASE) {
						# let t = tmin if k <= bias {+ tmin}, or
						#     tmax if k >= bias + tmax, or k - bias otherwise
						if ($k <= ($bias + self::BOOTSTRAP_TMIN)) {
							$t = self::BOOTSTRAP_TMIN;
						}
						elseif ($k >= ($bias + self::BOOTSTRAP_TMAX)) {
							$t = self::BOOTSTRAP_TMAX;
						}
						else {
							$t = $k - $bias;
						}
						# if q < t then break
						if ($q < $t) {
							break;
						}
						# output the code point for digit t + ((q - t) mod (base - t))
						$digit = $t + (($q - $t) % (self::BOOTSTRAP_BASE - $t));
						$output .= self::digit_to_char($digit);
						# let q = (q - t) div (base - t)
						// floor() returns a float; cast so $q stays an integer
						$q = (int) floor(($q - $t) / (self::BOOTSTRAP_BASE - $t));
					# end
					}
					# output the code point for digit q
					$output .= self::digit_to_char($q);
					# let bias = adapt(delta, h + 1, test h equals b?)
					$bias = self::adapt($delta, $h + 1, $h === $b);
					# let delta = 0
					$delta = 0;
					# increment h
					$h++;
				# end
				}
			# end
			}
			# increment delta and n
			$delta++;
			$n++;
		# end
		}

		return $output;
	}

	/**
	 * Convert a digit to its respective character
	 *
	 * Digits 0-25 map to "a"-"z" and digits 26-35 map to "0"-"9".
	 *
	 * @see https://tools.ietf.org/html/rfc3492#section-5
	 * @throws Requests_Exception On invalid digit (`idna.invalid_digit`)
	 *
	 * @param int $digit Digit in the range 0-35
	 * @return string Single character corresponding to digit
	 */
	protected static function digit_to_char($digit) {
		// @codeCoverageIgnoreStart
		// As far as I know, this never happens, but still good to be sure.
		if ($digit < 0 || $digit > 35) {
			throw new Requests_Exception(sprintf('Invalid digit %d', $digit), 'idna.invalid_digit', $digit);
		}
		// @codeCoverageIgnoreEnd
		$digits = 'abcdefghijklmnopqrstuvwxyz0123456789';
		return substr($digits, $digit, 1);
	}

	/**
	 * Adapt the bias
	 *
	 * Every "div" of the RFC pseudo-code is implemented as a floored division
	 * cast back to int, so the returned bias is always an integer.
	 *
	 * @see https://tools.ietf.org/html/rfc3492#section-6.1
	 * @param int $delta
	 * @param int $numpoints
	 * @param bool $firsttime
	 * @return int New bias
	 */
	protected static function adapt($delta, $numpoints, $firsttime) {
		# function adapt(delta,numpoints,firsttime):
		# if firsttime then let delta = delta div damp
		if ($firsttime) {
			$delta = (int) floor($delta / self::BOOTSTRAP_DAMP);
		}
		# else let delta = delta div 2
		else {
			$delta = (int) floor($delta / 2);
		}
		# let delta = delta + (delta div numpoints)
		$delta += (int) floor($delta / $numpoints);
		# let k = 0
		$k = 0;
		# while delta > ((base - tmin) * tmax) div 2 do begin
		$max = (int) floor(((self::BOOTSTRAP_BASE - self::BOOTSTRAP_TMIN) * self::BOOTSTRAP_TMAX) / 2);
		while ($delta > $max) {
			# let delta = delta div (base - tmin)
			$delta = (int) floor($delta / (self::BOOTSTRAP_BASE - self::BOOTSTRAP_TMIN));
			# let k = k + base
			$k += self::BOOTSTRAP_BASE;
		# end
		}
		# return k + (((base - tmin + 1) * delta) div (delta + skew))
		return $k + (int) floor(((self::BOOTSTRAP_BASE - self::BOOTSTRAP_TMIN + 1) * $delta) / ($delta + self::BOOTSTRAP_SKEW));
	}
}
It is recommended that you Edit text format, this type of Fix handles quite a lot in one request
Function