[ Index ]

PHP Cross Reference of YOURLS

title

Body

[close]

/includes/vendor/symfony/polyfill-intl-idn/ -> Idn.php (source)

   1  <?php
   2  
   3  /*
   4   * This file is part of the Symfony package.
   5   *
   6   * (c) Fabien Potencier <[email protected]> and Trevor Rowbotham <[email protected]>
   7   *
   8   * For the full copyright and license information, please view the LICENSE
   9   * file that was distributed with this source code.
  10   */
  11  
  12  namespace Symfony\Polyfill\Intl\Idn;
  13  
  14  use Exception;
  15  use Normalizer;
  16  use Symfony\Polyfill\Intl\Idn\Resources\unidata\DisallowedRanges;
  17  use Symfony\Polyfill\Intl\Idn\Resources\unidata\Regex;
  18  
  19  /**
  20   * @see https://www.unicode.org/reports/tr46/
  21   *
  22   * @internal
  23   */
  24  final class Idn
  25  {
  26      public const ERROR_EMPTY_LABEL = 1;
  27      public const ERROR_LABEL_TOO_LONG = 2;
  28      public const ERROR_DOMAIN_NAME_TOO_LONG = 4;
  29      public const ERROR_LEADING_HYPHEN = 8;
  30      public const ERROR_TRAILING_HYPHEN = 0x10;
  31      public const ERROR_HYPHEN_3_4 = 0x20;
  32      public const ERROR_LEADING_COMBINING_MARK = 0x40;
  33      public const ERROR_DISALLOWED = 0x80;
  34      public const ERROR_PUNYCODE = 0x100;
  35      public const ERROR_LABEL_HAS_DOT = 0x200;
  36      public const ERROR_INVALID_ACE_LABEL = 0x400;
  37      public const ERROR_BIDI = 0x800;
  38      public const ERROR_CONTEXTJ = 0x1000;
  39      public const ERROR_CONTEXTO_PUNCTUATION = 0x2000;
  40      public const ERROR_CONTEXTO_DIGITS = 0x4000;
  41  
  42      public const INTL_IDNA_VARIANT_2003 = 0;
  43      public const INTL_IDNA_VARIANT_UTS46 = 1;
  44  
  45      public const IDNA_DEFAULT = 0;
  46      public const IDNA_ALLOW_UNASSIGNED = 1;
  47      public const IDNA_USE_STD3_RULES = 2;
  48      public const IDNA_CHECK_BIDI = 4;
  49      public const IDNA_CHECK_CONTEXTJ = 8;
  50      public const IDNA_NONTRANSITIONAL_TO_ASCII = 16;
  51      public const IDNA_NONTRANSITIONAL_TO_UNICODE = 32;
  52  
  53      public const MAX_DOMAIN_SIZE = 253;
  54      public const MAX_LABEL_SIZE = 63;
  55  
  56      public const BASE = 36;
  57      public const TMIN = 1;
  58      public const TMAX = 26;
  59      public const SKEW = 38;
  60      public const DAMP = 700;
  61      public const INITIAL_BIAS = 72;
  62      public const INITIAL_N = 128;
  63      public const DELIMITER = '-';
  64      public const MAX_INT = 2147483647;
  65  
  66      /**
  67       * Contains the numeric value of a basic code point (for use in representing integers) in the
  68       * range 0 to BASE-1, or -1 if the code point does not represent a value.
  69       *
  70       * @var array<int, int>
  71       */
  72      private static $basicToDigit = [
  73          -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
  74          -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
  75  
  76          -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
  77          26, 27, 28, 29, 30, 31, 32, 33, 34, 35, -1, -1, -1, -1, -1, -1,
  78  
  79          -1,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14,
  80          15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -1, -1, -1, -1,
  81  
  82          -1,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14,
  83          15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -1, -1, -1, -1,
  84  
  85          -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
  86          -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
  87  
  88          -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
  89          -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
  90  
  91          -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
  92          -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
  93  
  94          -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
  95          -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
  96      ];
  97  
  98      /**
  99       * @var array<int, int>
 100       */
 101      private static $virama;
 102  
 103      /**
 104       * @var array<int, string>
 105       */
 106      private static $mapped;
 107  
 108      /**
 109       * @var array<int, bool>
 110       */
 111      private static $ignored;
 112  
 113      /**
 114       * @var array<int, string>
 115       */
 116      private static $deviation;
 117  
 118      /**
 119       * @var array<int, bool>
 120       */
 121      private static $disallowed;
 122  
 123      /**
 124       * @var array<int, string>
 125       */
 126      private static $disallowed_STD3_mapped;
 127  
 128      /**
 129       * @var array<int, bool>
 130       */
 131      private static $disallowed_STD3_valid;
 132  
 133      /**
 134       * @var bool
 135       */
 136      private static $mappingTableLoaded = false;
 137  
 138      /**
 139       * @see https://www.unicode.org/reports/tr46/#ToASCII
 140       *
 141       * @param string $domainName
 142       * @param int    $options
 143       * @param int    $variant
 144       * @param array  $idna_info
 145       *
 146       * @return string|false
 147       */
 148      public static function idn_to_ascii($domainName, $options = self::IDNA_DEFAULT, $variant = self::INTL_IDNA_VARIANT_UTS46, &$idna_info = [])
 149      {
 150          if (\PHP_VERSION_ID >= 70200 && self::INTL_IDNA_VARIANT_2003 === $variant) {
 151              @trigger_error('idn_to_ascii(): INTL_IDNA_VARIANT_2003 is deprecated', \E_USER_DEPRECATED);
 152          }
 153  
 154          $options = [
 155              'CheckHyphens' => true,
 156              'CheckBidi' => self::INTL_IDNA_VARIANT_2003 === $variant || 0 !== ($options & self::IDNA_CHECK_BIDI),
 157              'CheckJoiners' => self::INTL_IDNA_VARIANT_UTS46 === $variant && 0 !== ($options & self::IDNA_CHECK_CONTEXTJ),
 158              'UseSTD3ASCIIRules' => 0 !== ($options & self::IDNA_USE_STD3_RULES),
 159              'Transitional_Processing' => self::INTL_IDNA_VARIANT_2003 === $variant || 0 === ($options & self::IDNA_NONTRANSITIONAL_TO_ASCII),
 160              'VerifyDnsLength' => true,
 161          ];
 162          $info = new Info();
 163          $labels = self::process((string) $domainName, $options, $info);
 164  
 165          foreach ($labels as $i => $label) {
 166              // Only convert labels to punycode that contain non-ASCII code points
 167              if (1 === preg_match('/[^\x00-\x7F]/', $label)) {
 168                  try {
 169                      $label = 'xn--'.self::punycodeEncode($label);
 170                  } catch (Exception $e) {
 171                      $info->errors |= self::ERROR_PUNYCODE;
 172                  }
 173  
 174                  $labels[$i] = $label;
 175              }
 176          }
 177  
 178          if ($options['VerifyDnsLength']) {
 179              self::validateDomainAndLabelLength($labels, $info);
 180          }
 181  
 182          $idna_info = [
 183              'result' => implode('.', $labels),
 184              'isTransitionalDifferent' => $info->transitionalDifferent,
 185              'errors' => $info->errors,
 186          ];
 187  
 188          return 0 === $info->errors ? $idna_info['result'] : false;
 189      }
 190  
 191      /**
 192       * @see https://www.unicode.org/reports/tr46/#ToUnicode
 193       *
 194       * @param string $domainName
 195       * @param int    $options
 196       * @param int    $variant
 197       * @param array  $idna_info
 198       *
 199       * @return string|false
 200       */
 201      public static function idn_to_utf8($domainName, $options = self::IDNA_DEFAULT, $variant = self::INTL_IDNA_VARIANT_UTS46, &$idna_info = [])
 202      {
 203          if (\PHP_VERSION_ID >= 70200 && self::INTL_IDNA_VARIANT_2003 === $variant) {
 204              @trigger_error('idn_to_utf8(): INTL_IDNA_VARIANT_2003 is deprecated', \E_USER_DEPRECATED);
 205          }
 206  
 207          $info = new Info();
 208          $labels = self::process((string) $domainName, [
 209              'CheckHyphens' => true,
 210              'CheckBidi' => self::INTL_IDNA_VARIANT_2003 === $variant || 0 !== ($options & self::IDNA_CHECK_BIDI),
 211              'CheckJoiners' => self::INTL_IDNA_VARIANT_UTS46 === $variant && 0 !== ($options & self::IDNA_CHECK_CONTEXTJ),
 212              'UseSTD3ASCIIRules' => 0 !== ($options & self::IDNA_USE_STD3_RULES),
 213              'Transitional_Processing' => self::INTL_IDNA_VARIANT_2003 === $variant || 0 === ($options & self::IDNA_NONTRANSITIONAL_TO_UNICODE),
 214          ], $info);
 215          $idna_info = [
 216              'result' => implode('.', $labels),
 217              'isTransitionalDifferent' => $info->transitionalDifferent,
 218              'errors' => $info->errors,
 219          ];
 220  
 221          return 0 === $info->errors ? $idna_info['result'] : false;
 222      }
 223  
 224      /**
 225       * @param string $label
 226       *
 227       * @return bool
 228       */
 229      private static function isValidContextJ(array $codePoints, $label)
 230      {
 231          if (!isset(self::$virama)) {
 232              self::$virama = require __DIR__.\DIRECTORY_SEPARATOR.'Resources'.\DIRECTORY_SEPARATOR.'unidata'.\DIRECTORY_SEPARATOR.'virama.php';
 233          }
 234  
 235          $offset = 0;
 236  
 237          foreach ($codePoints as $i => $codePoint) {
 238              if (0x200C !== $codePoint && 0x200D !== $codePoint) {
 239                  continue;
 240              }
 241  
 242              if (!isset($codePoints[$i - 1])) {
 243                  return false;
 244              }
 245  
 246              // If Canonical_Combining_Class(Before(cp)) .eq. Virama Then True;
 247              if (isset(self::$virama[$codePoints[$i - 1]])) {
 248                  continue;
 249              }
 250  
 251              // If RegExpMatch((Joining_Type:{L,D})(Joining_Type:T)*\u200C(Joining_Type:T)*(Joining_Type:{R,D})) Then
 252              // True;
 253              // Generated RegExp = ([Joining_Type:{L,D}][Joining_Type:T]*\u200C[Joining_Type:T]*)[Joining_Type:{R,D}]
 254              if (0x200C === $codePoint && 1 === preg_match(Regex::ZWNJ, $label, $matches, \PREG_OFFSET_CAPTURE, $offset)) {
 255                  $offset += \strlen($matches[1][0]);
 256  
 257                  continue;
 258              }
 259  
 260              return false;
 261          }
 262  
 263          return true;
 264      }
 265  
 266      /**
 267       * @see https://www.unicode.org/reports/tr46/#ProcessingStepMap
 268       *
 269       * @param string              $input
 270       * @param array<string, bool> $options
 271       *
 272       * @return string
 273       */
 274      private static function mapCodePoints($input, array $options, Info $info)
 275      {
 276          $str = '';
 277          $useSTD3ASCIIRules = $options['UseSTD3ASCIIRules'];
 278          $transitional = $options['Transitional_Processing'];
 279  
 280          foreach (self::utf8Decode($input) as $codePoint) {
 281              $data = self::lookupCodePointStatus($codePoint, $useSTD3ASCIIRules);
 282  
 283              switch ($data['status']) {
 284                  case 'disallowed':
 285                      $info->errors |= self::ERROR_DISALLOWED;
 286  
 287                      // no break.
 288  
 289                  case 'valid':
 290                      $str .= mb_chr($codePoint, 'utf-8');
 291  
 292                      break;
 293  
 294                  case 'ignored':
 295                      // Do nothing.
 296                      break;
 297  
 298                  case 'mapped':
 299                      $str .= $data['mapping'];
 300  
 301                      break;
 302  
 303                  case 'deviation':
 304                      $info->transitionalDifferent = true;
 305                      $str .= ($transitional ? $data['mapping'] : mb_chr($codePoint, 'utf-8'));
 306  
 307                      break;
 308              }
 309          }
 310  
 311          return $str;
 312      }
 313  
 314      /**
 315       * @see https://www.unicode.org/reports/tr46/#Processing
 316       *
 317       * @param string              $domain
 318       * @param array<string, bool> $options
 319       *
 320       * @return array<int, string>
 321       */
 322      private static function process($domain, array $options, Info $info)
 323      {
 324          // If VerifyDnsLength is not set, we are doing ToUnicode otherwise we are doing ToASCII and
 325          // we need to respect the VerifyDnsLength option.
 326          $checkForEmptyLabels = !isset($options['VerifyDnsLength']) || $options['VerifyDnsLength'];
 327  
 328          if ($checkForEmptyLabels && '' === $domain) {
 329              $info->errors |= self::ERROR_EMPTY_LABEL;
 330  
 331              return [$domain];
 332          }
 333  
 334          // Step 1. Map each code point in the domain name string
 335          $domain = self::mapCodePoints($domain, $options, $info);
 336  
 337          // Step 2. Normalize the domain name string to Unicode Normalization Form C.
 338          if (!Normalizer::isNormalized($domain, Normalizer::FORM_C)) {
 339              $domain = Normalizer::normalize($domain, Normalizer::FORM_C);
 340          }
 341  
 342          // Step 3. Break the string into labels at U+002E (.) FULL STOP.
 343          $labels = explode('.', $domain);
 344          $lastLabelIndex = \count($labels) - 1;
 345  
 346          // Step 4. Convert and validate each label in the domain name string.
 347          foreach ($labels as $i => $label) {
 348              $validationOptions = $options;
 349  
 350              if ('xn--' === substr($label, 0, 4)) {
 351                  try {
 352                      $label = self::punycodeDecode(substr($label, 4));
 353                  } catch (Exception $e) {
 354                      $info->errors |= self::ERROR_PUNYCODE;
 355  
 356                      continue;
 357                  }
 358  
 359                  $validationOptions['Transitional_Processing'] = false;
 360                  $labels[$i] = $label;
 361              }
 362  
 363              self::validateLabel($label, $info, $validationOptions, $i > 0 && $i === $lastLabelIndex);
 364          }
 365  
 366          if ($info->bidiDomain && !$info->validBidiDomain) {
 367              $info->errors |= self::ERROR_BIDI;
 368          }
 369  
 370          // Any input domain name string that does not record an error has been successfully
 371          // processed according to this specification. Conversely, if an input domain_name string
 372          // causes an error, then the processing of the input domain_name string fails. Determining
 373          // what to do with error input is up to the caller, and not in the scope of this document.
 374          return $labels;
 375      }
 376  
 377      /**
 378       * @see https://tools.ietf.org/html/rfc5893#section-2
 379       *
 380       * @param string $label
 381       */
 382      private static function validateBidiLabel($label, Info $info)
 383      {
 384          if (1 === preg_match(Regex::RTL_LABEL, $label)) {
 385              $info->bidiDomain = true;
 386  
 387              // Step 1. The first character must be a character with Bidi property L, R, or AL.
 388              // If it has the R or AL property, it is an RTL label
 389              if (1 !== preg_match(Regex::BIDI_STEP_1_RTL, $label)) {
 390                  $info->validBidiDomain = false;
 391  
 392                  return;
 393              }
 394  
 395              // Step 2. In an RTL label, only characters with the Bidi properties R, AL, AN, EN, ES,
 396              // CS, ET, ON, BN, or NSM are allowed.
 397              if (1 === preg_match(Regex::BIDI_STEP_2, $label)) {
 398                  $info->validBidiDomain = false;
 399  
 400                  return;
 401              }
 402  
 403              // Step 3. In an RTL label, the end of the label must be a character with Bidi property
 404              // R, AL, EN, or AN, followed by zero or more characters with Bidi property NSM.
 405              if (1 !== preg_match(Regex::BIDI_STEP_3, $label)) {
 406                  $info->validBidiDomain = false;
 407  
 408                  return;
 409              }
 410  
 411              // Step 4. In an RTL label, if an EN is present, no AN may be present, and vice versa.
 412              if (1 === preg_match(Regex::BIDI_STEP_4_AN, $label) && 1 === preg_match(Regex::BIDI_STEP_4_EN, $label)) {
 413                  $info->validBidiDomain = false;
 414  
 415                  return;
 416              }
 417  
 418              return;
 419          }
 420  
 421          // We are a LTR label
 422          // Step 1. The first character must be a character with Bidi property L, R, or AL.
 423          // If it has the L property, it is an LTR label.
 424          if (1 !== preg_match(Regex::BIDI_STEP_1_LTR, $label)) {
 425              $info->validBidiDomain = false;
 426  
 427              return;
 428          }
 429  
 430          // Step 5. In an LTR label, only characters with the Bidi properties L, EN,
 431          // ES, CS, ET, ON, BN, or NSM are allowed.
 432          if (1 === preg_match(Regex::BIDI_STEP_5, $label)) {
 433              $info->validBidiDomain = false;
 434  
 435              return;
 436          }
 437  
 438          // Step 6.In an LTR label, the end of the label must be a character with Bidi property L or
 439          // EN, followed by zero or more characters with Bidi property NSM.
 440          if (1 !== preg_match(Regex::BIDI_STEP_6, $label)) {
 441              $info->validBidiDomain = false;
 442  
 443              return;
 444          }
 445      }
 446  
 447      /**
 448       * @param array<int, string> $labels
 449       */
 450      private static function validateDomainAndLabelLength(array $labels, Info $info)
 451      {
 452          $maxDomainSize = self::MAX_DOMAIN_SIZE;
 453          $length = \count($labels);
 454  
 455          // Number of "." delimiters.
 456          $domainLength = $length - 1;
 457  
 458          // If the last label is empty and it is not the first label, then it is the root label.
 459          // Increase the max size by 1, making it 254, to account for the root label's "."
 460          // delimiter. This also means we don't need to check the last label's length for being too
 461          // long.
 462          if ($length > 1 && '' === $labels[$length - 1]) {
 463              ++$maxDomainSize;
 464              --$length;
 465          }
 466  
 467          for ($i = 0; $i < $length; ++$i) {
 468              $bytes = \strlen($labels[$i]);
 469              $domainLength += $bytes;
 470  
 471              if ($bytes > self::MAX_LABEL_SIZE) {
 472                  $info->errors |= self::ERROR_LABEL_TOO_LONG;
 473              }
 474          }
 475  
 476          if ($domainLength > $maxDomainSize) {
 477              $info->errors |= self::ERROR_DOMAIN_NAME_TOO_LONG;
 478          }
 479      }
 480  
 481      /**
 482       * @see https://www.unicode.org/reports/tr46/#Validity_Criteria
 483       *
 484       * @param string              $label
 485       * @param array<string, bool> $options
 486       * @param bool                $canBeEmpty
 487       */
 488      private static function validateLabel($label, Info $info, array $options, $canBeEmpty)
 489      {
 490          if ('' === $label) {
 491              if (!$canBeEmpty && (!isset($options['VerifyDnsLength']) || $options['VerifyDnsLength'])) {
 492                  $info->errors |= self::ERROR_EMPTY_LABEL;
 493              }
 494  
 495              return;
 496          }
 497  
 498          // Step 1. The label must be in Unicode Normalization Form C.
 499          if (!Normalizer::isNormalized($label, Normalizer::FORM_C)) {
 500              $info->errors |= self::ERROR_INVALID_ACE_LABEL;
 501          }
 502  
 503          $codePoints = self::utf8Decode($label);
 504  
 505          if ($options['CheckHyphens']) {
 506              // Step 2. If CheckHyphens, the label must not contain a U+002D HYPHEN-MINUS character
 507              // in both the thrid and fourth positions.
 508              if (isset($codePoints[2], $codePoints[3]) && 0x002D === $codePoints[2] && 0x002D === $codePoints[3]) {
 509                  $info->errors |= self::ERROR_HYPHEN_3_4;
 510              }
 511  
 512              // Step 3. If CheckHyphens, the label must neither begin nor end with a U+002D
 513              // HYPHEN-MINUS character.
 514              if ('-' === substr($label, 0, 1)) {
 515                  $info->errors |= self::ERROR_LEADING_HYPHEN;
 516              }
 517  
 518              if ('-' === substr($label, -1, 1)) {
 519                  $info->errors |= self::ERROR_TRAILING_HYPHEN;
 520              }
 521          }
 522  
 523          // Step 4. The label must not contain a U+002E (.) FULL STOP.
 524          if (false !== strpos($label, '.')) {
 525              $info->errors |= self::ERROR_LABEL_HAS_DOT;
 526          }
 527  
 528          // Step 5. The label must not begin with a combining mark, that is: General_Category=Mark.
 529          if (1 === preg_match(Regex::COMBINING_MARK, $label)) {
 530              $info->errors |= self::ERROR_LEADING_COMBINING_MARK;
 531          }
 532  
 533          // Step 6. Each code point in the label must only have certain status values according to
 534          // Section 5, IDNA Mapping Table:
 535          $transitional = $options['Transitional_Processing'];
 536          $useSTD3ASCIIRules = $options['UseSTD3ASCIIRules'];
 537  
 538          foreach ($codePoints as $codePoint) {
 539              $data = self::lookupCodePointStatus($codePoint, $useSTD3ASCIIRules);
 540              $status = $data['status'];
 541  
 542              if ('valid' === $status || (!$transitional && 'deviation' === $status)) {
 543                  continue;
 544              }
 545  
 546              $info->errors |= self::ERROR_DISALLOWED;
 547  
 548              break;
 549          }
 550  
 551          // Step 7. If CheckJoiners, the label must satisify the ContextJ rules from Appendix A, in
 552          // The Unicode Code Points and Internationalized Domain Names for Applications (IDNA)
 553          // [IDNA2008].
 554          if ($options['CheckJoiners'] && !self::isValidContextJ($codePoints, $label)) {
 555              $info->errors |= self::ERROR_CONTEXTJ;
 556          }
 557  
 558          // Step 8. If CheckBidi, and if the domain name is a  Bidi domain name, then the label must
 559          // satisfy all six of the numbered conditions in [IDNA2008] RFC 5893, Section 2.
 560          if ($options['CheckBidi'] && (!$info->bidiDomain || $info->validBidiDomain)) {
 561              self::validateBidiLabel($label, $info);
 562          }
 563      }
 564  
 565      /**
 566       * @see https://tools.ietf.org/html/rfc3492#section-6.2
 567       *
 568       * @param string $input
 569       *
 570       * @return string
 571       */
 572      private static function punycodeDecode($input)
 573      {
 574          $n = self::INITIAL_N;
 575          $out = 0;
 576          $i = 0;
 577          $bias = self::INITIAL_BIAS;
 578          $lastDelimIndex = strrpos($input, self::DELIMITER);
 579          $b = false === $lastDelimIndex ? 0 : $lastDelimIndex;
 580          $inputLength = \strlen($input);
 581          $output = [];
 582          $bytes = array_map('ord', str_split($input));
 583  
 584          for ($j = 0; $j < $b; ++$j) {
 585              if ($bytes[$j] > 0x7F) {
 586                  throw new Exception('Invalid input');
 587              }
 588  
 589              $output[$out++] = $input[$j];
 590          }
 591  
 592          if ($b > 0) {
 593              ++$b;
 594          }
 595  
 596          for ($in = $b; $in < $inputLength; ++$out) {
 597              $oldi = $i;
 598              $w = 1;
 599  
 600              for ($k = self::BASE; /* no condition */; $k += self::BASE) {
 601                  if ($in >= $inputLength) {
 602                      throw new Exception('Invalid input');
 603                  }
 604  
 605                  $digit = self::$basicToDigit[$bytes[$in++] & 0xFF];
 606  
 607                  if ($digit < 0) {
 608                      throw new Exception('Invalid input');
 609                  }
 610  
 611                  if ($digit > intdiv(self::MAX_INT - $i, $w)) {
 612                      throw new Exception('Integer overflow');
 613                  }
 614  
 615                  $i += $digit * $w;
 616  
 617                  if ($k <= $bias) {
 618                      $t = self::TMIN;
 619                  } elseif ($k >= $bias + self::TMAX) {
 620                      $t = self::TMAX;
 621                  } else {
 622                      $t = $k - $bias;
 623                  }
 624  
 625                  if ($digit < $t) {
 626                      break;
 627                  }
 628  
 629                  $baseMinusT = self::BASE - $t;
 630  
 631                  if ($w > intdiv(self::MAX_INT, $baseMinusT)) {
 632                      throw new Exception('Integer overflow');
 633                  }
 634  
 635                  $w *= $baseMinusT;
 636              }
 637  
 638              $outPlusOne = $out + 1;
 639              $bias = self::adaptBias($i - $oldi, $outPlusOne, 0 === $oldi);
 640  
 641              if (intdiv($i, $outPlusOne) > self::MAX_INT - $n) {
 642                  throw new Exception('Integer overflow');
 643              }
 644  
 645              $n += intdiv($i, $outPlusOne);
 646              $i %= $outPlusOne;
 647              array_splice($output, $i++, 0, [mb_chr($n, 'utf-8')]);
 648          }
 649  
 650          return implode('', $output);
 651      }
 652  
 653      /**
 654       * @see https://tools.ietf.org/html/rfc3492#section-6.3
 655       *
 656       * @param string $input
 657       *
 658       * @return string
 659       */
 660      private static function punycodeEncode($input)
 661      {
 662          $n = self::INITIAL_N;
 663          $delta = 0;
 664          $out = 0;
 665          $bias = self::INITIAL_BIAS;
 666          $inputLength = 0;
 667          $output = '';
 668          $iter = self::utf8Decode($input);
 669  
 670          foreach ($iter as $codePoint) {
 671              ++$inputLength;
 672  
 673              if ($codePoint < 0x80) {
 674                  $output .= \chr($codePoint);
 675                  ++$out;
 676              }
 677          }
 678  
 679          $h = $out;
 680          $b = $out;
 681  
 682          if ($b > 0) {
 683              $output .= self::DELIMITER;
 684              ++$out;
 685          }
 686  
 687          while ($h < $inputLength) {
 688              $m = self::MAX_INT;
 689  
 690              foreach ($iter as $codePoint) {
 691                  if ($codePoint >= $n && $codePoint < $m) {
 692                      $m = $codePoint;
 693                  }
 694              }
 695  
 696              if ($m - $n > intdiv(self::MAX_INT - $delta, $h + 1)) {
 697                  throw new Exception('Integer overflow');
 698              }
 699  
 700              $delta += ($m - $n) * ($h + 1);
 701              $n = $m;
 702  
 703              foreach ($iter as $codePoint) {
 704                  if ($codePoint < $n && 0 === ++$delta) {
 705                      throw new Exception('Integer overflow');
 706                  }
 707  
 708                  if ($codePoint === $n) {
 709                      $q = $delta;
 710  
 711                      for ($k = self::BASE; /* no condition */; $k += self::BASE) {
 712                          if ($k <= $bias) {
 713                              $t = self::TMIN;
 714                          } elseif ($k >= $bias + self::TMAX) {
 715                              $t = self::TMAX;
 716                          } else {
 717                              $t = $k - $bias;
 718                          }
 719  
 720                          if ($q < $t) {
 721                              break;
 722                          }
 723  
 724                          $qMinusT = $q - $t;
 725                          $baseMinusT = self::BASE - $t;
 726                          $output .= self::encodeDigit($t + $qMinusT % $baseMinusT, false);
 727                          ++$out;
 728                          $q = intdiv($qMinusT, $baseMinusT);
 729                      }
 730  
 731                      $output .= self::encodeDigit($q, false);
 732                      ++$out;
 733                      $bias = self::adaptBias($delta, $h + 1, $h === $b);
 734                      $delta = 0;
 735                      ++$h;
 736                  }
 737              }
 738  
 739              ++$delta;
 740              ++$n;
 741          }
 742  
 743          return $output;
 744      }
 745  
 746      /**
 747       * @see https://tools.ietf.org/html/rfc3492#section-6.1
 748       *
 749       * @param int  $delta
 750       * @param int  $numPoints
 751       * @param bool $firstTime
 752       *
 753       * @return int
 754       */
 755      private static function adaptBias($delta, $numPoints, $firstTime)
 756      {
 757          // xxx >> 1 is a faster way of doing intdiv(xxx, 2)
 758          $delta = $firstTime ? intdiv($delta, self::DAMP) : $delta >> 1;
 759          $delta += intdiv($delta, $numPoints);
 760          $k = 0;
 761  
 762          while ($delta > ((self::BASE - self::TMIN) * self::TMAX) >> 1) {
 763              $delta = intdiv($delta, self::BASE - self::TMIN);
 764              $k += self::BASE;
 765          }
 766  
 767          return $k + intdiv((self::BASE - self::TMIN + 1) * $delta, $delta + self::SKEW);
 768      }
 769  
 770      /**
 771       * @param int  $d
 772       * @param bool $flag
 773       *
 774       * @return string
 775       */
 776      private static function encodeDigit($d, $flag)
 777      {
 778          return \chr($d + 22 + 75 * ($d < 26 ? 1 : 0) - (($flag ? 1 : 0) << 5));
 779      }
 780  
 781      /**
 782       * Takes a UTF-8 encoded string and converts it into a series of integer code points. Any
 783       * invalid byte sequences will be replaced by a U+FFFD replacement code point.
 784       *
 785       * @see https://encoding.spec.whatwg.org/#utf-8-decoder
 786       *
 787       * @param string $input
 788       *
 789       * @return array<int, int>
 790       */
 791      private static function utf8Decode($input)
 792      {
 793          $bytesSeen = 0;
 794          $bytesNeeded = 0;
 795          $lowerBoundary = 0x80;
 796          $upperBoundary = 0xBF;
 797          $codePoint = 0;
 798          $codePoints = [];
 799          $length = \strlen($input);
 800  
 801          for ($i = 0; $i < $length; ++$i) {
 802              $byte = \ord($input[$i]);
 803  
 804              if (0 === $bytesNeeded) {
 805                  if ($byte >= 0x00 && $byte <= 0x7F) {
 806                      $codePoints[] = $byte;
 807  
 808                      continue;
 809                  }
 810  
 811                  if ($byte >= 0xC2 && $byte <= 0xDF) {
 812                      $bytesNeeded = 1;
 813                      $codePoint = $byte & 0x1F;
 814                  } elseif ($byte >= 0xE0 && $byte <= 0xEF) {
 815                      if (0xE0 === $byte) {
 816                          $lowerBoundary = 0xA0;
 817                      } elseif (0xED === $byte) {
 818                          $upperBoundary = 0x9F;
 819                      }
 820  
 821                      $bytesNeeded = 2;
 822                      $codePoint = $byte & 0xF;
 823                  } elseif ($byte >= 0xF0 && $byte <= 0xF4) {
 824                      if (0xF0 === $byte) {
 825                          $lowerBoundary = 0x90;
 826                      } elseif (0xF4 === $byte) {
 827                          $upperBoundary = 0x8F;
 828                      }
 829  
 830                      $bytesNeeded = 3;
 831                      $codePoint = $byte & 0x7;
 832                  } else {
 833                      $codePoints[] = 0xFFFD;
 834                  }
 835  
 836                  continue;
 837              }
 838  
 839              if ($byte < $lowerBoundary || $byte > $upperBoundary) {
 840                  $codePoint = 0;
 841                  $bytesNeeded = 0;
 842                  $bytesSeen = 0;
 843                  $lowerBoundary = 0x80;
 844                  $upperBoundary = 0xBF;
 845                  --$i;
 846                  $codePoints[] = 0xFFFD;
 847  
 848                  continue;
 849              }
 850  
 851              $lowerBoundary = 0x80;
 852              $upperBoundary = 0xBF;
 853              $codePoint = ($codePoint << 6) | ($byte & 0x3F);
 854  
 855              if (++$bytesSeen !== $bytesNeeded) {
 856                  continue;
 857              }
 858  
 859              $codePoints[] = $codePoint;
 860              $codePoint = 0;
 861              $bytesNeeded = 0;
 862              $bytesSeen = 0;
 863          }
 864  
 865          // String unexpectedly ended, so append a U+FFFD code point.
 866          if (0 !== $bytesNeeded) {
 867              $codePoints[] = 0xFFFD;
 868          }
 869  
 870          return $codePoints;
 871      }
 872  
 873      /**
 874       * @param int  $codePoint
 875       * @param bool $useSTD3ASCIIRules
 876       *
 877       * @return array{status: string, mapping?: string}
 878       */
 879      private static function lookupCodePointStatus($codePoint, $useSTD3ASCIIRules)
 880      {
 881          if (!self::$mappingTableLoaded) {
 882              self::$mappingTableLoaded = true;
 883              self::$mapped = require  __DIR__.'/Resources/unidata/mapped.php';
 884              self::$ignored = require  __DIR__.'/Resources/unidata/ignored.php';
 885              self::$deviation = require  __DIR__.'/Resources/unidata/deviation.php';
 886              self::$disallowed = require  __DIR__.'/Resources/unidata/disallowed.php';
 887              self::$disallowed_STD3_mapped = require  __DIR__.'/Resources/unidata/disallowed_STD3_mapped.php';
 888              self::$disallowed_STD3_valid = require  __DIR__.'/Resources/unidata/disallowed_STD3_valid.php';
 889          }
 890  
 891          if (isset(self::$mapped[$codePoint])) {
 892              return ['status' => 'mapped', 'mapping' => self::$mapped[$codePoint]];
 893          }
 894  
 895          if (isset(self::$ignored[$codePoint])) {
 896              return ['status' => 'ignored'];
 897          }
 898  
 899          if (isset(self::$deviation[$codePoint])) {
 900              return ['status' => 'deviation', 'mapping' => self::$deviation[$codePoint]];
 901          }
 902  
 903          if (isset(self::$disallowed[$codePoint]) || DisallowedRanges::inRange($codePoint)) {
 904              return ['status' => 'disallowed'];
 905          }
 906  
 907          $isDisallowedMapped = isset(self::$disallowed_STD3_mapped[$codePoint]);
 908  
 909          if ($isDisallowedMapped || isset(self::$disallowed_STD3_valid[$codePoint])) {
 910              $status = 'disallowed';
 911  
 912              if (!$useSTD3ASCIIRules) {
 913                  $status = $isDisallowedMapped ? 'mapped' : 'valid';
 914              }
 915  
 916              if ($isDisallowedMapped) {
 917                  return ['status' => $status, 'mapping' => self::$disallowed_STD3_mapped[$codePoint]];
 918              }
 919  
 920              return ['status' => $status];
 921          }
 922  
 923          return ['status' => 'valid'];
 924      }
 925  }


Generated: Sat Feb 22 05:10:06 2025 Cross-referenced by PHPXref 0.7.1