[ Index ]

PHP Cross Reference of YOURLS

title

Body

[close]

/includes/vendor/symfony/polyfill-intl-idn/ -> Idn.php (source)

   1  <?php
   2  
   3  /*
   4   * This file is part of the Symfony package.
   5   *
   6   * (c) Fabien Potencier <fabien@symfony.com> and Trevor Rowbotham <trevor.rowbotham@pm.me>
   7   *
   8   * For the full copyright and license information, please view the LICENSE
   9   * file that was distributed with this source code.
  10   */
  11  
  12  namespace Symfony\Polyfill\Intl\Idn;
  13  
  14  use Symfony\Polyfill\Intl\Idn\Resources\unidata\DisallowedRanges;
  15  use Symfony\Polyfill\Intl\Idn\Resources\unidata\Regex;
  16  
  17  /**
  18   * @see https://www.unicode.org/reports/tr46/
  19   *
  20   * @internal
  21   */
  22  final class Idn
  23  {
  24      public const ERROR_EMPTY_LABEL = 1;
  25      public const ERROR_LABEL_TOO_LONG = 2;
  26      public const ERROR_DOMAIN_NAME_TOO_LONG = 4;
  27      public const ERROR_LEADING_HYPHEN = 8;
  28      public const ERROR_TRAILING_HYPHEN = 0x10;
  29      public const ERROR_HYPHEN_3_4 = 0x20;
  30      public const ERROR_LEADING_COMBINING_MARK = 0x40;
  31      public const ERROR_DISALLOWED = 0x80;
  32      public const ERROR_PUNYCODE = 0x100;
  33      public const ERROR_LABEL_HAS_DOT = 0x200;
  34      public const ERROR_INVALID_ACE_LABEL = 0x400;
  35      public const ERROR_BIDI = 0x800;
  36      public const ERROR_CONTEXTJ = 0x1000;
  37      public const ERROR_CONTEXTO_PUNCTUATION = 0x2000;
  38      public const ERROR_CONTEXTO_DIGITS = 0x4000;
  39  
  40      public const INTL_IDNA_VARIANT_2003 = 0;
  41      public const INTL_IDNA_VARIANT_UTS46 = 1;
  42  
  43      public const IDNA_DEFAULT = 0;
  44      public const IDNA_ALLOW_UNASSIGNED = 1;
  45      public const IDNA_USE_STD3_RULES = 2;
  46      public const IDNA_CHECK_BIDI = 4;
  47      public const IDNA_CHECK_CONTEXTJ = 8;
  48      public const IDNA_NONTRANSITIONAL_TO_ASCII = 16;
  49      public const IDNA_NONTRANSITIONAL_TO_UNICODE = 32;
  50  
  51      public const MAX_DOMAIN_SIZE = 253;
  52      public const MAX_LABEL_SIZE = 63;
  53  
  54      public const BASE = 36;
  55      public const TMIN = 1;
  56      public const TMAX = 26;
  57      public const SKEW = 38;
  58      public const DAMP = 700;
  59      public const INITIAL_BIAS = 72;
  60      public const INITIAL_N = 128;
  61      public const DELIMITER = '-';
  62      public const MAX_INT = 2147483647;
  63  
  64      /**
  65       * Contains the numeric value of a basic code point (for use in representing integers) in the
  66       * range 0 to BASE-1, or -1 if the code point does not represent a value.
  67       *
  68       * @var array<int, int>
  69       */
  70      private static $basicToDigit = [
  71          -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
  72          -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
  73  
  74          -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
  75          26, 27, 28, 29, 30, 31, 32, 33, 34, 35, -1, -1, -1, -1, -1, -1,
  76  
  77          -1,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14,
  78          15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -1, -1, -1, -1,
  79  
  80          -1,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14,
  81          15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -1, -1, -1, -1,
  82  
  83          -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
  84          -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
  85  
  86          -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
  87          -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
  88  
  89          -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
  90          -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
  91  
  92          -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
  93          -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
  94      ];
  95  
  96      /**
  97       * @var array<int, int>
  98       */
  99      private static $virama;
 100  
 101      /**
 102       * @var array<int, string>
 103       */
 104      private static $mapped;
 105  
 106      /**
 107       * @var array<int, bool>
 108       */
 109      private static $ignored;
 110  
 111      /**
 112       * @var array<int, string>
 113       */
 114      private static $deviation;
 115  
 116      /**
 117       * @var array<int, bool>
 118       */
 119      private static $disallowed;
 120  
 121      /**
 122       * @var array<int, string>
 123       */
 124      private static $disallowed_STD3_mapped;
 125  
 126      /**
 127       * @var array<int, bool>
 128       */
 129      private static $disallowed_STD3_valid;
 130  
 131      /**
 132       * @var bool
 133       */
 134      private static $mappingTableLoaded = false;
 135  
 136      /**
 137       * @see https://www.unicode.org/reports/tr46/#ToASCII
 138       *
 139       * @param string $domainName
 140       * @param int    $options
 141       * @param int    $variant
 142       * @param array  $idna_info
 143       *
 144       * @return string|false
 145       */
 146      public static function idn_to_ascii($domainName, $options = self::IDNA_DEFAULT, $variant = self::INTL_IDNA_VARIANT_UTS46, &$idna_info = [])
 147      {
 148          if (self::INTL_IDNA_VARIANT_2003 === $variant) {
 149              @trigger_error('idn_to_ascii(): INTL_IDNA_VARIANT_2003 is deprecated', \E_USER_DEPRECATED);
 150          }
 151  
 152          $options = [
 153              'CheckHyphens' => true,
 154              'CheckBidi' => self::INTL_IDNA_VARIANT_2003 === $variant || 0 !== ($options & self::IDNA_CHECK_BIDI),
 155              'CheckJoiners' => self::INTL_IDNA_VARIANT_UTS46 === $variant && 0 !== ($options & self::IDNA_CHECK_CONTEXTJ),
 156              'UseSTD3ASCIIRules' => 0 !== ($options & self::IDNA_USE_STD3_RULES),
 157              'Transitional_Processing' => self::INTL_IDNA_VARIANT_2003 === $variant || 0 === ($options & self::IDNA_NONTRANSITIONAL_TO_ASCII),
 158              'VerifyDnsLength' => true,
 159          ];
 160          $info = new Info();
 161          $labels = self::process((string) $domainName, $options, $info);
 162  
 163          foreach ($labels as $i => $label) {
 164              // Only convert labels to punycode that contain non-ASCII code points
 165              if (1 === preg_match('/[^\x00-\x7F]/', $label)) {
 166                  try {
 167                      $label = 'xn--'.self::punycodeEncode($label);
 168                  } catch (\Exception $e) {
 169                      $info->errors |= self::ERROR_PUNYCODE;
 170                  }
 171  
 172                  $labels[$i] = $label;
 173              }
 174          }
 175  
 176          if ($options['VerifyDnsLength']) {
 177              self::validateDomainAndLabelLength($labels, $info);
 178          }
 179  
 180          $idna_info = [
 181              'result' => implode('.', $labels),
 182              'isTransitionalDifferent' => $info->transitionalDifferent,
 183              'errors' => $info->errors,
 184          ];
 185  
 186          return 0 === $info->errors ? $idna_info['result'] : false;
 187      }
 188  
 189      /**
 190       * @see https://www.unicode.org/reports/tr46/#ToUnicode
 191       *
 192       * @param string $domainName
 193       * @param int    $options
 194       * @param int    $variant
 195       * @param array  $idna_info
 196       *
 197       * @return string|false
 198       */
 199      public static function idn_to_utf8($domainName, $options = self::IDNA_DEFAULT, $variant = self::INTL_IDNA_VARIANT_UTS46, &$idna_info = [])
 200      {
 201          if (self::INTL_IDNA_VARIANT_2003 === $variant) {
 202              @trigger_error('idn_to_utf8(): INTL_IDNA_VARIANT_2003 is deprecated', \E_USER_DEPRECATED);
 203          }
 204  
 205          $info = new Info();
 206          $labels = self::process((string) $domainName, [
 207              'CheckHyphens' => true,
 208              'CheckBidi' => self::INTL_IDNA_VARIANT_2003 === $variant || 0 !== ($options & self::IDNA_CHECK_BIDI),
 209              'CheckJoiners' => self::INTL_IDNA_VARIANT_UTS46 === $variant && 0 !== ($options & self::IDNA_CHECK_CONTEXTJ),
 210              'UseSTD3ASCIIRules' => 0 !== ($options & self::IDNA_USE_STD3_RULES),
 211              'Transitional_Processing' => self::INTL_IDNA_VARIANT_2003 === $variant || 0 === ($options & self::IDNA_NONTRANSITIONAL_TO_UNICODE),
 212          ], $info);
 213          $idna_info = [
 214              'result' => implode('.', $labels),
 215              'isTransitionalDifferent' => $info->transitionalDifferent,
 216              'errors' => $info->errors,
 217          ];
 218  
 219          return 0 === $info->errors ? $idna_info['result'] : false;
 220      }
 221  
 222      /**
 223       * @param string $label
 224       *
 225       * @return bool
 226       */
 227      private static function isValidContextJ(array $codePoints, $label)
 228      {
 229          if (!isset(self::$virama)) {
 230              self::$virama = require __DIR__.\DIRECTORY_SEPARATOR.'Resources'.\DIRECTORY_SEPARATOR.'unidata'.\DIRECTORY_SEPARATOR.'virama.php';
 231          }
 232  
 233          $offset = 0;
 234  
 235          foreach ($codePoints as $i => $codePoint) {
 236              if (0x200C !== $codePoint && 0x200D !== $codePoint) {
 237                  continue;
 238              }
 239  
 240              if (!isset($codePoints[$i - 1])) {
 241                  return false;
 242              }
 243  
 244              // If Canonical_Combining_Class(Before(cp)) .eq. Virama Then True;
 245              if (isset(self::$virama[$codePoints[$i - 1]])) {
 246                  continue;
 247              }
 248  
 249              // If RegExpMatch((Joining_Type:{L,D})(Joining_Type:T)*\u200C(Joining_Type:T)*(Joining_Type:{R,D})) Then
 250              // True;
 251              // Generated RegExp = ([Joining_Type:{L,D}][Joining_Type:T]*\u200C[Joining_Type:T]*)[Joining_Type:{R,D}]
 252              if (0x200C === $codePoint && 1 === preg_match(Regex::ZWNJ, $label, $matches, \PREG_OFFSET_CAPTURE, $offset)) {
 253                  $offset += \strlen($matches[1][0]);
 254  
 255                  continue;
 256              }
 257  
 258              return false;
 259          }
 260  
 261          return true;
 262      }
 263  
 264      /**
 265       * @see https://www.unicode.org/reports/tr46/#ProcessingStepMap
 266       *
 267       * @param string              $input
 268       * @param array<string, bool> $options
 269       *
 270       * @return string
 271       */
 272      private static function mapCodePoints($input, array $options, Info $info)
 273      {
 274          $str = '';
 275          $useSTD3ASCIIRules = $options['UseSTD3ASCIIRules'];
 276          $transitional = $options['Transitional_Processing'];
 277  
 278          foreach (self::utf8Decode($input) as $codePoint) {
 279              $data = self::lookupCodePointStatus($codePoint, $useSTD3ASCIIRules);
 280  
 281              switch ($data['status']) {
 282                  case 'disallowed':
 283                  case 'valid':
 284                      $str .= mb_chr($codePoint, 'utf-8');
 285  
 286                      break;
 287  
 288                  case 'ignored':
 289                      // Do nothing.
 290                      break;
 291  
 292                  case 'mapped':
 293                      $str .= $transitional && 0x1E9E === $codePoint ? 'ss' : $data['mapping'];
 294  
 295                      break;
 296  
 297                  case 'deviation':
 298                      $info->transitionalDifferent = true;
 299                      $str .= ($transitional ? $data['mapping'] : mb_chr($codePoint, 'utf-8'));
 300  
 301                      break;
 302              }
 303          }
 304  
 305          return $str;
 306      }
 307  
 308      /**
 309       * @see https://www.unicode.org/reports/tr46/#Processing
 310       *
 311       * @param string              $domain
 312       * @param array<string, bool> $options
 313       *
 314       * @return array<int, string>
 315       */
 316      private static function process($domain, array $options, Info $info)
 317      {
 318          // If VerifyDnsLength is not set, we are doing ToUnicode otherwise we are doing ToASCII and
 319          // we need to respect the VerifyDnsLength option.
 320          $checkForEmptyLabels = !isset($options['VerifyDnsLength']) || $options['VerifyDnsLength'];
 321  
 322          if ($checkForEmptyLabels && '' === $domain) {
 323              $info->errors |= self::ERROR_EMPTY_LABEL;
 324  
 325              return [$domain];
 326          }
 327  
 328          // Step 1. Map each code point in the domain name string
 329          $domain = self::mapCodePoints($domain, $options, $info);
 330  
 331          // Step 2. Normalize the domain name string to Unicode Normalization Form C.
 332          if (!\Normalizer::isNormalized($domain, \Normalizer::FORM_C)) {
 333              $domain = \Normalizer::normalize($domain, \Normalizer::FORM_C);
 334          }
 335  
 336          // Step 3. Break the string into labels at U+002E (.) FULL STOP.
 337          $labels = explode('.', $domain);
 338          $lastLabelIndex = \count($labels) - 1;
 339  
 340          // Step 4. Convert and validate each label in the domain name string.
 341          foreach ($labels as $i => $label) {
 342              $validationOptions = $options;
 343  
 344              if ('xn--' === substr($label, 0, 4)) {
 345                  // Step 4.1. If the label contains any non-ASCII code point (i.e., a code point greater than U+007F),
 346                  // record that there was an error, and continue with the next label.
 347                  if (preg_match('/[^\x00-\x7F]/', $label)) {
 348                      $info->errors |= self::ERROR_PUNYCODE;
 349  
 350                      continue;
 351                  }
 352  
 353                  // Step 4.2. Attempt to convert the rest of the label to Unicode according to Punycode [RFC3492]. If
 354                  // that conversion fails, record that there was an error, and continue
 355                  // with the next label. Otherwise replace the original label in the string by the results of the
 356                  // conversion.
 357                  try {
 358                      $label = self::punycodeDecode(substr($label, 4));
 359                  } catch (\Exception $e) {
 360                      $info->errors |= self::ERROR_PUNYCODE;
 361  
 362                      continue;
 363                  }
 364  
 365                  $validationOptions['Transitional_Processing'] = false;
 366                  $labels[$i] = $label;
 367              }
 368  
 369              self::validateLabel($label, $info, $validationOptions, $i > 0 && $i === $lastLabelIndex);
 370          }
 371  
 372          if ($info->bidiDomain && !$info->validBidiDomain) {
 373              $info->errors |= self::ERROR_BIDI;
 374          }
 375  
 376          // Any input domain name string that does not record an error has been successfully
 377          // processed according to this specification. Conversely, if an input domain_name string
 378          // causes an error, then the processing of the input domain_name string fails. Determining
 379          // what to do with error input is up to the caller, and not in the scope of this document.
 380          return $labels;
 381      }
 382  
 383      /**
 384       * @see https://tools.ietf.org/html/rfc5893#section-2
 385       *
 386       * @param string $label
 387       */
 388      private static function validateBidiLabel($label, Info $info)
 389      {
 390          if (1 === preg_match(Regex::RTL_LABEL, $label)) {
 391              $info->bidiDomain = true;
 392  
 393              // Step 1. The first character must be a character with Bidi property L, R, or AL.
 394              // If it has the R or AL property, it is an RTL label
 395              if (1 !== preg_match(Regex::BIDI_STEP_1_RTL, $label)) {
 396                  $info->validBidiDomain = false;
 397  
 398                  return;
 399              }
 400  
 401              // Step 2. In an RTL label, only characters with the Bidi properties R, AL, AN, EN, ES,
 402              // CS, ET, ON, BN, or NSM are allowed.
 403              if (1 === preg_match(Regex::BIDI_STEP_2, $label)) {
 404                  $info->validBidiDomain = false;
 405  
 406                  return;
 407              }
 408  
 409              // Step 3. In an RTL label, the end of the label must be a character with Bidi property
 410              // R, AL, EN, or AN, followed by zero or more characters with Bidi property NSM.
 411              if (1 !== preg_match(Regex::BIDI_STEP_3, $label)) {
 412                  $info->validBidiDomain = false;
 413  
 414                  return;
 415              }
 416  
 417              // Step 4. In an RTL label, if an EN is present, no AN may be present, and vice versa.
 418              if (1 === preg_match(Regex::BIDI_STEP_4_AN, $label) && 1 === preg_match(Regex::BIDI_STEP_4_EN, $label)) {
 419                  $info->validBidiDomain = false;
 420  
 421                  return;
 422              }
 423  
 424              return;
 425          }
 426  
 427          // We are a LTR label
 428          // Step 1. The first character must be a character with Bidi property L, R, or AL.
 429          // If it has the L property, it is an LTR label.
 430          if (1 !== preg_match(Regex::BIDI_STEP_1_LTR, $label)) {
 431              $info->validBidiDomain = false;
 432  
 433              return;
 434          }
 435  
 436          // Step 5. In an LTR label, only characters with the Bidi properties L, EN,
 437          // ES, CS, ET, ON, BN, or NSM are allowed.
 438          if (1 === preg_match(Regex::BIDI_STEP_5, $label)) {
 439              $info->validBidiDomain = false;
 440  
 441              return;
 442          }
 443  
 444          // Step 6.In an LTR label, the end of the label must be a character with Bidi property L or
 445          // EN, followed by zero or more characters with Bidi property NSM.
 446          if (1 !== preg_match(Regex::BIDI_STEP_6, $label)) {
 447              $info->validBidiDomain = false;
 448  
 449              return;
 450          }
 451      }
 452  
 453      /**
 454       * @param array<int, string> $labels
 455       */
 456      private static function validateDomainAndLabelLength(array $labels, Info $info)
 457      {
 458          $maxDomainSize = self::MAX_DOMAIN_SIZE;
 459          $length = \count($labels);
 460  
 461          // Number of "." delimiters.
 462          $domainLength = $length - 1;
 463  
 464          // If the last label is empty and it is not the first label, then it is the root label.
 465          // Increase the max size by 1, making it 254, to account for the root label's "."
 466          // delimiter. This also means we don't need to check the last label's length for being too
 467          // long.
 468          if ($length > 1 && '' === $labels[$length - 1]) {
 469              ++$maxDomainSize;
 470              --$length;
 471          }
 472  
 473          for ($i = 0; $i < $length; ++$i) {
 474              $bytes = \strlen($labels[$i]);
 475              $domainLength += $bytes;
 476  
 477              if ($bytes > self::MAX_LABEL_SIZE) {
 478                  $info->errors |= self::ERROR_LABEL_TOO_LONG;
 479              }
 480          }
 481  
 482          if ($domainLength > $maxDomainSize) {
 483              $info->errors |= self::ERROR_DOMAIN_NAME_TOO_LONG;
 484          }
 485      }
 486  
 487      /**
 488       * @see https://www.unicode.org/reports/tr46/#Validity_Criteria
 489       *
 490       * @param string              $label
 491       * @param array<string, bool> $options
 492       * @param bool                $canBeEmpty
 493       */
 494      private static function validateLabel($label, Info $info, array $options, $canBeEmpty)
 495      {
 496          if ('' === $label) {
 497              if (!$canBeEmpty && (!isset($options['VerifyDnsLength']) || $options['VerifyDnsLength'])) {
 498                  $info->errors |= self::ERROR_EMPTY_LABEL;
 499              }
 500  
 501              return;
 502          }
 503  
 504          // Step 1. The label must be in Unicode Normalization Form C.
 505          if (!\Normalizer::isNormalized($label, \Normalizer::FORM_C)) {
 506              $info->errors |= self::ERROR_INVALID_ACE_LABEL;
 507          }
 508  
 509          $codePoints = self::utf8Decode($label);
 510  
 511          if ($options['CheckHyphens']) {
 512              // Step 2. If CheckHyphens, the label must not contain a U+002D HYPHEN-MINUS character
 513              // in both the thrid and fourth positions.
 514              if (isset($codePoints[2], $codePoints[3]) && 0x002D === $codePoints[2] && 0x002D === $codePoints[3]) {
 515                  $info->errors |= self::ERROR_HYPHEN_3_4;
 516              }
 517  
 518              // Step 3. If CheckHyphens, the label must neither begin nor end with a U+002D
 519              // HYPHEN-MINUS character.
 520              if ('-' === substr($label, 0, 1)) {
 521                  $info->errors |= self::ERROR_LEADING_HYPHEN;
 522              }
 523  
 524              if ('-' === substr($label, -1, 1)) {
 525                  $info->errors |= self::ERROR_TRAILING_HYPHEN;
 526              }
 527          } elseif ('xn--' === substr($label, 0, 4)) {
 528              $info->errors |= self::ERROR_PUNYCODE;
 529          }
 530  
 531          // Step 4. The label must not contain a U+002E (.) FULL STOP.
 532          if (false !== strpos($label, '.')) {
 533              $info->errors |= self::ERROR_LABEL_HAS_DOT;
 534          }
 535  
 536          // Step 5. The label must not begin with a combining mark, that is: General_Category=Mark.
 537          if (1 === preg_match(Regex::COMBINING_MARK, $label)) {
 538              $info->errors |= self::ERROR_LEADING_COMBINING_MARK;
 539          }
 540  
 541          // Step 6. Each code point in the label must only have certain status values according to
 542          // Section 5, IDNA Mapping Table:
 543          $transitional = $options['Transitional_Processing'];
 544          $useSTD3ASCIIRules = $options['UseSTD3ASCIIRules'];
 545  
 546          foreach ($codePoints as $codePoint) {
 547              $data = self::lookupCodePointStatus($codePoint, $useSTD3ASCIIRules);
 548              $status = $data['status'];
 549  
 550              if ('valid' === $status || (!$transitional && 'deviation' === $status)) {
 551                  continue;
 552              }
 553  
 554              $info->errors |= self::ERROR_DISALLOWED;
 555  
 556              break;
 557          }
 558  
 559          // Step 7. If CheckJoiners, the label must satisify the ContextJ rules from Appendix A, in
 560          // The Unicode Code Points and Internationalized Domain Names for Applications (IDNA)
 561          // [IDNA2008].
 562          if ($options['CheckJoiners'] && !self::isValidContextJ($codePoints, $label)) {
 563              $info->errors |= self::ERROR_CONTEXTJ;
 564          }
 565  
 566          // Step 8. If CheckBidi, and if the domain name is a  Bidi domain name, then the label must
 567          // satisfy all six of the numbered conditions in [IDNA2008] RFC 5893, Section 2.
 568          if ($options['CheckBidi'] && (!$info->bidiDomain || $info->validBidiDomain)) {
 569              self::validateBidiLabel($label, $info);
 570          }
 571      }
 572  
 573      /**
 574       * @see https://tools.ietf.org/html/rfc3492#section-6.2
 575       *
 576       * @param string $input
 577       *
 578       * @return string
 579       */
 580      private static function punycodeDecode($input)
 581      {
 582          $n = self::INITIAL_N;
 583          $out = 0;
 584          $i = 0;
 585          $bias = self::INITIAL_BIAS;
 586          $lastDelimIndex = strrpos($input, self::DELIMITER);
 587          $b = false === $lastDelimIndex ? 0 : $lastDelimIndex;
 588          $inputLength = \strlen($input);
 589          $output = [];
 590          $bytes = array_map('ord', str_split($input));
 591  
 592          for ($j = 0; $j < $b; ++$j) {
 593              if ($bytes[$j] > 0x7F) {
 594                  throw new \Exception('Invalid input');
 595              }
 596  
 597              $output[$out++] = $input[$j];
 598          }
 599  
 600          if ($b > 0) {
 601              ++$b;
 602          }
 603  
 604          for ($in = $b; $in < $inputLength; ++$out) {
 605              $oldi = $i;
 606              $w = 1;
 607  
 608              for ($k = self::BASE; /* no condition */; $k += self::BASE) {
 609                  if ($in >= $inputLength) {
 610                      throw new \Exception('Invalid input');
 611                  }
 612  
 613                  $digit = self::$basicToDigit[$bytes[$in++] & 0xFF];
 614  
 615                  if ($digit < 0) {
 616                      throw new \Exception('Invalid input');
 617                  }
 618  
 619                  if ($digit > intdiv(self::MAX_INT - $i, $w)) {
 620                      throw new \Exception('Integer overflow');
 621                  }
 622  
 623                  $i += $digit * $w;
 624  
 625                  if ($k <= $bias) {
 626                      $t = self::TMIN;
 627                  } elseif ($k >= $bias + self::TMAX) {
 628                      $t = self::TMAX;
 629                  } else {
 630                      $t = $k - $bias;
 631                  }
 632  
 633                  if ($digit < $t) {
 634                      break;
 635                  }
 636  
 637                  $baseMinusT = self::BASE - $t;
 638  
 639                  if ($w > intdiv(self::MAX_INT, $baseMinusT)) {
 640                      throw new \Exception('Integer overflow');
 641                  }
 642  
 643                  $w *= $baseMinusT;
 644              }
 645  
 646              $outPlusOne = $out + 1;
 647              $bias = self::adaptBias($i - $oldi, $outPlusOne, 0 === $oldi);
 648  
 649              if (intdiv($i, $outPlusOne) > self::MAX_INT - $n) {
 650                  throw new \Exception('Integer overflow');
 651              }
 652  
 653              $n += intdiv($i, $outPlusOne);
 654              $i %= $outPlusOne;
 655              array_splice($output, $i++, 0, [mb_chr($n, 'utf-8')]);
 656          }
 657  
 658          return implode('', $output);
 659      }
 660  
 661      /**
 662       * @see https://tools.ietf.org/html/rfc3492#section-6.3
 663       *
 664       * @param string $input
 665       *
 666       * @return string
 667       */
 668      private static function punycodeEncode($input)
 669      {
 670          $n = self::INITIAL_N;
 671          $delta = 0;
 672          $out = 0;
 673          $bias = self::INITIAL_BIAS;
 674          $inputLength = 0;
 675          $output = '';
 676          $iter = self::utf8Decode($input);
 677  
 678          foreach ($iter as $codePoint) {
 679              ++$inputLength;
 680  
 681              if ($codePoint < 0x80) {
 682                  $output .= \chr($codePoint);
 683                  ++$out;
 684              }
 685          }
 686  
 687          $h = $out;
 688          $b = $out;
 689  
 690          if ($b > 0) {
 691              $output .= self::DELIMITER;
 692              ++$out;
 693          }
 694  
 695          while ($h < $inputLength) {
 696              $m = self::MAX_INT;
 697  
 698              foreach ($iter as $codePoint) {
 699                  if ($codePoint >= $n && $codePoint < $m) {
 700                      $m = $codePoint;
 701                  }
 702              }
 703  
 704              if ($m - $n > intdiv(self::MAX_INT - $delta, $h + 1)) {
 705                  throw new \Exception('Integer overflow');
 706              }
 707  
 708              $delta += ($m - $n) * ($h + 1);
 709              $n = $m;
 710  
 711              foreach ($iter as $codePoint) {
 712                  if ($codePoint < $n && 0 === ++$delta) {
 713                      throw new \Exception('Integer overflow');
 714                  }
 715  
 716                  if ($codePoint === $n) {
 717                      $q = $delta;
 718  
 719                      for ($k = self::BASE; /* no condition */; $k += self::BASE) {
 720                          if ($k <= $bias) {
 721                              $t = self::TMIN;
 722                          } elseif ($k >= $bias + self::TMAX) {
 723                              $t = self::TMAX;
 724                          } else {
 725                              $t = $k - $bias;
 726                          }
 727  
 728                          if ($q < $t) {
 729                              break;
 730                          }
 731  
 732                          $qMinusT = $q - $t;
 733                          $baseMinusT = self::BASE - $t;
 734                          $output .= self::encodeDigit($t + $qMinusT % $baseMinusT, false);
 735                          ++$out;
 736                          $q = intdiv($qMinusT, $baseMinusT);
 737                      }
 738  
 739                      $output .= self::encodeDigit($q, false);
 740                      ++$out;
 741                      $bias = self::adaptBias($delta, $h + 1, $h === $b);
 742                      $delta = 0;
 743                      ++$h;
 744                  }
 745              }
 746  
 747              ++$delta;
 748              ++$n;
 749          }
 750  
 751          return $output;
 752      }
 753  
 754      /**
 755       * @see https://tools.ietf.org/html/rfc3492#section-6.1
 756       *
 757       * @param int  $delta
 758       * @param int  $numPoints
 759       * @param bool $firstTime
 760       *
 761       * @return int
 762       */
 763      private static function adaptBias($delta, $numPoints, $firstTime)
 764      {
 765          // xxx >> 1 is a faster way of doing intdiv(xxx, 2)
 766          $delta = $firstTime ? intdiv($delta, self::DAMP) : $delta >> 1;
 767          $delta += intdiv($delta, $numPoints);
 768          $k = 0;
 769  
 770          while ($delta > ((self::BASE - self::TMIN) * self::TMAX) >> 1) {
 771              $delta = intdiv($delta, self::BASE - self::TMIN);
 772              $k += self::BASE;
 773          }
 774  
 775          return $k + intdiv((self::BASE - self::TMIN + 1) * $delta, $delta + self::SKEW);
 776      }
 777  
 778      /**
 779       * @param int  $d
 780       * @param bool $flag
 781       *
 782       * @return string
 783       */
 784      private static function encodeDigit($d, $flag)
 785      {
 786          return \chr($d + 22 + 75 * ($d < 26 ? 1 : 0) - (($flag ? 1 : 0) << 5));
 787      }
 788  
 789      /**
 790       * Takes a UTF-8 encoded string and converts it into a series of integer code points. Any
 791       * invalid byte sequences will be replaced by a U+FFFD replacement code point.
 792       *
 793       * @see https://encoding.spec.whatwg.org/#utf-8-decoder
 794       *
 795       * @param string $input
 796       *
 797       * @return array<int, int>
 798       */
 799      private static function utf8Decode($input)
 800      {
 801          $bytesSeen = 0;
 802          $bytesNeeded = 0;
 803          $lowerBoundary = 0x80;
 804          $upperBoundary = 0xBF;
 805          $codePoint = 0;
 806          $codePoints = [];
 807          $length = \strlen($input);
 808  
 809          for ($i = 0; $i < $length; ++$i) {
 810              $byte = \ord($input[$i]);
 811  
 812              if (0 === $bytesNeeded) {
 813                  if ($byte >= 0x00 && $byte <= 0x7F) {
 814                      $codePoints[] = $byte;
 815  
 816                      continue;
 817                  }
 818  
 819                  if ($byte >= 0xC2 && $byte <= 0xDF) {
 820                      $bytesNeeded = 1;
 821                      $codePoint = $byte & 0x1F;
 822                  } elseif ($byte >= 0xE0 && $byte <= 0xEF) {
 823                      if (0xE0 === $byte) {
 824                          $lowerBoundary = 0xA0;
 825                      } elseif (0xED === $byte) {
 826                          $upperBoundary = 0x9F;
 827                      }
 828  
 829                      $bytesNeeded = 2;
 830                      $codePoint = $byte & 0xF;
 831                  } elseif ($byte >= 0xF0 && $byte <= 0xF4) {
 832                      if (0xF0 === $byte) {
 833                          $lowerBoundary = 0x90;
 834                      } elseif (0xF4 === $byte) {
 835                          $upperBoundary = 0x8F;
 836                      }
 837  
 838                      $bytesNeeded = 3;
 839                      $codePoint = $byte & 0x7;
 840                  } else {
 841                      $codePoints[] = 0xFFFD;
 842                  }
 843  
 844                  continue;
 845              }
 846  
 847              if ($byte < $lowerBoundary || $byte > $upperBoundary) {
 848                  $codePoint = 0;
 849                  $bytesNeeded = 0;
 850                  $bytesSeen = 0;
 851                  $lowerBoundary = 0x80;
 852                  $upperBoundary = 0xBF;
 853                  --$i;
 854                  $codePoints[] = 0xFFFD;
 855  
 856                  continue;
 857              }
 858  
 859              $lowerBoundary = 0x80;
 860              $upperBoundary = 0xBF;
 861              $codePoint = ($codePoint << 6) | ($byte & 0x3F);
 862  
 863              if (++$bytesSeen !== $bytesNeeded) {
 864                  continue;
 865              }
 866  
 867              $codePoints[] = $codePoint;
 868              $codePoint = 0;
 869              $bytesNeeded = 0;
 870              $bytesSeen = 0;
 871          }
 872  
 873          // String unexpectedly ended, so append a U+FFFD code point.
 874          if (0 !== $bytesNeeded) {
 875              $codePoints[] = 0xFFFD;
 876          }
 877  
 878          return $codePoints;
 879      }
 880  
 881      /**
 882       * @param int  $codePoint
 883       * @param bool $useSTD3ASCIIRules
 884       *
 885       * @return array{status: string, mapping?: string}
 886       */
 887      private static function lookupCodePointStatus($codePoint, $useSTD3ASCIIRules)
 888      {
 889          if (!self::$mappingTableLoaded) {
 890              self::$mappingTableLoaded = true;
 891              self::$mapped = require  __DIR__.'/Resources/unidata/mapped.php';
 892              self::$ignored = require  __DIR__.'/Resources/unidata/ignored.php';
 893              self::$deviation = require  __DIR__.'/Resources/unidata/deviation.php';
 894              self::$disallowed = require  __DIR__.'/Resources/unidata/disallowed.php';
 895              self::$disallowed_STD3_mapped = require  __DIR__.'/Resources/unidata/disallowed_STD3_mapped.php';
 896              self::$disallowed_STD3_valid = require  __DIR__.'/Resources/unidata/disallowed_STD3_valid.php';
 897          }
 898  
 899          if (isset(self::$mapped[$codePoint])) {
 900              return ['status' => 'mapped', 'mapping' => self::$mapped[$codePoint]];
 901          }
 902  
 903          if (isset(self::$ignored[$codePoint])) {
 904              return ['status' => 'ignored'];
 905          }
 906  
 907          if (isset(self::$deviation[$codePoint])) {
 908              return ['status' => 'deviation', 'mapping' => self::$deviation[$codePoint]];
 909          }
 910  
 911          if (isset(self::$disallowed[$codePoint]) || DisallowedRanges::inRange($codePoint)) {
 912              return ['status' => 'disallowed'];
 913          }
 914  
 915          $isDisallowedMapped = isset(self::$disallowed_STD3_mapped[$codePoint]);
 916  
 917          if ($isDisallowedMapped || isset(self::$disallowed_STD3_valid[$codePoint])) {
 918              $status = 'disallowed';
 919  
 920              if (!$useSTD3ASCIIRules) {
 921                  $status = $isDisallowedMapped ? 'mapped' : 'valid';
 922              }
 923  
 924              if ($isDisallowedMapped) {
 925                  return ['status' => $status, 'mapping' => self::$disallowed_STD3_mapped[$codePoint]];
 926              }
 927  
 928              return ['status' => $status];
 929          }
 930  
 931          return ['status' => 'valid'];
 932      }
 933  }


Generated: Mon Mar 31 05:10:02 2025 Cross-referenced by PHPXref 0.7.1