[ Index ]

PHP Cross Reference of YOURLS

title

Body

[close]

/tests/tests/format/ -> urls.php (source)

   1  <?php
   2  
   3  /**
   4   * Formatting functions for URLs
   5   *
   6   * @group formatting
   7   * @group url
   8   * @group idn
   9   * @since 0.1
  10   */
  11  class Format_URL extends PHPUnit\Framework\TestCase {
  12  
  13      protected function tearDown(): void {
  14          yourls_remove_filter( 'is_ssl', 'yourls_return_true' );
  15          yourls_remove_filter( 'is_ssl', 'yourls_return_false' );
  16      }
  17  
  18      /**
  19       * List of schemes to test. Structure: array( string to test, expected scheme )
  20       */
  21      function list_of_schemes() {
  22          return array(
  23             array( 'example:80/blah'             , 'example:' ),
  24             array( 'example.com/blah'            , '' ),
  25             array( 'example.com:80/blah'         , 'example.com:' ),
  26             array( 'scheme://example.com:80/blah', 'scheme://' ),
  27             array( 'scheme:example.com'          , 'scheme:' ),
  28             array( 'scheme:/example.com:80/hey'  , 'scheme:' ),
  29             array( 'scheme:/example:80/hey'      , 'scheme:' ),
  30             array( 'scheme://example'            , 'scheme://' ),
  31             array( 'scheme:///example'           , 'scheme://' ),
  32             array( 'scheme+bleh:example'         , 'scheme+bleh:' ),
  33             array( 'scheme :example'             , '' ),
  34             array( 'scheme+bleh : example'       , '' ),
  35             array( 'scheme45:example'            , 'scheme45:' ),
  36             array( '45scheme:example'            , '' ),
  37             array( 'scheme+-.1337:example'       , 'scheme+-.1337:' ),
  38             array( '+scheme:example'             , '' ),
  39             array( 'scheme'                      , '' ),
  40          );
  41      }
  42  
  43      /**
  44       * Correctly get protocols
  45       *
  46       * @since 0.1
  47       * @dataProvider list_of_schemes
  48       */
  49      function test_correcttly_get_protocols( $test_this, $expected ) {
  50          $this->assertSame( yourls_get_protocol( $test_this ), $expected );
  51      }
  52  
  53  
  54      /**
  55       * List of valid URLs that should not be changed when sanitized
  56       */
  57      function list_of_valid_URLs() {
  58          return array(
  59              array( 'http://example.com' ),
  60              array( 'http://example.com/' ),
  61              array( 'http://[email protected]/' ),
  62              array( 'http://example.com/?@OMG' ), // #1890
  63              array( 'http://[email protected]#BLAH' ),
  64              array( 'http://Ozh:[email protected]/' ),
  65              array( 'http://Ozh:[email protected]#OMG' ),
  66              array( 'http://Ozh:[email protected]:1337/' ),
  67              array( 'http://Ozh:[email protected]:1337#OMG' ),
  68              array( 'http://Ozh:[email protected]/hey@ho' ),
  69              array( 'http://username:[email protected]:8042/over/there/index.dtb?type=animal&name=narwhal#nose:@:@' ),
  70              array( 'mailto:[email protected]' ),
  71              array( 'http://example.com/?watchtheallowedcharacters-~+_.#=&;,/:%!*stay' ),
  72              array( 'http://example.com/search.php?search=(amistillhere)' ),
  73              array( 'http://example.com/?test=%2812345%29abcdef[gh]' ),
  74              array( 'http://example.com/?test=(12345)abcdef[gh]' ),
  75              array( 'http://[0:0:0:0:0:0:0:1]/' ),
  76              array( 'http://[2001:db8:1f70::999:de8:7648:6e8]:100/' ),
  77              array( 'http://example.com/?req=http;//blah' ), //
  78              array( 'relative' ),
  79              array( 'Relative/path/' ),
  80              array( 'relative/Path/#yes' ),
  81              array( '/absolute' ),
  82              array( '/Absolute/Path/' ),
  83              array( '/absolute/path/?omg#also' ),
  84              array( 'http://académie-française.fr' ),
  85              array( 'http://www.طارق.net/طارق?hello=%2B' ),
  86              array( 'http://%d8%b7%d8%a7%d8%b1%d9%82.net/' ), // this is طارق.net, encoded. I _think_ it qualifies as valid
  87          );
  88      }
  89  
  90      /**
  91       * Test that valid URLs are not modified
  92       *
  93       * @since 0.1
  94       * @dataProvider list_of_valid_URLs
  95       */
  96      function test_valid_urls( $url ) {
  97          $this->assertEquals( $url, yourls_sanitize_url( $url ) );
  98      }
  99  
 100      /**
 101       * URL with spaces
 102       *
 103       * @since 0.1
 104       */
 105  	function test_url_with_spaces() {
 106          $this->assertEquals( 'http://example.com/HelloWorld', yourls_sanitize_url( 'http://example.com/Hello World' ) );
 107          $this->assertEquals( 'http://example.com/Hello%20World', yourls_sanitize_url( 'http://example.com/Hello%20World' ) );
 108          $this->assertEquals( 'http://example.com/', yourls_sanitize_url( 'http://example.com/ ' ) );
 109          $this->assertEquals( 'http://example.com/', yourls_sanitize_url( ' http://example.com/' ) );
 110          $this->assertEquals( 'http://example.com/', yourls_sanitize_url( ' http://example.com/ ' ) );
 111      }
 112  
 113      /**
 114       * URL with bad chars
 115       *
 116       * @since 0.1
 117       */
 118  	function test_url_with_bad_characters() {
 119          // regular sanitize leaves %0A & %0D alone
 120          $this->assertEquals( 'http://example.com/keep%0Dlinefeed%0A', yourls_sanitize_url( 'http://example.com/keep%0Dlinefeed%0A' ) );
 121          $this->assertEquals( 'http://example.com/%0%0%0DAD', yourls_sanitize_url( 'http://example.com/%0%0%0DAD' ) );
 122  
 123          // sanitize with anti CRLF
 124          $this->assertEquals( 'http://example.com/watchthelinefeedgo', yourls_sanitize_url_safe( 'http://example.com/watchthelinefeed%0Ago' ) );
 125          $this->assertEquals( 'http://example.com/watchthelinefeedgo', yourls_sanitize_url_safe( 'http://example.com/watchthelinefeed%0ago' ) );
 126          $this->assertEquals( 'http://example.com/watchthecarriagereturngo', yourls_sanitize_url_safe( 'http://example.com/watchthecarriagereturn%0Dgo' ) );
 127          $this->assertEquals( 'http://example.com/watchthecarriagereturngo', yourls_sanitize_url_safe( 'http://example.com/watchthecarriagereturn%0dgo' ) );
 128  
 129          //Nesting Checks
 130          $this->assertEquals( 'http://example.com/watchthecarriagereturngo', yourls_sanitize_url_safe( 'http://example.com/watchthecarriagereturn%0%0ddgo' ) );
 131          $this->assertEquals( 'http://example.com/watchthecarriagereturngo', yourls_sanitize_url_safe( 'http://example.com/watchthecarriagereturn%0%0DDgo' ) );
 132          $this->assertEquals( 'http://example.com/', yourls_sanitize_url_safe( 'http://example.com/%0%0%0DAD' ) );
 133          $this->assertEquals( 'http://example.com/', yourls_sanitize_url_safe( 'http://example.com/%0%0%0ADA' ) );
 134          $this->assertEquals( 'http://example.com/', yourls_sanitize_url_safe( 'http://example.com/%0%0%0DAd' ) );
 135          $this->assertEquals( 'http://example.com/', yourls_sanitize_url_safe( 'http://example.com/%0%0%0ADa' ) );
 136      }
 137  
 138      /**
 139       * Test valid, missing and fake protocols
 140       *
 141       * @since 0.1
 142       */
 143  	function test_url_with_protocols() {
 144          $this->assertEquals( 'http://example.com', yourls_sanitize_url( 'http://example.com' ) );
 145          $this->assertEquals( 'example.php', yourls_sanitize_url( 'example.php' ) );
 146          $this->assertEquals( '', yourls_sanitize_url( 'htttp://example.com' ) );
 147          $this->assertEquals( 'mailto:[email protected]', yourls_sanitize_url( 'mailto:[email protected]' ) );
 148          // play with allowed protocols
 149          $this->assertEquals( '', yourls_sanitize_url( 'nasty://example.com/' ) );
 150          $this->assertEquals( 'nasty://example.com/', yourls_sanitize_url( 'nasty://example.com/', array('nasty://') ) );
 151          global $yourls_allowedprotocols;
 152          $yourls_allowedprotocols[] = 'evil://';
 153          $this->assertEquals( 'evil://example.com', yourls_sanitize_url( 'evil://example.com' ) );
 154          $yourls_allowedprotocols = yourls_kses_allowed_protocols();
 155      }
 156  
 157      /**
 158       * List of URLs with MiXeD CaSe to test. Structure: array( sanitized url, unsanitized url with mixed case )
 159       */
 160      function list_of_mixed_case() {
 161          return array(
 162              array( 'http://example.com'                               , 'http://example.com' ),    # normal, no trailing slash
 163              array( 'http://example.com/'                              , 'http://example.com/' ),   # normal, trailing slash
 164              array( 'http://example.com'                               , 'HTTP://example.com' ),
 165              array( 'http://example.com'                               , 'Http://example.com' ),
 166              array( 'http://example.com'                               , 'Http://ExAmPlE.com' ),
 167              array( 'http://example.com/BLAH'                          , 'Http://ExAmPlE.com/BLAH' ),
 168              array( 'http://http/HTTP?HTTP#HTTP'                       , 'HTTP://HTTP/HTTP?HTTP#HTTP' ),
 169              array( 'http://example.com/?@BLaH'                        , 'Http://ExAmPlE.com/?@BLaH' ), #1890
 170              array( 'http://example.com#BLAH'                          , 'Http://ExAmPlE.com#BLAH' ),
 171              array( 'http://example.com#BLAH'                          , 'Http://@ExAmPlE.com#BLAH' ),
 172              array( 'http://example.com#BLAH'                          , 'Http://:@ExAmPlE.com#BLAH' ),
 173              array( 'http://example.com?BLAH'                          , 'Http://ExAmPlE.com?BLAH' ),
 174              array( 'http://Ozh:[email protected]:1337#OMG'         , 'http://Ozh:[email protected]:1337#OMG' ),
 175              array( 'http://User:[email protected]?User:[email protected]' , 'http://User:[email protected]?User:[email protected]' ),
 176              array( 'mailto:[email protected]?omg'                           , 'MAILTO:[email protected]?omg' ),
 177              array( 'http://www.طارق.net/'                             , 'http://www.طارق.Net/' ),
 178              array( 'http://académie-française.fr'                     , 'http://Académie-française.FR' ),
 179          );
 180      }
 181  
 182      /**
 183       * Protocol and domain with mixed case
 184       *
 185       * @since 0.1
 186       * @dataProvider list_of_mixed_case
 187       */
 188  	function test_url_with_protocol_case( $sanitized, $unsanitized ) {
 189          $this->assertEquals( $sanitized, yourls_sanitize_url( $unsanitized ) );
 190      }
 191  
 192      /**
 193       * List of URLs with IDN domain, and how YOURLS should sanitize them
 194       */
 195      function list_of_IDN() {
 196          return array(
 197              array( 'http://www.طارق.Net/Omgطارق'                  , 'http://www.طارق.net/Omgطارق' ),
 198              array( 'http://xn--mgbuq0c.Net/Omgطارق'               , 'http://طارق.net/Omgطارق' ),
 199              array( 'http://%d8%b7%d8%a7%d8%b1%d9%82.Net/Omgطارق'  , 'http://%d8%b7%d8%a7%d8%b1%d9%82.net/Omgطارق' ), // طارق.net, urlencoded
 200              array( 'http://xn--p1ai.РФ'                           , 'http://рф.рф' ), // lowercasing where applicable: РФ -> рф
 201              array( 'http://РФ.xn--p1ai/'                          , 'http://рф.рф/' ),
 202              array( 'http://xn--p1ai.xn--p1ai'                     , 'http://рф.рф' ),
 203          );
 204      }
 205  
 206      /**
 207       * Protocol and domain with mixed case
 208       *
 209       * @dataProvider list_of_IDN
 210       */
 211  	function test_url_with_IDN( $unsanitized, $sanitized ) {
 212          $this->assertEquals( $sanitized, yourls_sanitize_url( $unsanitized ) );
 213      }
 214  
 215      /**
 216       * List of URLS and expected matches whether we're on SSL or not.
 217       * Structure: array(original URL, expected URL if we're on HTTP, expected URL if we're on HTTPS)
 218       */
 219      function list_of_urls_with_and_without_https() {
 220          return array(
 221              array( 'http://omg',         'http://omg',        'https://omg' ),
 222              array( 'https://omg',        'https://omg',       'https://omg' ),
 223              array( 'http://omg?http',    'http://omg?http',   'https://omg?http' ),
 224              array( 'https://omg?http',   'https://omg?http',  'https://omg?http' ),
 225              array( 'omg?http://bleh',    'omg?http://bleh',   'omg?http://bleh' ),
 226              array( 'omg?https://bleh',   'omg?https://bleh',  'omg?https://bleh' ),
 227              array( 'http',               'http',              'http' ),
 228              array( 'https',              'https',             'https' ),
 229              array( 'http://https',       'http://https',      'https://https' ),
 230              array( 'https://https',      'https://https',     'https://https' ),
 231          );
 232      }
 233      /**
 234       * Test matching protocol with no SSL
 235       *
 236       * Feed URL and return a result that matches "http"
 237       *
 238       * @dataProvider list_of_urls_with_and_without_https
 239       */
 240      function test_matching_protocols_with_no_ssl( $url, $without_ssl, $with_ssl ) {
 241          yourls_add_filter('is_ssl', 'yourls_return_false');
 242          $this->assertEquals( $without_ssl, yourls_match_current_protocol($url) );
 243      }
 244  
 245      /**
 246       * Test matching protocol with SSL
 247       *
 248       * Feed URL and return a result that matches "https"
 249       *
 250       * @dataProvider list_of_urls_with_and_without_https
 251       */
 252      function test_matching_protocols_with_ssl( $url, $without_ssl, $with_ssl ) {
 253          yourls_add_filter('is_ssl', 'yourls_return_true');
 254          $this->assertEquals( $with_ssl, yourls_match_current_protocol($url) );
 255      }
 256  
 257      /**
 258       * List of various valid URL with mixed scenarios of IDN
 259       * Structure: array(URL, expected URL after yourls_sanitize_url (and especially yourls_normalize_uri(), which deals with IDN)
 260       */
 261      function list_of_idn_punycode_utf8_rtl() {
 262          return array(
 263              [ 'http://ua-test.link'                   , 'http://ua-test.link' ],                    // Ascii.new
 264              [ 'http://ua-test.technology'             , 'http://ua-test.technology' ],              // Ascii.long
 265              [ 'http://普试.top/'                      , 'http://普试.top/' ],                       // Idn.ascii
 266              [ 'http://ua-test.世界'                   , 'http://ua-test.世界' ],                    // Ascii.idn
 267              [ 'http://普试.世界/'                     , 'http://普试.世界/' ],                      // Idn.idn
 268              [ 'http://ua-test.xn--rhqv96g'            , 'http://ua-test.世界' ],                   // Ascii.punycode
 269              [ 'http://xn--tkvo64f.top'                , 'http://普试.top' ],                       // Punycode.ascii
 270              [ 'http://xn--tkvo64f.xn--rhqv96g'        , 'http://普试.世界'  ],                     // Punycode.punycode
 271              [ 'http://اختبار-القبولالعالمي.top'        , 'http://اختبار-القبولالعالمي.top' ],       // RTL.ascii
 272              [ 'http://اختبار-القبولالعالمي.شبكة'       , 'http://اختبار-القبولالعالمي.شبكة' ],      // RTL.RTL
 273              [ 'http://ua-test.link/我的'              , 'http://ua-test.link/我的' ],             // Ascii.new/Unicode
 274              [ 'http://ua-test.technology/我的'        , 'http://ua-test.technology/我的' ],       // Ascii.long/Unicode
 275              [ 'http://普试.top/我的'                  , 'http://普试.top/我的' ],                  // Idn.ascii/Unicode
 276              [ 'http://ua-test.世界/我的'              , 'http://ua-test.世界/我的' ],              // Ascii.idn/Unicode
 277              [ 'http://普试.世界/我的'                 , 'http://普试.世界/我的' ],                 // Idn.idn/Unicode
 278              [ 'http://ختبار-القبولالعالمي.top/我的'    , 'http://ختبار-القبولالعالمي.top/我的' ],   // RTL.ascii/Unicode
 279              [ 'http://اختبار-القبولالعالمي.شبكة/我的'  , 'http://اختبار-القبولالعالمي.شبكة/我的' ], // RTL.RTL/Unicode
 280  
 281                                                        // Damn, due to these UTF8 chars not being fixed width, we cannot neatly
 282                                                        // justify the code and comments. How disappointing.
 283          );
 284      }
 285  
 286      /**
 287       * Test various cases : domain name / TLD / path with ascii, punycode, utf8 and RTL
 288       *
 289       * @dataProvider list_of_idn_punycode_utf8_rtl
 290       */
 291      function test_various_idn_cases($url, $expected) {
 292          $this->assertEquals( yourls_sanitize_url($url), $expected );
 293      }
 294  
 295  }


Generated: Tue Jan 21 05:10:11 2025 Cross-referenced by PHPXref 0.7.1