[ Index ] |
PHP Cross Reference of YOURLS |
[Summary view] [Print] [Text view]
<?php

/**
 * Formatting functions for URLs
 *
 * Exercises yourls_get_protocol(), yourls_sanitize_url(), yourls_sanitize_url_safe()
 * and yourls_match_current_protocol() against plain, mixed case and IDN URLs.
 *
 * @group formatting
 * @group url
 * @group idn
 * @since 0.1
 */
class Format_URL extends PHPUnit\Framework\TestCase {

    /**
     * Remove the 'is_ssl' filters that the SSL matching tests add, so they do not carry over to the next test
     */
    protected function tearDown(): void {
        yourls_remove_filter( 'is_ssl', 'yourls_return_true' );
        yourls_remove_filter( 'is_ssl', 'yourls_return_false' );
    }

    /**
     * List of schemes to test. Structure: array( string to test, expected scheme )
     *
     * @return array[]
     */
    public static function list_of_schemes(): array {
        return [
            [ 'example:80/blah'             , 'example:' ],
            [ 'example.com/blah'            , '' ],
            [ 'example.com:80/blah'         , 'example.com:' ],
            [ 'scheme://example.com:80/blah', 'scheme://' ],
            [ 'scheme:example.com'          , 'scheme:' ],
            [ 'scheme:/example.com:80/hey'  , 'scheme:' ],
            [ 'scheme:/example:80/hey'      , 'scheme:' ],
            [ 'scheme://example'            , 'scheme://' ],
            [ 'scheme:///example'           , 'scheme://' ],
            [ 'scheme+bleh:example'         , 'scheme+bleh:' ],
            [ 'scheme :example'             , '' ],
            [ 'scheme+bleh : example'       , '' ],
            [ 'scheme45:example'            , 'scheme45:' ],
            [ '45scheme:example'            , '' ],
            [ 'scheme+-.1337:example'       , 'scheme+-.1337:' ],
            [ '+scheme:example'             , '' ],
            [ 'scheme'                      , '' ],
        ];
    }

    /**
     * Correctly get protocols
     *
     * @since 0.1
     * @dataProvider list_of_schemes
     *
     * @param string $test_this String to extract a protocol from
     * @param string $expected  Expected protocol, or an empty string when none should be found
     */
    public function test_correcttly_get_protocols( $test_this, $expected ): void {
        // Expected value goes first so PHPUnit failure messages label both sides correctly
        $this->assertSame( $expected, yourls_get_protocol( $test_this ) );
    }


    /**
     * List of valid URLs that should not be changed when sanitized
     *
     * NOTE(review): the '[email protected]' fragments below look like an e-mail obfuscation
     * artifact rather than genuine test data - confirm and restore the original literals.
     *
     * @return array[]
     */
    public static function list_of_valid_URLs(): array {
        return [
            [ 'http://example.com' ],
            [ 'http://example.com/' ],
            [ 'http://[email protected]/' ],
            [ 'http://example.com/?@OMG' ], // #1890
            [ 'http://[email protected]#BLAH' ],
            [ 'http://Ozh:[email protected]/' ],
            [ 'http://Ozh:[email protected]#OMG' ],
            [ 'http://Ozh:[email protected]:1337/' ],
            [ 'http://Ozh:[email protected]:1337#OMG' ],
            [ 'http://Ozh:[email protected]/hey@ho' ],
            [ 'http://username:[email protected]:8042/over/there/index.dtb?type=animal&name=narwhal#nose:@:@' ],
            [ 'mailto:[email protected]' ],
            [ 'http://example.com/?watchtheallowedcharacters-~+_.#=&;,/:%!*stay' ],
            [ 'http://example.com/search.php?search=(amistillhere)' ],
            [ 'http://example.com/?test=%2812345%29abcdef[gh]' ],
            [ 'http://example.com/?test=(12345)abcdef[gh]' ],
            [ 'http://[0:0:0:0:0:0:0:1]/' ],
            [ 'http://[2001:db8:1f70::999:de8:7648:6e8]:100/' ],
            [ 'http://example.com/?req=http;//blah' ],
            [ 'relative' ],
            [ 'Relative/path/' ],
            [ 'relative/Path/#yes' ],
            [ '/absolute' ],
            [ '/Absolute/Path/' ],
            [ '/absolute/path/?omg#also' ],
            [ 'http://académie-française.fr' ],
            [ 'http://www.طارق.net/طارق?hello=%2B' ],
            [ 'http://%d8%b7%d8%a7%d8%b1%d9%82.net/' ], // this is طارق.net, encoded. I _think_ it qualifies as valid
        ];
    }

    /**
     * Test that valid URLs are not modified
     *
     * @since 0.1
     * @dataProvider list_of_valid_URLs
     *
     * @param string $url URL expected to come out of yourls_sanitize_url() unchanged
     */
    public function test_valid_urls( $url ): void {
        $this->assertEquals( $url, yourls_sanitize_url( $url ) );
    }

    /**
     * URL with spaces
     *
     * @since 0.1
     */
    public function test_url_with_spaces(): void {
        $this->assertEquals( 'http://example.com/HelloWorld', yourls_sanitize_url( 'http://example.com/Hello World' ) );
        $this->assertEquals( 'http://example.com/Hello%20World', yourls_sanitize_url( 'http://example.com/Hello%20World' ) );
        $this->assertEquals( 'http://example.com/', yourls_sanitize_url( 'http://example.com/ ' ) );
        $this->assertEquals( 'http://example.com/', yourls_sanitize_url( ' http://example.com/' ) );
        $this->assertEquals( 'http://example.com/', yourls_sanitize_url( ' http://example.com/ ' ) );
    }

    /**
     * URL with bad chars
     *
     * @since 0.1
     */
    public function test_url_with_bad_characters(): void {
        // regular sanitize leaves %0A & %0D alone
        $this->assertEquals( 'http://example.com/keep%0Dlinefeed%0A', yourls_sanitize_url( 'http://example.com/keep%0Dlinefeed%0A' ) );
        $this->assertEquals( 'http://example.com/%0%0%0DAD', yourls_sanitize_url( 'http://example.com/%0%0%0DAD' ) );

        // sanitize with anti CRLF
        $this->assertEquals( 'http://example.com/watchthelinefeedgo', yourls_sanitize_url_safe( 'http://example.com/watchthelinefeed%0Ago' ) );
        $this->assertEquals( 'http://example.com/watchthelinefeedgo', yourls_sanitize_url_safe( 'http://example.com/watchthelinefeed%0ago' ) );
        $this->assertEquals( 'http://example.com/watchthecarriagereturngo', yourls_sanitize_url_safe( 'http://example.com/watchthecarriagereturn%0Dgo' ) );
        $this->assertEquals( 'http://example.com/watchthecarriagereturngo', yourls_sanitize_url_safe( 'http://example.com/watchthecarriagereturn%0dgo' ) );

        // Nesting checks: removing one encoded char must not leave a new one behind
        $this->assertEquals( 'http://example.com/watchthecarriagereturngo', yourls_sanitize_url_safe( 'http://example.com/watchthecarriagereturn%0%0ddgo' ) );
        $this->assertEquals( 'http://example.com/watchthecarriagereturngo', yourls_sanitize_url_safe( 'http://example.com/watchthecarriagereturn%0%0DDgo' ) );
        $this->assertEquals( 'http://example.com/', yourls_sanitize_url_safe( 'http://example.com/%0%0%0DAD' ) );
        $this->assertEquals( 'http://example.com/', yourls_sanitize_url_safe( 'http://example.com/%0%0%0ADA' ) );
        $this->assertEquals( 'http://example.com/', yourls_sanitize_url_safe( 'http://example.com/%0%0%0DAd' ) );
        $this->assertEquals( 'http://example.com/', yourls_sanitize_url_safe( 'http://example.com/%0%0%0ADa' ) );
    }

    /**
     * Test valid, missing and fake protocols
     *
     * NOTE(review): the '[email protected]' fragment in the mailto case looks like an e-mail
     * obfuscation artifact - confirm and restore the original literal.
     *
     * @since 0.1
     */
    public function test_url_with_protocols(): void {
        $this->assertEquals( 'http://example.com', yourls_sanitize_url( 'http://example.com' ) );
        $this->assertEquals( 'example.php', yourls_sanitize_url( 'example.php' ) );
        $this->assertEquals( '', yourls_sanitize_url( 'htttp://example.com' ) );
        $this->assertEquals( 'mailto:[email protected]', yourls_sanitize_url( 'mailto:[email protected]' ) );

        // play with allowed protocols
        $this->assertEquals( '', yourls_sanitize_url( 'nasty://example.com/' ) );
        $this->assertEquals( 'nasty://example.com/', yourls_sanitize_url( 'nasty://example.com/', [ 'nasty://' ] ) );

        global $yourls_allowedprotocols;
        $yourls_allowedprotocols[] = 'evil://';
        try {
            $this->assertEquals( 'evil://example.com', yourls_sanitize_url( 'evil://example.com' ) );
        } finally {
            // Reset the global list even when the assertion fails, so 'evil://' cannot leak into later tests
            $yourls_allowedprotocols = yourls_kses_allowed_protocols();
        }
    }

    /**
     * List of URLs with MiXeD CaSe to test. Structure: array( sanitized url, unsanitized url with mixed case )
     *
     * NOTE(review): the '[email protected]' fragments below look like an e-mail obfuscation
     * artifact rather than genuine test data - confirm and restore the original literals.
     *
     * @return array[]
     */
    public static function list_of_mixed_case(): array {
        return [
            [ 'http://example.com'         , 'http://example.com' ],          // normal, no trailing slash
            [ 'http://example.com/'        , 'http://example.com/' ],         // normal, trailing slash
            [ 'http://example.com'         , 'HTTP://example.com' ],
            [ 'http://example.com'         , 'Http://example.com' ],
            [ 'http://example.com'         , 'Http://ExAmPlE.com' ],
            [ 'http://example.com/BLAH'    , 'Http://ExAmPlE.com/BLAH' ],
            [ 'http://http/HTTP?HTTP#HTTP' , 'HTTP://HTTP/HTTP?HTTP#HTTP' ],
            [ 'http://example.com/?@BLaH'  , 'Http://ExAmPlE.com/?@BLaH' ],   // #1890
            [ 'http://example.com#BLAH'    , 'Http://ExAmPlE.com#BLAH' ],
            [ 'http://example.com#BLAH'    , 'Http://@ExAmPlE.com#BLAH' ],
            [ 'http://example.com#BLAH'    , 'Http://:@ExAmPlE.com#BLAH' ],
            [ 'http://example.com?BLAH'    , 'Http://ExAmPlE.com?BLAH' ],
            [ 'http://Ozh:[email protected]:1337#OMG' , 'http://Ozh:[email protected]:1337#OMG' ],
            [ 'http://User:[email protected]?User:[email protected]' , 'http://User:[email protected]?User:[email protected]' ],
            [ 'mailto:[email protected]?omg' , 'MAILTO:[email protected]?omg' ],
            [ 'http://www.طارق.net/' , 'http://www.طارق.Net/' ],
            [ 'http://académie-française.fr' , 'http://Académie-française.FR' ],
        ];
    }

    /**
     * Protocol and domain with mixed case
     *
     * @since 0.1
     * @dataProvider list_of_mixed_case
     *
     * @param string $sanitized   Expected result
     * @param string $unsanitized URL passed to yourls_sanitize_url()
     */
    public function test_url_with_protocol_case( $sanitized, $unsanitized ): void {
        $this->assertEquals( $sanitized, yourls_sanitize_url( $unsanitized ) );
    }

    /**
     * List of URLs with IDN domain, and how YOURLS should sanitize them
     * Structure: array( unsanitized url, expected sanitized url )
     *
     * @return array[]
     */
    public static function list_of_IDN(): array {
        return [
            [ 'http://www.طارق.Net/Omgطارق' , 'http://www.طارق.net/Omgطارق' ],
            [ 'http://xn--mgbuq0c.Net/Omgطارق' , 'http://طارق.net/Omgطارق' ],
            [ 'http://%d8%b7%d8%a7%d8%b1%d9%82.Net/Omgطارق' , 'http://%d8%b7%d8%a7%d8%b1%d9%82.net/Omgطارق' ], // طارق.net, urlencoded
            [ 'http://xn--p1ai.РФ' , 'http://рф.рф' ], // lowercasing where applicable: РФ -> рф
            [ 'http://РФ.xn--p1ai/' , 'http://рф.рф/' ],
            [ 'http://xn--p1ai.xn--p1ai' , 'http://рф.рф' ],
        ];
    }

    /**
     * URLs with IDN domains are sanitized as expected
     *
     * Note the argument order: unlike list_of_mixed_case, this provider gives the unsanitized URL first.
     *
     * @dataProvider list_of_IDN
     *
     * @param string $unsanitized URL passed to yourls_sanitize_url()
     * @param string $sanitized   Expected result
     */
    public function test_url_with_IDN( $unsanitized, $sanitized ): void {
        $this->assertEquals( $sanitized, yourls_sanitize_url( $unsanitized ) );
    }

    /**
     * List of URLS and expected matches whether we're on SSL or not.
     * Structure: array(original URL, expected URL if we're on HTTP, expected URL if we're on HTTPS)
     *
     * @return array[]
     */
    public static function list_of_urls_with_and_without_https(): array {
        return [
            [ 'http://omg',        'http://omg',        'https://omg' ],
            [ 'https://omg',       'https://omg',       'https://omg' ],
            [ 'http://omg?http',   'http://omg?http',   'https://omg?http' ],
            [ 'https://omg?http',  'https://omg?http',  'https://omg?http' ],
            [ 'omg?http://bleh',   'omg?http://bleh',   'omg?http://bleh' ],
            [ 'omg?https://bleh',  'omg?https://bleh',  'omg?https://bleh' ],
            [ 'http',              'http',              'http' ],
            [ 'https',             'https',             'https' ],
            [ 'http://https',      'http://https',      'https://https' ],
            [ 'https://https',     'https://https',     'https://https' ],
        ];
    }

    /**
     * Test matching protocol with no SSL
     *
     * Feed URL and return a result that matches "http"
     *
     * @dataProvider list_of_urls_with_and_without_https
     *
     * @param string $url         Original URL
     * @param string $without_ssl Expected URL when 'is_ssl' is filtered to false
     * @param string $with_ssl    Unused here; present because the provider is shared with the SSL test
     */
    public function test_matching_protocols_with_no_ssl( $url, $without_ssl, $with_ssl ): void {
        yourls_add_filter( 'is_ssl', 'yourls_return_false' );
        $this->assertEquals( $without_ssl, yourls_match_current_protocol( $url ) );
    }

    /**
     * Test matching protocol with SSL
     *
     * Feed URL and return a result that matches "https"
     *
     * @dataProvider list_of_urls_with_and_without_https
     *
     * @param string $url         Original URL
     * @param string $without_ssl Unused here; present because the provider is shared with the non SSL test
     * @param string $with_ssl    Expected URL when 'is_ssl' is filtered to true
     */
    public function test_matching_protocols_with_ssl( $url, $without_ssl, $with_ssl ): void {
        yourls_add_filter( 'is_ssl', 'yourls_return_true' );
        $this->assertEquals( $with_ssl, yourls_match_current_protocol( $url ) );
    }

    /**
     * List of various valid URL with mixed scenarios of IDN
     * Structure: array(URL, expected URL after yourls_sanitize_url (and especially yourls_normalize_uri(), which deals with IDN)
     *
     * @return array[]
     */
    public static function list_of_idn_punycode_utf8_rtl(): array {
        return [
            [ 'http://ua-test.link' , 'http://ua-test.link' ], // Ascii.new
            [ 'http://ua-test.technology' , 'http://ua-test.technology' ], // Ascii.long
            [ 'http://普试.top/' , 'http://普试.top/' ], // Idn.ascii
            [ 'http://ua-test.世界' , 'http://ua-test.世界' ], // Ascii.idn
            [ 'http://普试.世界/' , 'http://普试.世界/' ], // Idn.idn
            [ 'http://ua-test.xn--rhqv96g' , 'http://ua-test.世界' ], // Ascii.punycode
            [ 'http://xn--tkvo64f.top' , 'http://普试.top' ], // Punycode.ascii
            [ 'http://xn--tkvo64f.xn--rhqv96g' , 'http://普试.世界' ], // Punycode.punycode
            [ 'http://اختبار-القبولالعالمي.top' , 'http://اختبار-القبولالعالمي.top' ], // RTL.ascii
            [ 'http://اختبار-القبولالعالمي.شبكة' , 'http://اختبار-القبولالعالمي.شبكة' ], // RTL.RTL
            [ 'http://ua-test.link/我的' , 'http://ua-test.link/我的' ], // Ascii.new/Unicode
            [ 'http://ua-test.technology/我的' , 'http://ua-test.technology/我的' ], // Ascii.long/Unicode
            [ 'http://普试.top/我的' , 'http://普试.top/我的' ], // Idn.ascii/Unicode
            [ 'http://ua-test.世界/我的' , 'http://ua-test.世界/我的' ], // Ascii.idn/Unicode
            [ 'http://普试.世界/我的' , 'http://普试.世界/我的' ], // Idn.idn/Unicode
            [ 'http://ختبار-القبولالعالمي.top/我的' , 'http://ختبار-القبولالعالمي.top/我的' ], // RTL.ascii/Unicode
            [ 'http://اختبار-القبولالعالمي.شبكة/我的' , 'http://اختبار-القبولالعالمي.شبكة/我的' ], // RTL.RTL/Unicode

            // Damn, due to these UTF8 chars not being fixed width, we cannot neatly
            // justify the code and comments. How disappointing.
        ];
    }

    /**
     * Test various cases : domain name / TLD / path with ascii, punycode, utf8 and RTL
     *
     * @dataProvider list_of_idn_punycode_utf8_rtl
     *
     * @param string $url      URL passed to yourls_sanitize_url()
     * @param string $expected Expected result
     */
    public function test_various_idn_cases( $url, $expected ): void {
        // Expected value goes first so PHPUnit failure messages label both sides correctly
        $this->assertEquals( $expected, yourls_sanitize_url( $url ) );
    }

}
title
Description
Body
title
Description
Body
title
Description
Body
title
Body
Generated: Tue Jan 21 05:10:11 2025 | Cross-referenced by PHPXref 0.7.1 |