diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml new file mode 100644 index 0000000..f4711d4 --- /dev/null +++ b/.github/workflows/ci.yml @@ -0,0 +1,39 @@ +name: CI + +on: + - push + - pull_request + +jobs: + test: + runs-on: ubuntu-latest + strategy: + matrix: + node: + - 12 + - 14 + - 16 + - 17 + steps: + - uses: actions/checkout@v2 + - uses: actions/setup-node@v2 + with: + node-version: ${{ matrix.node }} + - run: npm install + - run: npm test + - uses: coverallsapp/github-action@1.1.3 + if: matrix.node == 12 + with: + github-token: ${{ secrets.GITHUB_TOKEN }} + test-browser: + runs-on: ubuntu-latest + env: + SAUCE_USERNAME: url-parse + SAUCE_ACCESS_KEY: ${{ secrets.SAUCE_ACCESS_KEY }} + steps: + - uses: actions/checkout@v2 + - uses: actions/setup-node@v2 + with: + node-version: 12 + - run: npm install + - run: npm run test-browser diff --git a/.gitignore b/.gitignore index dd5fe6b..eec701f 100644 --- a/.gitignore +++ b/.gitignore @@ -1,6 +1,4 @@ node_modules/ -.nyc_output/ coverage/ dist/ -npm-debug.log .tern-port diff --git a/.npmrc b/.npmrc new file mode 100644 index 0000000..43c97e7 --- /dev/null +++ b/.npmrc @@ -0,0 +1 @@ +package-lock=false diff --git a/.travis.yml b/.travis.yml deleted file mode 100644 index 790ec10..0000000 --- a/.travis.yml +++ /dev/null @@ -1,26 +0,0 @@ -sudo: false -language: node_js -matrix: - fast_finish: true - include: - - node_js: "10" - env: SCRIPT=test - - node_js: "8" - env: SCRIPT=test - - node_js: "6" - env: SCRIPT=test - - node_js: "8" - env: - - secure: IF01oyIKSs0C5dARdYRTilKnU1TG4zenjjEPClkQxAWIpUOxl9xcNJWDVEOPxJ/4pVt+pozyT80Rp7efh6ZiREJIQI1tUboBKSqZzSbnD5uViQNSbQ90PaDP0FIUc0IQ5o07W36rijBB0DTmtU1VofzN9PKkJO7XiSSXevI8RcM= - - SAUCE_USERNAME=url-parse - - SCRIPT=test-browser -script: - - "npm run ${SCRIPT}" -after_script: - - 'if [ "${SCRIPT}" == "test" ]; then npm i coveralls@3 && cat coverage/lcov.info | coveralls; fi' -notifications: - irc: - channels: - - "irc.freenode.org#unshift" - on_success: change 
- on_failure: change diff --git a/README.md b/README.md index f81f919..e5bf8d7 100644 --- a/README.md +++ b/README.md @@ -1,9 +1,15 @@ # url-parse -[![Made by unshift](https://img.shields.io/badge/made%20by-unshift-00ffcc.svg?style=flat-square)](http://unshift.io)[![Version npm](https://img.shields.io/npm/v/url-parse.svg?style=flat-square)](https://www.npmjs.com/package/url-parse)[![Build Status](https://img.shields.io/travis/unshiftio/url-parse/master.svg?style=flat-square)](https://travis-ci.org/unshiftio/url-parse)[![Dependencies](https://img.shields.io/david/unshiftio/url-parse.svg?style=flat-square)](https://david-dm.org/unshiftio/url-parse)[![Coverage Status](https://img.shields.io/coveralls/unshiftio/url-parse/master.svg?style=flat-square)](https://coveralls.io/r/unshiftio/url-parse?branch=master)[![IRC channel](https://img.shields.io/badge/IRC-irc.freenode.net%23unshift-00a8ff.svg?style=flat-square)](https://webchat.freenode.net/?channels=unshift) +[![Version npm](https://img.shields.io/npm/v/url-parse.svg?style=flat-square)](https://www.npmjs.com/package/url-parse)[![Build Status](https://img.shields.io/github/workflow/status/unshiftio/url-parse/CI/master?label=CI&style=flat-square)](https://github.com/unshiftio/url-parse/actions?query=workflow%3ACI+branch%3Amaster)[![Coverage Status](https://img.shields.io/coveralls/unshiftio/url-parse/master.svg?style=flat-square)](https://coveralls.io/r/unshiftio/url-parse?branch=master) [![Sauce Test Status](https://saucelabs.com/browser-matrix/url-parse.svg)](https://saucelabs.com/u/url-parse) +**`url-parse` was created in 2014 when the WHATWG URL API was not available in +Node.js and the `URL` interface was supported only in some browsers. Today this +is no longer true. The `URL` interface is available in all supported Node.js +release lines and basically all browsers. Consider using it for better security +and accuracy.** + The `url-parse` method exposes two different API interfaces. 
The [`url`](https://nodejs.org/api/url.html) interface that you know from Node.js and the new [`URL`](https://developer.mozilla.org/en-US/docs/Web/API/URL/URL) @@ -80,8 +86,8 @@ The returned `url` instance contains the following properties: - `auth`: Authentication information portion (e.g. `username:password`). - `username`: Username of basic authentication. - `password`: Password of basic authentication. -- `host`: Host name with port number. -- `hostname`: Host name without port number. +- `host`: Host name with port number. The hostname might be invalid. +- `hostname`: Host name without port number. This might be an invalid hostname. - `port`: Optional port number. - `pathname`: URL path. - `query`: Parsed object containing query string, unless parsing is set to false. diff --git a/SECURITY.md b/SECURITY.md index a1c3d63..f642347 100644 --- a/SECURITY.md +++ b/SECURITY.md @@ -33,13 +33,82 @@ acknowledge your responsible disclosure, if you wish. ## History +> Leading control characters are not removed. This allows an attacker to bypass +> hostname checks and makes the `extractProtocol` method return false positives. + +- **Reporter credits** + - Haxatron + - GitHub: [@haxatron](https://github.com/haxatron) + - Twitter: [@haxatron1](https://twitter.com/haxatron1) +- Huntr report: https://www.huntr.dev/bounties/57124ed5-4b68-4934-8325-2c546257f2e4/ +- Fixed in: 1.5.9 + +--- + +> A URL with a specified but empty port can be used to bypass authorization +> checks. + +- **Reporter credits** + - Rohan Sharma + - GitHub: [@r0hansh](https://github.com/r0hansh) +- Huntr report: https://www.huntr.dev/bounties/55fd06cd-9054-4d80-83be-eb5a454be78c/ +- Fixed in: 1.5.8 + +--- + +> A specially crafted URL with empty userinfo and no host can be used to bypass +> authorization checks. 
+ +- **Reporter credits** + - Haxatron + - GitHub: [@haxatron](https://github.com/haxatron) + - Twitter: [@haxatron1](https://twitter.com/haxatron1) +- Huntr report: https://www.huntr.dev/bounties/83a6bc9a-b542-4a38-82cd-d995a1481155/ +- Fixed in: 1.5.7 + +--- + +> Incorrect handling of username and password can lead to authorization bypass. + +- **Reporter credits** + - ranjit-git + - GitHub: [@ranjit-git](https://github.com/ranjit-git) +- Huntr report: https://www.huntr.dev/bounties/6d1bc51f-1876-4f5b-a2c2-734e09e8e05b/ +- Fixed in: 1.5.6 + +--- + +> url-parse mishandles certain uses of a single (back) slash such as https:\ & +> https:/ and interprets the URI as a relative path. Browsers accept a single +> backslash after the protocol, and treat it as a normal slash, while url-parse +> sees it as a relative path. + +- **Reporter credits** + - ready-research + - GitHub: [@ready-research](https://github.com/ready-research) +- Huntr report: https://www.huntr.dev/bounties/1625557993985-unshiftio/url-parse/ +- Fixed in: 1.5.2 + +--- + +> Using backslash in the protocol is valid in the browser, while url-parse +> thinks it’s a relative path. An application that validates a url using +> url-parse might pass a malicious link. + +- **Reporter credits** + - CxSCA AppSec team at Checkmarx. + - Twitter: [Yaniv Nizry](https://twitter.com/ynizry) +- Fixed in: 1.5.0 + +--- + +> The `extractProtocol` method does not return the correct protocol when > provided with unsanitized content which could lead to false positives. - **Reporter credits** - Reported through our security email & Twitter interaction. 
- Twitter: [@ronperris](https://twitter.com/ronperris) - - Fixed in: 1.4.5 +- Fixed in: 1.4.5 --- diff --git a/index.js b/index.js index 9e58eda..b86c29f 100644 --- a/index.js +++ b/index.js @@ -2,19 +2,23 @@ var required = require('requires-port') , qs = require('querystringify') + , controlOrWhitespace = /^[\x00-\x20\u00a0\u1680\u2000-\u200a\u2028\u2029\u202f\u205f\u3000\ufeff]+/ + , CRHTLF = /[\n\r\t]/g , slashes = /^[A-Za-z][A-Za-z0-9+-.]*:\/\// - , protocolre = /^([a-z][a-z0-9.+-]*:)?(\/\/)?([\S\s]*)/i - , whitespace = '[\\x09\\x0A\\x0B\\x0C\\x0D\\x20\\xA0\\u1680\\u180E\\u2000\\u2001\\u2002\\u2003\\u2004\\u2005\\u2006\\u2007\\u2008\\u2009\\u200A\\u202F\\u205F\\u3000\\u2028\\u2029\\uFEFF]' - , left = new RegExp('^'+ whitespace +'+'); + , port = /:\d+$/ + , protocolre = /^([a-z][a-z0-9.+-]*:)?(\/\/)?([\\/]+)?([\S\s]*)/i + , windowsDriveLetter = /^[a-zA-Z]:/; /** - * Trim a given string. + * Remove control characters and whitespace from the beginning of a string. * - * @param {String} str String to trim. + * @param {Object|String} str String to trim. + * @returns {String} A new string representing `str` stripped of control + * characters and whitespace from its beginning. * @public */ function trimLeft(str) { - return (str ? str : '').toString().replace(left, ''); + return (str ? str : '').toString().replace(controlOrWhitespace, ''); } /** @@ -32,13 +36,13 @@ function trimLeft(str) { var rules = [ ['#', 'hash'], // Extract from the back. ['?', 'query'], // Extract from the back. - function sanitize(address) { // Sanitize what is left of the address - return address.replace('\\', '/'); + function sanitize(address, url) { // Sanitize what is left of the address + return isSpecial(url.protocol) ? address.replace(/\\/g, '/') : address; }, ['/', 'pathname'], // Extract from the back. ['@', 'auth', 1], // Extract from the front. [NaN, 'host', undefined, 1, 1], // Set left over value. - [/:(\d+)$/, 'port', undefined, 1], // RegExp the back. 
+ [/:(\d*)$/, 'port', undefined, 1], // RegExp the back. [NaN, 'hostname', undefined, 1, 1] // Set left over. ]; @@ -98,6 +102,24 @@ function lolcation(loc) { return finaldestination; } +/** + * Check whether a protocol scheme is special. + * + * @param {String} scheme The protocol scheme of the URL + * @return {Boolean} `true` if the protocol scheme is special, else `false` + * @private + */ +function isSpecial(scheme) { + return ( + scheme === 'file:' || + scheme === 'ftp:' || + scheme === 'http:' || + scheme === 'https:' || + scheme === 'ws:' || + scheme === 'wss:' + ); +} + /** * @typedef ProtocolExtract * @type Object @@ -110,17 +132,58 @@ function lolcation(loc) { * Extract protocol information from a URL with/without double slash ("//"). * * @param {String} address URL we want to extract from. + * @param {Object} location * @return {ProtocolExtract} Extracted information. * @private */ -function extractProtocol(address) { +function extractProtocol(address, location) { address = trimLeft(address); + address = address.replace(CRHTLF, ''); + location = location || {}; + var match = protocolre.exec(address); + var protocol = match[1] ? match[1].toLowerCase() : ''; + var forwardSlashes = !!match[2]; + var otherSlashes = !!match[3]; + var slashesCount = 0; + var rest; + + if (forwardSlashes) { + if (otherSlashes) { + rest = match[2] + match[3] + match[4]; + slashesCount = match[2].length + match[3].length; + } else { + rest = match[2] + match[4]; + slashesCount = match[2].length; + } + } else { + if (otherSlashes) { + rest = match[3] + match[4]; + slashesCount = match[3].length; + } else { + rest = match[4] + } + } + + if (protocol === 'file:') { + if (slashesCount >= 2) { + rest = rest.slice(2); + } + } else if (isSpecial(protocol)) { + rest = match[4]; + } else if (protocol) { + if (forwardSlashes) { + rest = rest.slice(2); + } + } else if (slashesCount >= 2 && isSpecial(location.protocol)) { + rest = match[4]; + } return { - protocol: match[1] ? 
match[1].toLowerCase() : '', - slashes: !!match[2], - rest: match[3] + protocol: protocol, + slashes: forwardSlashes || isSpecial(protocol), + slashesCount: slashesCount, + rest: rest }; } @@ -176,6 +239,7 @@ function resolve(relative, base) { */ function Url(address, location, parser) { address = trimLeft(address); + address = address.replace(CRHTLF, ''); if (!(this instanceof Url)) { return new Url(address, location, parser); @@ -210,7 +274,7 @@ function Url(address, location, parser) { // // Extract protocol information before running the instructions. // - extracted = extractProtocol(address || ''); + extracted = extractProtocol(address || '', location); relative = !extracted.protocol && !extracted.slashes; url.slashes = extracted.slashes || relative && location.slashes; url.protocol = extracted.protocol || location.protocol || ''; @@ -220,13 +284,22 @@ function Url(address, location, parser) { // When the authority component is absent the URL starts with a path // component. // - if (!extracted.slashes) instructions[3] = [/(.*)/, 'pathname']; + if ( + extracted.protocol === 'file:' && ( + extracted.slashesCount !== 2 || windowsDriveLetter.test(address)) || + (!extracted.slashes && + (extracted.protocol || + extracted.slashesCount < 2 || + !isSpecial(url.protocol))) + ) { + instructions[3] = [/(.*)/, 'pathname']; + } for (; i < instructions.length; i++) { instruction = instructions[i]; if (typeof instruction === 'function') { - address = instruction(address); + address = instruction(address, url); continue; } @@ -236,7 +309,11 @@ function Url(address, location, parser) { if (parse !== parse) { url[key] = address; } else if ('string' === typeof parse) { - if (~(index = address.indexOf(parse))) { + index = parse === '@' + ? 
address.lastIndexOf(parse) + : address.indexOf(parse); + + if (~index) { if ('number' === typeof instruction[2]) { url[key] = address.slice(0, index); address = address.slice(index + instruction[2]); @@ -280,6 +357,14 @@ function Url(address, location, parser) { url.pathname = resolve(url.pathname, location.pathname); } + // + // Default to a / for pathname if none exists. This normalizes the URL + // to always have a / + // + if (url.pathname.charAt(0) !== '/' && isSpecial(url.protocol)) { + url.pathname = '/' + url.pathname; + } + // // We should not add port numbers if they are already the default port number // for a given protocol. As the host also contains the port number we're going @@ -294,13 +379,24 @@ function Url(address, location, parser) { // Parse down the `auth` for the username and password. // url.username = url.password = ''; + if (url.auth) { - instruction = url.auth.split(':'); - url.username = instruction[0] || ''; - url.password = instruction[1] || ''; + index = url.auth.indexOf(':'); + + if (~index) { + url.username = url.auth.slice(0, index); + url.username = encodeURIComponent(decodeURIComponent(url.username)); + + url.password = url.auth.slice(index + 1); + url.password = encodeURIComponent(decodeURIComponent(url.password)) + } else { + url.username = encodeURIComponent(decodeURIComponent(url.auth)); + } + + url.auth = url.password ? url.username +':'+ url.password : url.username; } - url.origin = url.protocol && url.host && url.protocol !== 'file:' + url.origin = url.protocol !== 'file:' && isSpecial(url.protocol) && url.host ? 
url.protocol +'//'+ url.host : 'null'; @@ -357,7 +453,7 @@ function set(part, value, fn) { case 'host': url[part] = value; - if (/:\d+$/.test(value)) { + if (port.test(value)) { value = value.split(':'); url.port = value.pop(); url.hostname = value.join(':'); @@ -383,8 +479,23 @@ function set(part, value, fn) { } break; - default: - url[part] = value; + case 'username': + case 'password': + url[part] = encodeURIComponent(value); + break; + + case 'auth': + var index = value.indexOf(':'); + + if (~index) { + url.username = value.slice(0, index); + url.username = encodeURIComponent(decodeURIComponent(url.username)); + + url.password = value.slice(index + 1); + url.password = encodeURIComponent(decodeURIComponent(url.password)); + } else { + url.username = encodeURIComponent(decodeURIComponent(value)); + } } for (var i = 0; i < rules.length; i++) { @@ -393,7 +504,9 @@ function set(part, value, fn) { if (ins[4]) url[ins[1]] = url[ins[1]].toLowerCase(); } - url.origin = url.protocol && url.host && url.protocol !== 'file:' + url.auth = url.password ? url.username +':'+ url.password : url.username; + + url.origin = url.protocol !== 'file:' && isSpecial(url.protocol) && url.host ? url.protocol +'//'+ url.host : 'null'; @@ -414,19 +527,45 @@ function toString(stringify) { var query , url = this + , host = url.host , protocol = url.protocol; if (protocol && protocol.charAt(protocol.length - 1) !== ':') protocol += ':'; - var result = protocol + (url.slashes ? '//' : ''); + var result = + protocol + + ((url.protocol && url.slashes) || isSpecial(url.protocol) ? 
'//' : ''); if (url.username) { result += url.username; if (url.password) result += ':'+ url.password; result += '@'; + } else if (url.password) { + result += ':'+ url.password; + result += '@'; + } else if ( + url.protocol !== 'file:' && + isSpecial(url.protocol) && + !host && + url.pathname !== '/' + ) { + // + // Add back the empty userinfo, otherwise the original invalid URL + // might be transformed into a valid one with `url.pathname` as host. + // + result += '@'; + } + + // + // Trailing colon is removed from `url.host` when it is parsed. If it still + // ends with a colon, then add back the trailing colon that was removed. This + // prevents an invalid URL from being transformed into a valid one. + // + if (host[host.length - 1] === ':' || (port.test(url.hostname) && !url.port)) { + host += ':'; } - result += url.host + url.pathname; + result += host + url.pathname; query = 'object' === typeof url.query ? stringify(url.query) : url.query; if (query) result += '?' !== query.charAt(0) ? 
'?'+ query : query; diff --git a/package.json b/package.json index 2d9ce79..8d1bbbe 100644 --- a/package.json +++ b/package.json @@ -1,12 +1,12 @@ { "name": "url-parse", - "version": "1.4.7", + "version": "1.5.10", "description": "Small footprint URL parser that works seamlessly across Node.js and browser environments", "main": "index.js", "scripts": { "browserify": "rm -rf dist && mkdir -p dist && browserify index.js -s URLParse -o dist/url-parse.js", "minify": "uglifyjs dist/url-parse.js --source-map -cm -o dist/url-parse.min.js", - "test": "nyc --reporter=html --reporter=text mocha test/test.js", + "test": "c8 --reporter=lcov --reporter=text mocha test/test.js", "test-browser": "node test/browser.js", "prepublishOnly": "npm run browserify && npm run minify", "watch": "mocha --watch test/test.js" @@ -38,9 +38,9 @@ }, "devDependencies": { "assume": "^2.2.0", - "browserify": "^16.2.3", - "mocha": "^6.1.4", - "nyc": "^14.0.0", + "browserify": "^17.0.0", + "c8": "^7.3.1", + "mocha": "^9.0.3", "pre-commit": "^1.2.2", "sauce-browsers": "^2.0.0", "sauce-test": "^1.3.3", diff --git a/test/browser.js b/test/browser.js index 8cc3203..63ee99b 100644 --- a/test/browser.js +++ b/test/browser.js @@ -12,8 +12,8 @@ const platforms = sauceBrowsers([ { name: 'firefox', version: ['oldest', 'latest'] }, { name: 'internet explorer', version: 'oldest..latest' }, { name: 'iphone', version: ['oldest', 'latest'] }, - { name: 'safari', version: 'oldest..latest' }, - { name: 'microsoftedge', version: 'oldest..latest' } + { name: 'safari', version: ['oldest', 'latest'] }, + { name: 'microsoftedge', version: ['oldest', 'latest'] } ]).then((platforms) => { return platforms.map((platform) => { const ret = { @@ -29,12 +29,12 @@ const platforms = sauceBrowsers([ }); run(path.join(__dirname, 'test.js'), 'saucelabs', { + jobInfo: { name: pkg.name, build: process.env.GITHUB_RUN_ID }, html: path.join(__dirname, 'index.html'), accessKey: process.env.SAUCE_ACCESS_KEY, username: 
process.env.SAUCE_USERNAME, browserify: true, disableSSL: true, - name: pkg.name, parallel: 5, platforms }).done((results) => { diff --git a/test/fuzzy.js b/test/fuzzy.js index f0990d3..6052040 100644 --- a/test/fuzzy.js +++ b/test/fuzzy.js @@ -103,6 +103,8 @@ module.exports = function generate() { , key; spec.protocol = get('protocol'); + spec.slashes = true; + spec.hostname = get('hostname'); spec.pathname = get('pathname'); diff --git a/test/test.js b/test/test.js index 977fa3c..4108fe6 100644 --- a/test/test.js +++ b/test/test.js @@ -47,8 +47,14 @@ describe('url-parse', function () { assume(parse.trimLeft).is.a('function'); }); - it('removes whitespace on the left', function () { - assume(parse.trimLeft(' lol')).equals('lol'); + it('removes control characters on the left', function () { + var i = 0; + var prefix = '' + + for (; i < 33; i++) { + prefix = String.fromCharCode(i); + assume(parse.trimLeft(prefix + prefix +'lol')).equals('lol'); + } }); it('calls toString on a given value', function () { @@ -71,7 +77,8 @@ describe('url-parse', function () { assume(parse.extractProtocol('http://example.com')).eql({ slashes: true, protocol: 'http:', - rest: 'example.com' + rest: 'example.com', + slashesCount: 2 }); }); @@ -79,25 +86,53 @@ describe('url-parse', function () { assume(parse.extractProtocol('')).eql({ slashes: false, protocol: '', - rest: '' + rest: '', + slashesCount: 0 + }); + }); + + it('correctly resolves paths', function () { + assume(parse.extractProtocol('/foo')).eql({ + slashes: false, + protocol: '', + rest: '/foo', + slashesCount: 1 + }); + + assume(parse.extractProtocol('//foo/bar')).eql({ + slashes: true, + protocol: '', + rest: '//foo/bar', + slashesCount: 2 }); }); it('does not truncate the input string', function () { - var input = 'foo\nbar\rbaz\u2028qux\u2029'; + var input = 'foo\x0bbar\x0cbaz\u2028qux\u2029'; assume(parse.extractProtocol(input)).eql({ slashes: false, protocol: '', - rest: input + rest: input, + slashesCount: 0 }); }); 
it('trimsLeft', function () { - assume(parse.extractProtocol(' javascript://foo')).eql({ + assume(parse.extractProtocol('\x0b\x0c javascript://foo')).eql({ slashes: true, protocol: 'javascript:', - rest: 'foo' + rest: 'foo', + slashesCount: 2 + }); + }); + + it('removes CR, HT, and LF', function () { + assume(parse.extractProtocol('jav\n\rasc\nript\r:/\t/fo\no')).eql({ + slashes: true, + protocol: 'javascript:', + rest: 'foo', + slashesCount: 2 }); }); }); @@ -190,9 +225,10 @@ describe('url-parse', function () { , parsed = parse(url); assume(parsed.port).equals(''); + assume(parsed.pathname).equals('/'); assume(parsed.host).equals('example.com'); assume(parsed.hostname).equals('example.com'); - assume(parsed.href).equals('http://example.com'); + assume(parsed.href).equals('http://example.com/'); }); it('understands an / as pathname', function () { @@ -208,6 +244,20 @@ describe('url-parse', function () { assume(parsed.href).equals('http://example.com/'); }); + it('correctly parses pathnames for relative paths', function () { + var url = '/dataApi/PROD/ws' + , parsed = parse(url, 'http://localhost:3000/PROD/trends'); + + assume(parsed.pathname).equals('/dataApi/PROD/ws'); + + url = '/sections/?project=default' + parsed = parse(url, 'http://example.com/foo/bar'); + + assume(parsed.pathname).equals('/sections/'); + assume(parsed.hostname).equals('example.com'); + assume(parsed.href).equals('http://example.com/sections/?project=default'); + }); + it('does not care about spaces', function () { var url = 'http://x.com/path?that\'s#all, folks' , parsed = parse(url); @@ -242,16 +292,202 @@ describe('url-parse', function () { assume(parsed.hostname).equals('google.com'); assume(parsed.hash).equals('#what\\is going on'); - parsed = parse('//\\what-is-up.com'); + parsed = parse('http://yolo.com\\what-is-up.com'); assume(parsed.pathname).equals('/what-is-up.com'); }); it('correctly ignores multiple slashes //', function () { var url = '////what-is-up.com' + , parsed = 
parse(url, parse('http://google.com')); + + assume(parsed.host).equals('what-is-up.com'); + assume(parsed.href).equals('http://what-is-up.com/'); + + url = '\\\\\\\\what-is-up.com' + parsed = parse(url, parse('http://google.com')); + + assume(parsed.host).equals('what-is-up.com'); + assume(parsed.href).equals('http://what-is-up.com/'); + }); + + it('ignores slashes after the protocol for special URLs', function () { + var url = 'https:\\/github.com/foo/bar' + , parsed = parse(url); + + assume(parsed.host).equals('github.com'); + assume(parsed.hostname).equals('github.com'); + assume(parsed.pathname).equals('/foo/bar'); + assume(parsed.slashes).is.true(); + assume(parsed.href).equals('https://github.com/foo/bar'); + + url = 'https:/\\/\\/\\github.com/foo/bar'; + parsed = parse(url); + assume(parsed.host).equals('github.com'); + assume(parsed.hostname).equals('github.com'); + assume(parsed.pathname).equals('/foo/bar'); + assume(parsed.slashes).is.true(); + assume(parsed.href).equals('https://github.com/foo/bar'); + + url = 'https:/github.com/foo/bar'; + parsed = parse(url); + assume(parsed.host).equals('github.com'); + assume(parsed.pathname).equals('/foo/bar'); + assume(parsed.slashes).is.true(); + assume(parsed.href).equals('https://github.com/foo/bar'); + + url = 'https:\\github.com/foo/bar'; + parsed = parse(url); + assume(parsed.host).equals('github.com'); + assume(parsed.pathname).equals('/foo/bar'); + assume(parsed.slashes).is.true(); + assume(parsed.href).equals('https://github.com/foo/bar'); + + url = 'https:github.com/foo/bar'; + parsed = parse(url); + assume(parsed.host).equals('github.com'); + assume(parsed.pathname).equals('/foo/bar'); + assume(parsed.slashes).is.true(); + assume(parsed.href).equals('https://github.com/foo/bar'); + + url = 'https:github.com/foo/bar'; + parsed = parse(url); + assume(parsed.host).equals('github.com'); + assume(parsed.pathname).equals('/foo/bar'); + assume(parsed.slashes).is.true(); + 
assume(parsed.href).equals('https://github.com/foo/bar'); + }); + + it('handles slashes after the protocol for non special URLs', function () { + var url = 'foo:example.com' , parsed = parse(url); - assume(parsed.host).equals(''); assume(parsed.hostname).equals(''); + assume(parsed.pathname).equals('example.com'); + assume(parsed.href).equals('foo:example.com'); + assume(parsed.slashes).is.false(); + + url = 'foo:/example.com'; + parsed = parse(url); + assume(parsed.hostname).equals(''); + assume(parsed.pathname).equals('/example.com'); + assume(parsed.href).equals('foo:/example.com'); + assume(parsed.slashes).is.false(); + + url = 'foo:\\example.com'; + parsed = parse(url); + assume(parsed.hostname).equals(''); + assume(parsed.pathname).equals('\\example.com'); + assume(parsed.href).equals('foo:\\example.com'); + assume(parsed.slashes).is.false(); + + url = 'foo://example.com'; + parsed = parse(url); + assume(parsed.hostname).equals('example.com'); + assume(parsed.pathname).equals(''); + assume(parsed.href).equals('foo://example.com'); + assume(parsed.slashes).is.true(); + + url = 'foo:\\\\example.com'; + parsed = parse(url); + assume(parsed.hostname).equals(''); + assume(parsed.pathname).equals('\\\\example.com'); + assume(parsed.href).equals('foo:\\\\example.com'); + assume(parsed.slashes).is.false(); + + url = 'foo:///example.com'; + parsed = parse(url); + assume(parsed.hostname).equals(''); + assume(parsed.pathname).equals('/example.com'); + assume(parsed.href).equals('foo:///example.com'); + assume(parsed.slashes).is.true(); + + url = 'foo:\\\\\\example.com'; + parsed = parse(url); + assume(parsed.hostname).equals(''); + assume(parsed.pathname).equals('\\\\\\example.com'); + assume(parsed.href).equals('foo:\\\\\\example.com'); + assume(parsed.slashes).is.false(); + + url = '\\\\example.com/foo/bar'; + parsed = parse(url, 'foo://bar.com'); + assume(parsed.hostname).equals('bar.com'); + assume(parsed.pathname).equals('/\\\\example.com/foo/bar'); + 
assume(parsed.href).equals('foo://bar.com/\\\\example.com/foo/bar'); + assume(parsed.slashes).is.true(); + }); + + it('does not readd slashes to href if there is no protocol', function() { + var parsed = parse('//example.com', {}); + + assume(parsed.pathname).equals('//example.com'); + assume(parsed.href).equals('//example.com'); + }); + + it('removes CR, HT, and LF', function () { + var parsed = parse( + 'ht\ntp://a\rb:\tcd@exam\rple.com:80\t80/pat\thname?fo\no=b\rar#ba\tz' + ); + + assume(parsed.protocol).equals('http:'); + assume(parsed.username).equals('ab'); + assume(parsed.password).equals('cd'); + assume(parsed.host).equals('example.com:8080'); + assume(parsed.hostname).equals('example.com'); + assume(parsed.port).equals('8080'); + assume(parsed.pathname).equals('/pathname'); + assume(parsed.query).equals('?foo=bar'); + assume(parsed.hash).equals('#baz'); + assume(parsed.href).equals( + 'http://ab:cd@example.com:8080/pathname?foo=bar#baz' + ); + + parsed = parse('s\nip:al\rice@atl\tanta.com'); + + assume(parsed.protocol).equals('sip:'); + assume(parsed.pathname).equals('alice@atlanta.com'); + assume(parsed.href).equals('sip:alice@atlanta.com'); + }); + + it('handles the case where the port is specified but empty', function () { + var parsed = parse('http://example.com:'); + + assume(parsed.protocol).equals('http:'); + assume(parsed.port).equals(''); + assume(parsed.host).equals('example.com'); + assume(parsed.hostname).equals('example.com'); + assume(parsed.pathname).equals('/'); + assume(parsed.origin).equals('http://example.com'); + assume(parsed.href).equals('http://example.com/'); + + parsed = parse('http://example.com::'); + + assume(parsed.protocol).equals('http:'); + assume(parsed.port).equals(''); + assume(parsed.host).equals('example.com:'); + assume(parsed.hostname).equals('example.com:'); + assume(parsed.pathname).equals('/'); + assume(parsed.origin).equals('http://example.com:'); + assume(parsed.href).equals('http://example.com::/'); + + parsed = 
parse('http://example.com:8080:'); + + assume(parsed.protocol).equals('http:'); + assume(parsed.port).equals(''); + assume(parsed.host).equals('example.com:8080'); + assume(parsed.hostname).equals('example.com:8080'); + assume(parsed.pathname).equals('/'); + assume(parsed.origin).equals('http://example.com:8080'); + assume(parsed.href).equals('http://example.com:8080:/'); + + parsed = parse('http://example.com:8000:8080'); + + assume(parsed.protocol).equals('http:'); + assume(parsed.port).equals('8080'); + assume(parsed.host).equals('example.com:8000:8080'); + assume(parsed.hostname).equals('example.com:8000'); + assume(parsed.pathname).equals('/'); + assume(parsed.origin).equals('http://example.com:8000:8080'); + assume(parsed.href).equals('http://example.com:8000:8080/'); }); describe('origin', function () { @@ -276,6 +512,13 @@ describe('url-parse', function () { assume(parsed.origin).equals('null'); }); + it('is null for non special URLs', function () { + var o = parse('foo://example.com/pathname'); + assume(o.hostname).equals('example.com'); + assume(o.pathname).equals('/pathname'); + assume(o.origin).equals('null'); + }); + it('removes default ports for http', function () { var o = parse('http://google.com:80/pathname'); assume(o.origin).equals('http://google.com'); @@ -327,32 +570,52 @@ describe('url-parse', function () { it('extracts the right protocol from a url', function () { var testData = [ { - href: 'http://example.com', + href: 'http://example.com/', protocol: 'http:', - pathname: '' + pathname: '/', + slashes: true + }, + { + href: 'ws://example.com/', + protocol: 'ws:', + pathname: '/', + slashes: true + }, + { + href: 'wss://example.com/', + protocol: 'wss:', + pathname: '/', + slashes: true }, { href: 'mailto:test@example.com', pathname: 'test@example.com', - protocol: 'mailto:' + protocol: 'mailto:', + slashes: false }, { href: 'data:text/html,%3Ch1%3EHello%2C%20World!%3C%2Fh1%3E', pathname: 'text/html,%3Ch1%3EHello%2C%20World!%3C%2Fh1%3E', - 
protocol: 'data:' + protocol: 'data:', + slashes: false, }, { href: 'sip:alice@atlanta.com', pathname: 'alice@atlanta.com', - protocol: 'sip:' + protocol: 'sip:', + slashes: false, } ]; - var data; + var data, test; for (var i = 0, len = testData.length; i < len; ++i) { - data = parse(testData[i].href); - assume(data.protocol).equals(testData[i].protocol); - assume(data.pathname).equals(testData[i].pathname); + test = testData[i]; + data = parse(test.href); + + assume(data.protocol).equals(test.protocol); + assume(data.pathname).equals(test.pathname); + assume(data.slashes).equals(test.slashes); + assume(data.href).equals(test.href); } }); @@ -375,6 +638,67 @@ describe('url-parse', function () { data.set('protocol', 'https:'); assume(data.href).equals('https://google.com/foo'); }); + + it('handles the file: protocol', function () { + var slashes = ['', '/', '//', '///']; + var data; + var url; + + for (var i = 0; i < slashes.length; i++) { + data = parse('file:' + slashes[i]); + assume(data.protocol).equals('file:'); + assume(data.pathname).equals('/'); + assume(data.href).equals('file:///'); + } + + url = 'file:////'; + data = parse(url); + assume(data.protocol).equals('file:'); + assume(data.pathname).equals('//'); + assume(data.href).equals(url); + + url = 'file://///'; + data = parse(url); + assume(data.protocol).equals('file:'); + assume(data.pathname).equals('///'); + assume(data.href).equals(url); + + url = 'file:///Users/foo/BAR/baz.pdf'; + data = parse(url); + assume(data.protocol).equals('file:'); + assume(data.pathname).equals('/Users/foo/BAR/baz.pdf'); + assume(data.href).equals(url); + + url = 'file:///foo/bar?baz=qux#hash'; + data = parse(url); + assume(data.protocol).equals('file:'); + assume(data.hash).equals('#hash'); + assume(data.query).equals('?baz=qux'); + assume(data.pathname).equals('/foo/bar'); + assume(data.href).equals(url); + + data = parse('file://c:\\foo\\bar\\'); + assume(data.protocol).equals('file:'); + 
assume(data.pathname).equals('/c:/foo/bar/'); + assume(data.href).equals('file:///c:/foo/bar/'); + + data = parse('file://host/file'); + assume(data.protocol).equals('file:'); + assume(data.host).equals('host'); + assume(data.hostname).equals('host'); + assume(data.pathname).equals('/file'); + assume(data.href).equals('file://host/file'); + + data = parse('foo/bar', 'file:///baz'); + assume(data.protocol).equals('file:'); + assume(data.pathname).equals('/foo/bar'); + assume(data.href).equals('file:///foo/bar'); + + data = parse('foo/bar', 'file:///baz/'); + assume(data.protocol).equals('file:'); + assume(data.pathname).equals('/baz/foo/bar'); + assume(data.href).equals('file:///baz/foo/bar'); + }); }); describe('ip', function () { @@ -391,13 +715,14 @@ describe('url-parse', function () { }); it('parses ipv6 with auth', function () { - var url = 'http://user:password@[3ffe:2a00:100:7031::1]:8080' + var url = 'http://user:password@[3ffe:2a00:100:7031::1]:8080/' , parsed = parse(url); assume(parsed.username).equals('user'); assume(parsed.password).equals('password'); assume(parsed.host).equals('[3ffe:2a00:100:7031::1]:8080'); assume(parsed.hostname).equals('[3ffe:2a00:100:7031::1]'); + assume(parsed.pathname).equals('/'); assume(parsed.href).equals(url); }); @@ -446,6 +771,113 @@ describe('url-parse', function () { assume(parsed.hostname).equals('www.example.com'); assume(parsed.href).equals(url); }); + + it('handles @ in username', function () { + var url = 'http://user@@www.example.com/' + , parsed = parse(url); + + assume(parsed.protocol).equals('http:'); + assume(parsed.auth).equals('user%40'); + assume(parsed.username).equals('user%40'); + assume(parsed.password).equals(''); + assume(parsed.hostname).equals('www.example.com'); + assume(parsed.pathname).equals('/'); + assume(parsed.href).equals('http://user%40@www.example.com/'); + + url = 'http://user%40@www.example.com/'; + parsed = parse(url); + + assume(parsed.protocol).equals('http:'); + 
assume(parsed.auth).equals('user%40'); + assume(parsed.username).equals('user%40'); + assume(parsed.password).equals(''); + assume(parsed.hostname).equals('www.example.com'); + assume(parsed.pathname).equals('/'); + assume(parsed.href).equals('http://user%40@www.example.com/'); + }); + + it('handles @ in password', function () { + var url = 'http://user@:pas:s@@www.example.com/' + , parsed = parse(url); + + assume(parsed.protocol).equals('http:'); + assume(parsed.auth).equals('user%40:pas%3As%40'); + assume(parsed.username).equals('user%40'); + assume(parsed.password).equals('pas%3As%40'); + assume(parsed.hostname).equals('www.example.com'); + assume(parsed.pathname).equals('/'); + assume(parsed.href).equals('http://user%40:pas%3As%40@www.example.com/'); + + url = 'http://user%40:pas%3As%40@www.example.com/' + parsed = parse(url); + + assume(parsed.protocol).equals('http:'); + assume(parsed.auth).equals('user%40:pas%3As%40'); + assume(parsed.username).equals('user%40'); + assume(parsed.password).equals('pas%3As%40'); + assume(parsed.hostname).equals('www.example.com'); + assume(parsed.pathname).equals('/'); + assume(parsed.href).equals('http://user%40:pas%3As%40@www.example.com/'); + }); + + it('adds @ to href if auth and host are empty', function () { + var parsed, i = 0; + var urls = [ + 'http:@/127.0.0.1', + 'http::@/127.0.0.1', + 'http:/@/127.0.0.1', + 'http:/:@/127.0.0.1', + 'http://@/127.0.0.1', + 'http://:@/127.0.0.1', + 'http:///@/127.0.0.1', + 'http:///:@/127.0.0.1' + ]; + + for (; i < urls.length; i++) { + parsed = parse(urls[i]); + + assume(parsed.protocol).equals('http:'); + assume(parsed.auth).equals(''); + assume(parsed.username).equals(''); + assume(parsed.password).equals(''); + assume(parsed.host).equals(''); + assume(parsed.hostname).equals(''); + assume(parsed.pathname).equals('/127.0.0.1'); + assume(parsed.origin).equals('null'); + assume(parsed.href).equals('http://@/127.0.0.1'); + assume(parsed.toString()).equals('http://@/127.0.0.1'); + } + + 
urls = [ + 'http:@/', + 'http:@', + 'http::@/', + 'http::@', + 'http:/@/', + 'http:/@', + 'http:/:@/', + 'http:/:@', + 'http://@/', + 'http://@', + 'http://:@/', + 'http://:@' + ]; + + for (i = 0; i < urls.length; i++) { + parsed = parse(urls[i]); + + assume(parsed.protocol).equals('http:'); + assume(parsed.auth).equals(''); + assume(parsed.username).equals(''); + assume(parsed.password).equals(''); + assume(parsed.host).equals(''); + assume(parsed.hostname).equals(''); + assume(parsed.pathname).equals('/'); + assume(parsed.origin).equals('null'); + assume(parsed.href).equals('http:///'); + assume(parsed.toString()).equals('http:///'); + } + }); }); it('accepts multiple ???', function () { @@ -467,7 +899,7 @@ describe('url-parse', function () { assume(data.port).equals(''); assume(data.host).equals('localhost'); - assume(data.href).equals('http://localhost'); + assume(data.href).equals('http://localhost/'); }); it('inherits port numbers for relative urls', function () { @@ -516,7 +948,8 @@ describe('url-parse', function () { }); it('inherits protocol for relative protocols', function () { - var data = parse('//foo.com/foo', parse('http://sub.example.com:808/')); + var lolcation = parse('http://sub.example.com:808/') + , data = parse('//foo.com/foo', lolcation); assume(data.port).equals(''); assume(data.host).equals('foo.com'); @@ -529,13 +962,13 @@ describe('url-parse', function () { assume(data.port).equals(''); assume(data.host).equals('localhost'); - assume(data.href).equals('http://localhost'); + assume(data.href).equals('http://localhost/'); }); it('resolves pathname for relative urls', function () { var data, i = 0; var tests = [ - ['', 'http://foo.com', ''], + ['', 'http://foo.com', '/'], ['', 'http://foo.com/', '/'], ['', 'http://foo.com/a', '/a'], ['a', 'http://foo.com', '/a'], @@ -722,12 +1155,12 @@ describe('url-parse', function () { data.set('hash', 'usage'); assume(data.hash).equals('#usage'); - assume(data.href).equals('http://example.com#usage'); + 
assume(data.href).equals('http://example.com/#usage'); data.set('hash', '#license'); assume(data.hash).equals('#license'); - assume(data.href).equals('http://example.com#license'); + assume(data.href).equals('http://example.com/#license'); }); it('updates the port when updating host', function () { @@ -802,6 +1235,106 @@ describe('url-parse', function () { assume(data.href).equals('mailto:alice@atlanta.com'); }); + it('updates auth when updating username', function() { + var data = parse('https://example.com'); + + assume(data.set('username', 'foo')).equals(data); + assume(data.username).equals('foo'); + assume(data.auth).equals('foo') + assume(data.href).equals('https://foo@example.com/'); + + data.set('username', ''); + + assume(data.username).equals(''); + assume(data.auth).equals('') + assume(data.href).equals('https://example.com/'); + + data.set('username', 'foo:'); + + assume(data.username).equals('foo%3A'); + assume(data.auth).equals('foo%3A') + assume(data.href).equals('https://foo%3A@example.com/'); + + data = parse('https://foo:bar@example.com') + data.set('username', 'baz'); + + assume(data.username).equals('baz'); + assume(data.auth).equals('baz:bar') + assume(data.href).equals('https://baz:bar@example.com/'); + }); + + it('updates auth when updating password', function() { + var data = parse('https://example.com'); + + assume(data.set('password', 'foo')).equals(data); + assume(data.password).equals('foo'); + assume(data.auth).equals(':foo') + assume(data.href).equals('https://:foo@example.com/'); + + data.set('password', ''); + + assume(data.password).equals(''); + assume(data.auth).equals('') + assume(data.href).equals('https://example.com/'); + + data.set('password', ':foo@'); + + assume(data.password).equals('%3Afoo%40'); + assume(data.auth).equals(':%3Afoo%40') + assume(data.href).equals('https://:%3Afoo%40@example.com/'); + + data = parse('https://foo:bar@example.com') + data.set('password', 'baz'); + + assume(data.password).equals('baz'); + 
assume(data.auth).equals('foo:baz') + assume(data.href).equals('https://foo:baz@example.com/'); + }); + + it('updates username and password when updating auth', function() { + var data = parse('https://example.com'); + + assume(data.set('auth', 'foo:bar')).equals(data); + assume(data.username).equals('foo'); + assume(data.password).equals('bar'); + assume(data.href).equals('https://foo:bar@example.com/'); + + assume(data.set('auth', 'baz:')).equals(data); + assume(data.username).equals('baz'); + assume(data.password).equals(''); + assume(data.href).equals('https://baz@example.com/'); + + assume(data.set('auth', 'qux')).equals(data); + assume(data.username).equals('qux'); + assume(data.password).equals(''); + assume(data.href).equals('https://qux@example.com/'); + + assume(data.set('auth', ':quux')).equals(data); + assume(data.username).equals(''); + assume(data.password).equals('quux'); + assume(data.href).equals('https://:quux@example.com/'); + + assume(data.set('auth', 'user@:pass@')).equals(data); + assume(data.username).equals('user%40'); + assume(data.password).equals('pass%40'); + assume(data.href).equals('https://user%40:pass%40@example.com/'); + + assume(data.set('auth', 'user%40:pass%40')).equals(data); + assume(data.username).equals('user%40'); + assume(data.password).equals('pass%40'); + assume(data.href).equals('https://user%40:pass%40@example.com/'); + + assume(data.set('auth', 'user:pass:word')).equals(data); + assume(data.username).equals('user'); + assume(data.password).equals('pass%3Aword'); + assume(data.href).equals('https://user:pass%3Aword@example.com/'); + + assume(data.set('auth', 'user:pass%3Aword')).equals(data); + assume(data.username).equals('user'); + assume(data.password).equals('pass%3Aword'); + assume(data.href).equals('https://user:pass%3Aword@example.com/'); + }); + it('updates other values', function () { var data = parse('http://google.com/?foo=bar');