'use strict'; const url = require('url'); const punycode = require('punycode'); const queryString = require('query-string'); const prependHttp = require('prepend-http'); const sortKeys = require('sort-keys'); const DEFAULT_PORTS = { 'http:': 80, 'https:': 443, 'ftp:': 21 }; // Protocols that always contain a `//`` bit const slashedProtocol = { http: true, https: true, ftp: true, gopher: true, file: true, 'http:': true, 'https:': true, 'ftp:': true, 'gopher:': true, 'file:': true }; function testParameter(name, filters) { return filters.some(filter => filter instanceof RegExp ? filter.test(name) : filter === name); } module.exports = (str, opts) => { opts = Object.assign({ normalizeProtocol: true, normalizeHttps: false, stripFragment: true, stripWWW: true, removeQueryParameters: [/^utm_\w+/i], removeTrailingSlash: true, removeDirectoryIndex: false, sortQueryParameters: true }, opts); if (typeof str !== 'string') { throw new TypeError('Expected a string'); } const hasRelativeProtocol = str.startsWith('//'); // Prepend protocol str = prependHttp(str.trim()).replace(/^\/\//, 'http://'); const urlObj = url.parse(str); if (opts.normalizeHttps && urlObj.protocol === 'https:') { urlObj.protocol = 'http:'; } if (!urlObj.hostname && !urlObj.pathname) { throw new Error('Invalid URL'); } // Prevent these from being used by `url.format` delete urlObj.host; delete urlObj.query; // Remove fragment if (opts.stripFragment) { delete urlObj.hash; } // Remove default port const port = DEFAULT_PORTS[urlObj.protocol]; if (Number(urlObj.port) === port) { delete urlObj.port; } // Remove duplicate slashes if (urlObj.pathname) { urlObj.pathname = urlObj.pathname.replace(/\/{2,}/g, '/'); } // Decode URI octets if (urlObj.pathname) { urlObj.pathname = decodeURI(urlObj.pathname); } // Remove directory index if (opts.removeDirectoryIndex === true) { opts.removeDirectoryIndex = [/^index\.[a-z]+$/]; } if (Array.isArray(opts.removeDirectoryIndex) && opts.removeDirectoryIndex.length > 0) { let pathComponents = urlObj.pathname.split('/'); const lastComponent = pathComponents[pathComponents.length - 1]; if (testParameter(lastComponent, opts.removeDirectoryIndex)) { pathComponents = pathComponents.slice(0, pathComponents.length - 1); urlObj.pathname = pathComponents.slice(1).join('/') + '/'; } } // Resolve relative paths, but only for slashed protocols if (slashedProtocol[urlObj.protocol]) { const domain = urlObj.protocol + '//' + urlObj.hostname; const relative = url.resolve(domain, urlObj.pathname); urlObj.pathname = relative.replace(domain, ''); } if (urlObj.hostname) { // IDN to Unicode urlObj.hostname = punycode.toUnicode(urlObj.hostname).toLowerCase(); // Remove trailing dot urlObj.hostname = urlObj.hostname.replace(/\.$/, ''); // Remove `www.` if (opts.stripWWW) { urlObj.hostname = urlObj.hostname.replace(/^www\./, ''); } } // Remove URL with empty query string if (urlObj.search === '?') { delete urlObj.search; } const queryParameters = queryString.parse(urlObj.search); // Remove query unwanted parameters if (Array.isArray(opts.removeQueryParameters)) { for (const key in queryParameters) { if (testParameter(key, opts.removeQueryParameters)) { delete queryParameters[key]; } } } // Sort query parameters if (opts.sortQueryParameters) { urlObj.search = queryString.stringify(sortKeys(queryParameters)); } // Decode query parameters if (urlObj.search !== null) { urlObj.search = decodeURIComponent(urlObj.search); } // Take advantage of many of the Node `url` normalizations str = url.format(urlObj); // Remove ending `/` if (opts.removeTrailingSlash || urlObj.pathname === '/') { str = str.replace(/\/$/, ''); } // Restore relative protocol, if applicable if (hasRelativeProtocol && !opts.normalizeProtocol) { str = str.replace(/^http:\/\//, '//'); } return str; };