/*! * RegJSGen * Copyright 2014 Benjamin Tan * Available under MIT license */ ;(function() { 'use strict'; /** Used to determine if values are of the language type `Object` */ var objectTypes = { 'function': true, 'object': true }; /** Used as a reference to the global object */ var root = (objectTypes[typeof window] && window) || this; /** Backup possible global object */ var oldRoot = root; /** Detect free variable `exports` */ var freeExports = objectTypes[typeof exports] && exports; /** Detect free variable `module` */ var freeModule = objectTypes[typeof module] && module && !module.nodeType && module; /** Detect free variable `global` from Node.js or Browserified code and use it as `root` */ var freeGlobal = freeExports && freeModule && typeof global == 'object' && global; if (freeGlobal && (freeGlobal.global === freeGlobal || freeGlobal.window === freeGlobal || freeGlobal.self === freeGlobal)) { root = freeGlobal; } /*--------------------------------------------------------------------------*/ /*! Based on https://mths.be/fromcodepoint v0.2.0 by @mathias */ var stringFromCharCode = String.fromCharCode; var floor = Math.floor; function fromCodePoint() { var MAX_SIZE = 0x4000; var codeUnits = []; var highSurrogate; var lowSurrogate; var index = -1; var length = arguments.length; if (!length) { return ''; } var result = ''; while (++index < length) { var codePoint = Number(arguments[index]); if ( !isFinite(codePoint) || // `NaN`, `+Infinity`, or `-Infinity` codePoint < 0 || // not a valid Unicode code point codePoint > 0x10FFFF || // not a valid Unicode code point floor(codePoint) != codePoint // not an integer ) { throw RangeError('Invalid code point: ' + codePoint); } if (codePoint <= 0xFFFF) { // BMP code point codeUnits.push(codePoint); } else { // Astral code point; split in surrogate halves // http://mathiasbynens.be/notes/javascript-encoding#surrogate-formulae codePoint -= 0x10000; highSurrogate = (codePoint >> 10) + 0xD800; lowSurrogate = (codePoint % 0x400) + 0xDC00; codeUnits.push(highSurrogate, lowSurrogate); } if (index + 1 == length || codeUnits.length > MAX_SIZE) { result += stringFromCharCode.apply(null, codeUnits); codeUnits.length = 0; } } return result; } function assertType(type, expected) { if (expected.indexOf('|') == -1) { if (type == expected) { return; } throw Error('Invalid node type: ' + type); } expected = assertType.hasOwnProperty(expected) ? assertType[expected] : (assertType[expected] = RegExp('^(?:' + expected + ')$')); if (expected.test(type)) { return; } throw Error('Invalid node type: ' + type); } /*--------------------------------------------------------------------------*/ function generate(node) { var type = node.type; if (generate.hasOwnProperty(type) && typeof generate[type] == 'function') { return generate[type](node); } throw Error('Invalid node type: ' + type); } /*--------------------------------------------------------------------------*/ function generateAlternative(node) { assertType(node.type, 'alternative'); var terms = node.body, length = terms ? terms.length : 0; if (length == 1) { return generateTerm(terms[0]); } else { var i = -1, result = ''; while (++i < length) { result += generateTerm(terms[i]); } return result; } } function generateAnchor(node) { assertType(node.type, 'anchor'); switch (node.kind) { case 'start': return '^'; case 'end': return '$'; case 'boundary': return '\\b'; case 'not-boundary': return '\\B'; default: throw Error('Invalid assertion'); } } function generateAtom(node) { assertType(node.type, 'anchor|characterClass|characterClassEscape|dot|group|reference|value'); return generate(node); } function generateCharacterClass(node) { assertType(node.type, 'characterClass'); var classRanges = node.body, length = classRanges ? classRanges.length : 0; var i = -1, result = '['; if (node.negative) { result += '^'; } while (++i < length) { result += generateClassAtom(classRanges[i]); } result += ']'; return result; } function generateCharacterClassEscape(node) { assertType(node.type, 'characterClassEscape'); return '\\' + node.value; } function generateCharacterClassRange(node) { assertType(node.type, 'characterClassRange'); var min = node.min, max = node.max; if (min.type == 'characterClassRange' || max.type == 'characterClassRange') { throw Error('Invalid character class range'); } return generateClassAtom(min) + '-' + generateClassAtom(max); } function generateClassAtom(node) { assertType(node.type, 'anchor|characterClassEscape|characterClassRange|dot|value'); return generate(node); } function generateDisjunction(node) { assertType(node.type, 'disjunction'); var body = node.body, length = body ? body.length : 0; if (length == 0) { throw Error('No body'); } else if (length == 1) { return generate(body[0]); } else { var i = -1, result = ''; while (++i < length) { if (i != 0) { result += '|'; } result += generate(body[i]); } return result; } } function generateDot(node) { assertType(node.type, 'dot'); return '.'; } function generateGroup(node) { assertType(node.type, 'group'); var result = '('; switch (node.behavior) { case 'normal': break; case 'ignore': result += '?:'; break; case 'lookahead': result += '?='; break; case 'negativeLookahead': result += '?!'; break; default: throw Error('Invalid behaviour: ' + node.behaviour); } var body = node.body, length = body ? body.length : 0; if (length == 1) { result += generate(body[0]); } else { var i = -1; while (++i < length) { result += generate(body[i]); } } result += ')'; return result; } function generateQuantifier(node) { assertType(node.type, 'quantifier'); var quantifier = '', min = node.min, max = node.max; switch (max) { case undefined: case null: switch (min) { case 0: quantifier = '*' break; case 1: quantifier = '+'; break; default: quantifier = '{' + min + ',}'; break; } break; default: if (min == max) { quantifier = '{' + min + '}'; } else if (min == 0 && max == 1) { quantifier = '?'; } else { quantifier = '{' + min + ',' + max + '}'; } break; } if (!node.greedy) { quantifier += '?'; } return generateAtom(node.body[0]) + quantifier; } function generateReference(node) { assertType(node.type, 'reference'); return '\\' + node.matchIndex; } function generateTerm(node) { assertType(node.type, 'anchor|characterClass|characterClassEscape|empty|group|quantifier|reference|value'); return generate(node); } function generateValue(node) { assertType(node.type, 'value'); var kind = node.kind, codePoint = node.codePoint; switch (kind) { case 'controlLetter': return '\\c' + fromCodePoint(codePoint + 64); case 'hexadecimalEscape': return '\\x' + ('00' + codePoint.toString(16).toUpperCase()).slice(-2); case 'identifier': return '\\' + fromCodePoint(codePoint); case 'null': return '\\' + codePoint; case 'octal': return '\\' + codePoint.toString(8); case 'singleEscape': switch (codePoint) { case 0x0008: return '\\b'; case 0x009: return '\\t'; case 0x00A: return '\\n'; case 0x00B: return '\\v'; case 0x00C: return '\\f'; case 0x00D: return '\\r'; default: throw Error('Invalid codepoint: ' + codePoint); } case 'symbol': return fromCodePoint(codePoint); case 'unicodeEscape': return '\\u' + ('0000' + codePoint.toString(16).toUpperCase()).slice(-4); case 'unicodeCodePointEscape': return '\\u{' + codePoint.toString(16).toUpperCase() + '}'; default: throw Error('Unsupported node kind: ' + kind); } } /*--------------------------------------------------------------------------*/ generate.alternative = generateAlternative; generate.anchor = generateAnchor; generate.characterClass = generateCharacterClass; generate.characterClassEscape = generateCharacterClassEscape; generate.characterClassRange = generateCharacterClassRange; generate.disjunction = generateDisjunction; generate.dot = generateDot; generate.group = generateGroup; generate.quantifier = generateQuantifier; generate.reference = generateReference; generate.value = generateValue; /*--------------------------------------------------------------------------*/ // export regjsgen // some AMD build optimizers, like r.js, check for condition patterns like the following: if (typeof define == 'function' && typeof define.amd == 'object' && define.amd) { // define as an anonymous module so, through path mapping, it can be aliased define(function() { return { 'generate': generate }; }); } // check for `exports` after `define` in case a build optimizer adds an `exports` object else if (freeExports && freeModule) { // in Narwhal, Node.js, Rhino -require, or RingoJS freeExports.generate = generate; } // in a browser or Rhino else { root.regjsgen = { 'generate': generate }; } }.call(this));