569 lines
		
	
	
		
			14 KiB
		
	
	
	
		
			JavaScript
		
	
	
	
	
	
			
		
		
	
	
			569 lines
		
	
	
		
			14 KiB
		
	
	
	
		
			JavaScript
		
	
	
	
	
	
| var Tokenizer = require('./tokenizer');
 | |
| var TAB = 9;
 | |
| var N = 10;
 | |
| var F = 12;
 | |
| var R = 13;
 | |
| var SPACE = 32;
 | |
| var EXCLAMATIONMARK = 33;    // !
 | |
| var NUMBERSIGN = 35;         // #
 | |
| var AMPERSAND = 38;          // &
 | |
| var APOSTROPHE = 39;         // '
 | |
| var LEFTPARENTHESIS = 40;    // (
 | |
| var RIGHTPARENTHESIS = 41;   // )
 | |
| var ASTERISK = 42;           // *
 | |
| var PLUSSIGN = 43;           // +
 | |
| var COMMA = 44;              // ,
 | |
| var HYPERMINUS = 45;         // -
 | |
| var LESSTHANSIGN = 60;       // <
 | |
| var GREATERTHANSIGN = 62;    // >
 | |
| var QUESTIONMARK = 63;       // ?
 | |
| var COMMERCIALAT = 64;       // @
 | |
| var LEFTSQUAREBRACKET = 91;  // [
 | |
| var RIGHTSQUAREBRACKET = 93; // ]
 | |
| var LEFTCURLYBRACKET = 123;  // {
 | |
| var VERTICALLINE = 124;      // |
 | |
| var RIGHTCURLYBRACKET = 125; // }
 | |
| var INFINITY = 8734;         // ∞
 | |
| var NAME_CHAR = createCharMap(function(ch) {
 | |
|     return /[a-zA-Z0-9\-]/.test(ch);
 | |
| });
 | |
| var COMBINATOR_PRECEDENCE = {
 | |
|     ' ': 1,
 | |
|     '&&': 2,
 | |
|     '||': 3,
 | |
|     '|': 4
 | |
| };
 | |
| 
 | |
| function createCharMap(fn) {
 | |
|     var array = typeof Uint32Array === 'function' ? new Uint32Array(128) : new Array(128);
 | |
|     for (var i = 0; i < 128; i++) {
 | |
|         array[i] = fn(String.fromCharCode(i)) ? 1 : 0;
 | |
|     }
 | |
|     return array;
 | |
| }
 | |
| 
 | |
| function scanSpaces(tokenizer) {
 | |
|     return tokenizer.substringToPos(
 | |
|         tokenizer.findWsEnd(tokenizer.pos)
 | |
|     );
 | |
| }
 | |
| 
 | |
| function scanWord(tokenizer) {
 | |
|     var end = tokenizer.pos;
 | |
| 
 | |
|     for (; end < tokenizer.str.length; end++) {
 | |
|         var code = tokenizer.str.charCodeAt(end);
 | |
|         if (code >= 128 || NAME_CHAR[code] === 0) {
 | |
|             break;
 | |
|         }
 | |
|     }
 | |
| 
 | |
|     if (tokenizer.pos === end) {
 | |
|         tokenizer.error('Expect a keyword');
 | |
|     }
 | |
| 
 | |
|     return tokenizer.substringToPos(end);
 | |
| }
 | |
| 
 | |
| function scanNumber(tokenizer) {
 | |
|     var end = tokenizer.pos;
 | |
| 
 | |
|     for (; end < tokenizer.str.length; end++) {
 | |
|         var code = tokenizer.str.charCodeAt(end);
 | |
|         if (code < 48 || code > 57) {
 | |
|             break;
 | |
|         }
 | |
|     }
 | |
| 
 | |
|     if (tokenizer.pos === end) {
 | |
|         tokenizer.error('Expect a number');
 | |
|     }
 | |
| 
 | |
|     return tokenizer.substringToPos(end);
 | |
| }
 | |
| 
 | |
| function scanString(tokenizer) {
 | |
|     var end = tokenizer.str.indexOf('\'', tokenizer.pos + 1);
 | |
| 
 | |
|     if (end === -1) {
 | |
|         tokenizer.pos = tokenizer.str.length;
 | |
|         tokenizer.error('Expect an apostrophe');
 | |
|     }
 | |
| 
 | |
|     return tokenizer.substringToPos(end + 1);
 | |
| }
 | |
| 
 | |
| function readMultiplierRange(tokenizer) {
 | |
|     var min = null;
 | |
|     var max = null;
 | |
| 
 | |
|     tokenizer.eat(LEFTCURLYBRACKET);
 | |
| 
 | |
|     min = scanNumber(tokenizer);
 | |
| 
 | |
|     if (tokenizer.charCode() === COMMA) {
 | |
|         tokenizer.pos++;
 | |
|         if (tokenizer.charCode() !== RIGHTCURLYBRACKET) {
 | |
|             max = scanNumber(tokenizer);
 | |
|         }
 | |
|     } else {
 | |
|         max = min;
 | |
|     }
 | |
| 
 | |
|     tokenizer.eat(RIGHTCURLYBRACKET);
 | |
| 
 | |
|     return {
 | |
|         min: Number(min),
 | |
|         max: max ? Number(max) : 0
 | |
|     };
 | |
| }
 | |
| 
 | |
| function readMultiplier(tokenizer) {
 | |
|     var range = null;
 | |
|     var comma = false;
 | |
| 
 | |
|     switch (tokenizer.charCode()) {
 | |
|         case ASTERISK:
 | |
|             tokenizer.pos++;
 | |
| 
 | |
|             range = {
 | |
|                 min: 0,
 | |
|                 max: 0
 | |
|             };
 | |
| 
 | |
|             break;
 | |
| 
 | |
|         case PLUSSIGN:
 | |
|             tokenizer.pos++;
 | |
| 
 | |
|             range = {
 | |
|                 min: 1,
 | |
|                 max: 0
 | |
|             };
 | |
| 
 | |
|             break;
 | |
| 
 | |
|         case QUESTIONMARK:
 | |
|             tokenizer.pos++;
 | |
| 
 | |
|             range = {
 | |
|                 min: 0,
 | |
|                 max: 1
 | |
|             };
 | |
| 
 | |
|             break;
 | |
| 
 | |
|         case NUMBERSIGN:
 | |
|             tokenizer.pos++;
 | |
| 
 | |
|             comma = true;
 | |
| 
 | |
|             if (tokenizer.charCode() === LEFTCURLYBRACKET) {
 | |
|                 range = readMultiplierRange(tokenizer);
 | |
|             } else {
 | |
|                 range = {
 | |
|                     min: 1,
 | |
|                     max: 0
 | |
|                 };
 | |
|             }
 | |
| 
 | |
|             break;
 | |
| 
 | |
|         case LEFTCURLYBRACKET:
 | |
|             range = readMultiplierRange(tokenizer);
 | |
|             break;
 | |
| 
 | |
|         default:
 | |
|             return null;
 | |
|     }
 | |
| 
 | |
|     return {
 | |
|         type: 'Multiplier',
 | |
|         comma: comma,
 | |
|         min: range.min,
 | |
|         max: range.max,
 | |
|         term: null
 | |
|     };
 | |
| }
 | |
| 
 | |
| function maybeMultiplied(tokenizer, node) {
 | |
|     var multiplier = readMultiplier(tokenizer);
 | |
| 
 | |
|     if (multiplier !== null) {
 | |
|         multiplier.term = node;
 | |
|         return multiplier;
 | |
|     }
 | |
| 
 | |
|     return node;
 | |
| }
 | |
| 
 | |
| function maybeToken(tokenizer) {
 | |
|     var ch = tokenizer.peek();
 | |
| 
 | |
|     if (ch === '') {
 | |
|         return null;
 | |
|     }
 | |
| 
 | |
|     return {
 | |
|         type: 'Token',
 | |
|         value: ch
 | |
|     };
 | |
| }
 | |
| 
 | |
| function readProperty(tokenizer) {
 | |
|     var name;
 | |
| 
 | |
|     tokenizer.eat(LESSTHANSIGN);
 | |
|     tokenizer.eat(APOSTROPHE);
 | |
| 
 | |
|     name = scanWord(tokenizer);
 | |
| 
 | |
|     tokenizer.eat(APOSTROPHE);
 | |
|     tokenizer.eat(GREATERTHANSIGN);
 | |
| 
 | |
|     return maybeMultiplied(tokenizer, {
 | |
|         type: 'Property',
 | |
|         name: name
 | |
|     });
 | |
| }
 | |
| 
 | |
| // https://drafts.csswg.org/css-values-3/#numeric-ranges
 | |
| // 4.1. Range Restrictions and Range Definition Notation
 | |
| //
 | |
| // Range restrictions can be annotated in the numeric type notation using CSS bracketed
 | |
| // range notation—[min,max]—within the angle brackets, after the identifying keyword,
 | |
| // indicating a closed range between (and including) min and max.
 | |
| // For example, <integer [0, 10]> indicates an integer between 0 and 10, inclusive.
 | |
| function readTypeRange(tokenizer) {
 | |
|     // use null for Infinity to make AST format JSON serializable/deserializable
 | |
|     var min = null; // -Infinity
 | |
|     var max = null; // Infinity
 | |
|     var sign = 1;
 | |
| 
 | |
|     tokenizer.eat(LEFTSQUAREBRACKET);
 | |
| 
 | |
|     if (tokenizer.charCode() === HYPERMINUS) {
 | |
|         tokenizer.peek();
 | |
|         sign = -1;
 | |
|     }
 | |
| 
 | |
|     if (sign == -1 && tokenizer.charCode() === INFINITY) {
 | |
|         tokenizer.peek();
 | |
|     } else {
 | |
|         min = sign * Number(scanNumber(tokenizer));
 | |
|     }
 | |
| 
 | |
|     scanSpaces(tokenizer);
 | |
|     tokenizer.eat(COMMA);
 | |
|     scanSpaces(tokenizer);
 | |
| 
 | |
|     if (tokenizer.charCode() === INFINITY) {
 | |
|         tokenizer.peek();
 | |
|     } else {
 | |
|         sign = 1;
 | |
| 
 | |
|         if (tokenizer.charCode() === HYPERMINUS) {
 | |
|             tokenizer.peek();
 | |
|             sign = -1;
 | |
|         }
 | |
| 
 | |
|         max = sign * Number(scanNumber(tokenizer));
 | |
|     }
 | |
| 
 | |
|     tokenizer.eat(RIGHTSQUAREBRACKET);
 | |
| 
 | |
|     // If no range is indicated, either by using the bracketed range notation
 | |
|     // or in the property description, then [−∞,∞] is assumed.
 | |
|     if (min === null && max === null) {
 | |
|         return null;
 | |
|     }
 | |
| 
 | |
|     return {
 | |
|         type: 'Range',
 | |
|         min: min,
 | |
|         max: max
 | |
|     };
 | |
| }
 | |
| 
 | |
| function readType(tokenizer) {
 | |
|     var name;
 | |
|     var opts = null;
 | |
| 
 | |
|     tokenizer.eat(LESSTHANSIGN);
 | |
|     name = scanWord(tokenizer);
 | |
| 
 | |
|     if (tokenizer.charCode() === LEFTPARENTHESIS &&
 | |
|         tokenizer.nextCharCode() === RIGHTPARENTHESIS) {
 | |
|         tokenizer.pos += 2;
 | |
|         name += '()';
 | |
|     }
 | |
| 
 | |
|     if (tokenizer.charCodeAt(tokenizer.findWsEnd(tokenizer.pos)) === LEFTSQUAREBRACKET) {
 | |
|         scanSpaces(tokenizer);
 | |
|         opts = readTypeRange(tokenizer);
 | |
|     }
 | |
| 
 | |
|     tokenizer.eat(GREATERTHANSIGN);
 | |
| 
 | |
|     return maybeMultiplied(tokenizer, {
 | |
|         type: 'Type',
 | |
|         name: name,
 | |
|         opts: opts
 | |
|     });
 | |
| }
 | |
| 
 | |
| function readKeywordOrFunction(tokenizer) {
 | |
|     var name;
 | |
| 
 | |
|     name = scanWord(tokenizer);
 | |
| 
 | |
|     if (tokenizer.charCode() === LEFTPARENTHESIS) {
 | |
|         tokenizer.pos++;
 | |
| 
 | |
|         return {
 | |
|             type: 'Function',
 | |
|             name: name
 | |
|         };
 | |
|     }
 | |
| 
 | |
|     return maybeMultiplied(tokenizer, {
 | |
|         type: 'Keyword',
 | |
|         name: name
 | |
|     });
 | |
| }
 | |
| 
 | |
| function regroupTerms(terms, combinators) {
 | |
|     function createGroup(terms, combinator) {
 | |
|         return {
 | |
|             type: 'Group',
 | |
|             terms: terms,
 | |
|             combinator: combinator,
 | |
|             disallowEmpty: false,
 | |
|             explicit: false
 | |
|         };
 | |
|     }
 | |
| 
 | |
|     combinators = Object.keys(combinators).sort(function(a, b) {
 | |
|         return COMBINATOR_PRECEDENCE[a] - COMBINATOR_PRECEDENCE[b];
 | |
|     });
 | |
| 
 | |
|     while (combinators.length > 0) {
 | |
|         var combinator = combinators.shift();
 | |
|         for (var i = 0, subgroupStart = 0; i < terms.length; i++) {
 | |
|             var term = terms[i];
 | |
|             if (term.type === 'Combinator') {
 | |
|                 if (term.value === combinator) {
 | |
|                     if (subgroupStart === -1) {
 | |
|                         subgroupStart = i - 1;
 | |
|                     }
 | |
|                     terms.splice(i, 1);
 | |
|                     i--;
 | |
|                 } else {
 | |
|                     if (subgroupStart !== -1 && i - subgroupStart > 1) {
 | |
|                         terms.splice(
 | |
|                             subgroupStart,
 | |
|                             i - subgroupStart,
 | |
|                             createGroup(terms.slice(subgroupStart, i), combinator)
 | |
|                         );
 | |
|                         i = subgroupStart + 1;
 | |
|                     }
 | |
|                     subgroupStart = -1;
 | |
|                 }
 | |
|             }
 | |
|         }
 | |
| 
 | |
|         if (subgroupStart !== -1 && combinators.length) {
 | |
|             terms.splice(
 | |
|                 subgroupStart,
 | |
|                 i - subgroupStart,
 | |
|                 createGroup(terms.slice(subgroupStart, i), combinator)
 | |
|             );
 | |
|         }
 | |
|     }
 | |
| 
 | |
|     return combinator;
 | |
| }
 | |
| 
 | |
| function readImplicitGroup(tokenizer) {
 | |
|     var terms = [];
 | |
|     var combinators = {};
 | |
|     var token;
 | |
|     var prevToken = null;
 | |
|     var prevTokenPos = tokenizer.pos;
 | |
| 
 | |
|     while (token = peek(tokenizer)) {
 | |
|         if (token.type !== 'Spaces') {
 | |
|             if (token.type === 'Combinator') {
 | |
|                 // check for combinator in group beginning and double combinator sequence
 | |
|                 if (prevToken === null || prevToken.type === 'Combinator') {
 | |
|                     tokenizer.pos = prevTokenPos;
 | |
|                     tokenizer.error('Unexpected combinator');
 | |
|                 }
 | |
| 
 | |
|                 combinators[token.value] = true;
 | |
|             } else if (prevToken !== null && prevToken.type !== 'Combinator') {
 | |
|                 combinators[' '] = true;  // a b
 | |
|                 terms.push({
 | |
|                     type: 'Combinator',
 | |
|                     value: ' '
 | |
|                 });
 | |
|             }
 | |
| 
 | |
|             terms.push(token);
 | |
|             prevToken = token;
 | |
|             prevTokenPos = tokenizer.pos;
 | |
|         }
 | |
|     }
 | |
| 
 | |
|     // check for combinator in group ending
 | |
|     if (prevToken !== null && prevToken.type === 'Combinator') {
 | |
|         tokenizer.pos -= prevTokenPos;
 | |
|         tokenizer.error('Unexpected combinator');
 | |
|     }
 | |
| 
 | |
|     return {
 | |
|         type: 'Group',
 | |
|         terms: terms,
 | |
|         combinator: regroupTerms(terms, combinators) || ' ',
 | |
|         disallowEmpty: false,
 | |
|         explicit: false
 | |
|     };
 | |
| }
 | |
| 
 | |
| function readGroup(tokenizer) {
 | |
|     var result;
 | |
| 
 | |
|     tokenizer.eat(LEFTSQUAREBRACKET);
 | |
|     result = readImplicitGroup(tokenizer);
 | |
|     tokenizer.eat(RIGHTSQUAREBRACKET);
 | |
| 
 | |
|     result.explicit = true;
 | |
| 
 | |
|     if (tokenizer.charCode() === EXCLAMATIONMARK) {
 | |
|         tokenizer.pos++;
 | |
|         result.disallowEmpty = true;
 | |
|     }
 | |
| 
 | |
|     return result;
 | |
| }
 | |
| 
 | |
| function peek(tokenizer) {
 | |
|     var code = tokenizer.charCode();
 | |
| 
 | |
|     if (code < 128 && NAME_CHAR[code] === 1) {
 | |
|         return readKeywordOrFunction(tokenizer);
 | |
|     }
 | |
| 
 | |
|     switch (code) {
 | |
|         case RIGHTSQUAREBRACKET:
 | |
|             // don't eat, stop scan a group
 | |
|             break;
 | |
| 
 | |
|         case LEFTSQUAREBRACKET:
 | |
|             return maybeMultiplied(tokenizer, readGroup(tokenizer));
 | |
| 
 | |
|         case LESSTHANSIGN:
 | |
|             return tokenizer.nextCharCode() === APOSTROPHE
 | |
|                 ? readProperty(tokenizer)
 | |
|                 : readType(tokenizer);
 | |
| 
 | |
|         case VERTICALLINE:
 | |
|             return {
 | |
|                 type: 'Combinator',
 | |
|                 value: tokenizer.substringToPos(
 | |
|                     tokenizer.nextCharCode() === VERTICALLINE
 | |
|                         ? tokenizer.pos + 2
 | |
|                         : tokenizer.pos + 1
 | |
|                 )
 | |
|             };
 | |
| 
 | |
|         case AMPERSAND:
 | |
|             tokenizer.pos++;
 | |
|             tokenizer.eat(AMPERSAND);
 | |
| 
 | |
|             return {
 | |
|                 type: 'Combinator',
 | |
|                 value: '&&'
 | |
|             };
 | |
| 
 | |
|         case COMMA:
 | |
|             tokenizer.pos++;
 | |
|             return {
 | |
|                 type: 'Comma'
 | |
|             };
 | |
| 
 | |
|         case APOSTROPHE:
 | |
|             return maybeMultiplied(tokenizer, {
 | |
|                 type: 'String',
 | |
|                 value: scanString(tokenizer)
 | |
|             });
 | |
| 
 | |
|         case SPACE:
 | |
|         case TAB:
 | |
|         case N:
 | |
|         case R:
 | |
|         case F:
 | |
|             return {
 | |
|                 type: 'Spaces',
 | |
|                 value: scanSpaces(tokenizer)
 | |
|             };
 | |
| 
 | |
|         case COMMERCIALAT:
 | |
|             code = tokenizer.nextCharCode();
 | |
| 
 | |
|             if (code < 128 && NAME_CHAR[code] === 1) {
 | |
|                 tokenizer.pos++;
 | |
|                 return {
 | |
|                     type: 'AtKeyword',
 | |
|                     name: scanWord(tokenizer)
 | |
|                 };
 | |
|             }
 | |
| 
 | |
|             return maybeToken(tokenizer);
 | |
| 
 | |
|         case ASTERISK:
 | |
|         case PLUSSIGN:
 | |
|         case QUESTIONMARK:
 | |
|         case NUMBERSIGN:
 | |
|         case EXCLAMATIONMARK:
 | |
|             // prohibited tokens (used as a multiplier start)
 | |
|             break;
 | |
| 
 | |
|         case LEFTCURLYBRACKET:
 | |
|             // LEFTCURLYBRACKET is allowed since mdn/data uses it w/o quoting
 | |
|             // check next char isn't a number, because it's likely a disjoined multiplier
 | |
|             code = tokenizer.nextCharCode();
 | |
| 
 | |
|             if (code < 48 || code > 57) {
 | |
|                 return maybeToken(tokenizer);
 | |
|             }
 | |
| 
 | |
|             break;
 | |
| 
 | |
|         default:
 | |
|             return maybeToken(tokenizer);
 | |
|     }
 | |
| }
 | |
| 
 | |
| function parse(source) {
 | |
|     var tokenizer = new Tokenizer(source);
 | |
|     var result = readImplicitGroup(tokenizer);
 | |
| 
 | |
|     if (tokenizer.pos !== source.length) {
 | |
|         tokenizer.error('Unexpected input');
 | |
|     }
 | |
| 
 | |
|     // reduce redundant groups with single group term
 | |
|     if (result.terms.length === 1 && result.terms[0].type === 'Group') {
 | |
|         result = result.terms[0];
 | |
|     }
 | |
| 
 | |
|     return result;
 | |
| }
 | |
| 
 | |
| // warm up parse to elimitate code branches that never execute
 | |
| // fix soft deoptimizations (insufficient type feedback)
 | |
| parse('[a&&<b>#|<\'c\'>*||e() f{2} /,(% g#{1,2} h{2,})]!');
 | |
| 
 | |
| module.exports = parse;
 |