// CSS tokenizer — implements "Consume a token" from CSS Syntax Module Level 3 (§ 4.3).
var TokenStream = require('../common/TokenStream');
var adoptBuffer = require('../common/adopt-buffer');

var constants = require('./const');
var TYPE = constants.TYPE;

var charCodeDefinitions = require('./char-code-definitions');
var isNewline = charCodeDefinitions.isNewline;
var isName = charCodeDefinitions.isName;
var isValidEscape = charCodeDefinitions.isValidEscape;
var isNumberStart = charCodeDefinitions.isNumberStart;
var isIdentifierStart = charCodeDefinitions.isIdentifierStart;
var charCodeCategory = charCodeDefinitions.charCodeCategory;
var isBOM = charCodeDefinitions.isBOM;

var utils = require('./utils');
var cmpStr = utils.cmpStr;
var getNewlineLength = utils.getNewlineLength;
var findWhiteSpaceEnd = utils.findWhiteSpaceEnd;
var consumeEscaped = utils.consumeEscaped;
var consumeName = utils.consumeName;
var consumeNumber = utils.consumeNumber;
var consumeBadUrlRemnants = utils.consumeBadUrlRemnants;

// Each token is stored as one integer: the token type in the bits above
// TYPE_SHIFT and the token's end offset in the low 24 bits (OFFSET_MASK).
var OFFSET_MASK = 0x00FFFFFF;
var TYPE_SHIFT = 24;

/**
 * Split CSS source text into tokens following CSS Syntax Module Level 3, § 4.3.
 *
 * Tokens are not materialised as objects. For every token one integer is
 * written to `offsetAndType` (token type shifted left by TYPE_SHIFT, OR-ed
 * with the offset at which the token ends) and one integer is written to
 * `balance` (index of the matching bracket token, or `source.length` when
 * there is none).
 *
 * @param {*} source Text to tokenize; coerced with `String(source || '')`,
 *     so any falsy value is treated as an empty string.
 * @param {TokenStream} [stream] Stream to fill. A new TokenStream is created
 *     when omitted. Its `offsetAndType` and `balance` buffers are passed to
 *     `adoptBuffer` together with the required size.
 * @returns {TokenStream} The stream, with `source`, `firstCharOffset`,
 *     `offsetAndType`, `tokenCount` and `balance` updated and
 *     `reset()` followed by `next()` already called.
 */
function tokenize(source, stream) {
    // Code unit at `offset`, or 0 when `offset` is at or past the end of input.
    function getCharCode(offset) {
        return offset < sourceLength ? source.charCodeAt(offset) : 0;
    }

    // § 4.3.3. Consume a numeric token
    // Advances `offset` past the token and sets `type` to Dimension, Percentage or Number.
    function consumeNumericToken() {
        // Consume a number and let number be the result.
        offset = consumeNumber(source, offset);

        // If the next 3 input code points would start an identifier, then:
        if (isIdentifierStart(getCharCode(offset), getCharCode(offset + 1), getCharCode(offset + 2))) {
            // Create a <dimension-token> with the same value and type flag as number, and a unit set initially to the empty string.
            // Consume a name. Set the <dimension-token>’s unit to the returned value.
            // Return the <dimension-token>.
            type = TYPE.Dimension;
            offset = consumeName(source, offset);
            return;
        }

        // Otherwise, if the next input code point is U+0025 PERCENTAGE SIGN (%), consume it.
        if (getCharCode(offset) === 0x0025) {
            // Create a <percentage-token> with the same value as number, and return it.
            type = TYPE.Percentage;
            offset++;
            return;
        }

        // Otherwise, create a <number-token> with the same value and type flag as number, and return it.
        type = TYPE.Number;
    }

    // § 4.3.4. Consume an ident-like token
    // Advances `offset` past the token and sets `type` to Function, Ident,
    // or whatever consumeUrlToken() decides (Url / BadUrl).
    function consumeIdentLikeToken() {
        const nameStartOffset = offset;

        // Consume a name, and let string be the result.
        offset = consumeName(source, offset);

        // If string’s value is an ASCII case-insensitive match for "url",
        // and the next input code point is U+0028 LEFT PARENTHESIS ((), consume it.
        if (cmpStr(source, nameStartOffset, offset, 'url') && getCharCode(offset) === 0x0028) {
            // While the next two input code points are whitespace, consume the next input code point.
            offset = findWhiteSpaceEnd(source, offset + 1);

            // If the next one or two input code points are U+0022 QUOTATION MARK ("), U+0027 APOSTROPHE ('),
            // or whitespace followed by U+0022 QUOTATION MARK (") or U+0027 APOSTROPHE ('),
            // then create a <function-token> with its value set to string and return it.
            if (getCharCode(offset) === 0x0022 ||
                getCharCode(offset) === 0x0027) {
                type = TYPE.Function;
                // Rewind so the function token ends at nameStartOffset + 4;
                // the whitespace skipped above is left for the main loop.
                offset = nameStartOffset + 4;
                return;
            }

            // Otherwise, consume a url token, and return it.
            consumeUrlToken();
            return;
        }

        // Otherwise, if the next input code point is U+0028 LEFT PARENTHESIS ((), consume it.
        // Create a <function-token> with its value set to string and return it.
        if (getCharCode(offset) === 0x0028) {
            type = TYPE.Function;
            offset++;
            return;
        }

        // Otherwise, create an <ident-token> with its value set to string and return it.
        type = TYPE.Ident;
    }

    // § 4.3.5. Consume a string token
    // Advances `offset` past the token and sets `type` to String or BadString.
    function consumeStringToken(endingCodePoint) {
        // This algorithm may be called with an ending code point, which denotes the code point
        // that ends the string. If an ending code point is not specified,
        // the current input code point is used.
        if (!endingCodePoint) {
            endingCodePoint = getCharCode(offset++);
        }

        // Initially create a <string-token> with its value set to the empty string.
        type = TYPE.String;

        // Repeatedly consume the next input code point from the stream:
        for (; offset < sourceLength; offset++) {
            var code = source.charCodeAt(offset);

            switch (charCodeCategory(code)) {
                // ending code point
                case endingCodePoint:
                    // Return the <string-token>.
                    offset++;
                    return;

                // EOF
                case charCodeCategory.Eof:
                    // This is a parse error. Return the <string-token>.
                    return;

                // newline
                case charCodeCategory.WhiteSpace:
                    if (isNewline(code)) {
                        // This is a parse error; create a <bad-string-token> and return it.
                        // Note: the newline is included in the bad-string token here
                        // (offset is advanced past it) rather than being reconsumed.
                        offset += getNewlineLength(source, offset, code);
                        type = TYPE.BadString;
                        return;
                    }
                    break;

                // U+005C REVERSE SOLIDUS (\)
                case 0x005C:
                    // If the next input code point is EOF, do nothing.
                    if (offset === sourceLength - 1) {
                        break;
                    }

                    var nextCode = getCharCode(offset + 1);

                    // Otherwise, if the next input code point is a newline, consume it.
                    if (isNewline(nextCode)) {
                        offset += getNewlineLength(source, offset + 1, nextCode);
                    } else if (isValidEscape(code, nextCode)) {
                        // Otherwise, (the stream starts with a valid escape) consume
                        // an escaped code point and append the returned code point to
                        // the <string-token>’s value.
                        // The -1 compensates for the loop's own offset++.
                        offset = consumeEscaped(source, offset) - 1;
                    }
                    break;

                // anything else
                // Append the current input code point to the <string-token>’s value.
            }
        }
    }

    // § 4.3.6. Consume a url token
    // Note: This algorithm assumes that the initial "url(" has already been consumed.
    // This algorithm also assumes that it’s being called to consume an "unquoted" value, like url(foo).
    // A quoted value, like url("foo"), is parsed as a <function-token>. Consume an ident-like token
    // automatically handles this distinction; this algorithm shouldn’t be called directly otherwise.
    // Advances `offset` past the token and sets `type` to Url or BadUrl.
    function consumeUrlToken() {
        // Initially create a <url-token> with its value set to the empty string.
        type = TYPE.Url;

        // Consume as much whitespace as possible.
        offset = findWhiteSpaceEnd(source, offset);

        // Repeatedly consume the next input code point from the stream:
        for (; offset < sourceLength; offset++) {
            var code = source.charCodeAt(offset);

            switch (charCodeCategory(code)) {
                // U+0029 RIGHT PARENTHESIS ())
                case 0x0029:
                    // Return the <url-token>.
                    offset++;
                    return;

                // EOF
                case charCodeCategory.Eof:
                    // This is a parse error. Return the <url-token>.
                    return;

                // whitespace
                case charCodeCategory.WhiteSpace:
                    // Consume as much whitespace as possible.
                    offset = findWhiteSpaceEnd(source, offset);

                    // If the next input code point is U+0029 RIGHT PARENTHESIS ()) or EOF,
                    // consume it and return the <url-token>
                    // (if EOF was encountered, this is a parse error);
                    if (getCharCode(offset) === 0x0029 || offset >= sourceLength) {
                        // Only step over a real ")"; at EOF there is nothing to consume.
                        if (offset < sourceLength) {
                            offset++;
                        }
                        return;
                    }

                    // otherwise, consume the remnants of a bad url, create a <bad-url-token>,
                    // and return it.
                    offset = consumeBadUrlRemnants(source, offset);
                    type = TYPE.BadUrl;
                    return;

                // U+0022 QUOTATION MARK (")
                // U+0027 APOSTROPHE (')
                // U+0028 LEFT PARENTHESIS (()
                // non-printable code point
                case 0x0022:
                case 0x0027:
                case 0x0028:
                case charCodeCategory.NonPrintable:
                    // This is a parse error. Consume the remnants of a bad url,
                    // create a <bad-url-token>, and return it.
                    offset = consumeBadUrlRemnants(source, offset);
                    type = TYPE.BadUrl;
                    return;

                // U+005C REVERSE SOLIDUS (\)
                case 0x005C:
                    // If the stream starts with a valid escape, consume an escaped code point and
                    // append the returned code point to the <url-token>’s value.
                    if (isValidEscape(code, getCharCode(offset + 1))) {
                        // The -1 compensates for the loop's own offset++.
                        offset = consumeEscaped(source, offset) - 1;
                        break;
                    }

                    // Otherwise, this is a parse error. Consume the remnants of a bad url,
                    // create a <bad-url-token>, and return it.
                    offset = consumeBadUrlRemnants(source, offset);
                    type = TYPE.BadUrl;
                    return;

                // anything else
                // Append the current input code point to the <url-token>’s value.
            }
        }
    }

    if (!stream) {
        stream = new TokenStream();
    }

    // ensure source is a string
    source = String(source || '');

    var sourceLength = source.length;
    var offsetAndType = adoptBuffer(stream.offsetAndType, sourceLength + 1); // +1 because of eof-token
    var balance = adoptBuffer(stream.balance, sourceLength + 1);
    var tokenCount = 0;
    // The result of isBOM() is used directly as the offset of the first
    // character to tokenize.
    var start = isBOM(getCharCode(0));
    var offset = start;
    // Type of the token being produced. Declared here (not inside the loop)
    // because the consume*Token() helpers above assign it through the closure.
    var type = 0;
    // Bracket balance state:
    //   balanceCloseType - token type that closes the innermost open bracket (0 = none open)
    //   balanceStart     - (balanceCloseType << TYPE_SHIFT) | index of the innermost opening token
    //   balancePrev      - scratch index used while linking a bracket pair
    var balanceCloseType = 0;
    var balanceStart = 0;
    var balancePrev = 0;

    // https://drafts.csswg.org/css-syntax-3/#consume-token
    // § 4.3.1. Consume a token
    while (offset < sourceLength) {
        var code = source.charCodeAt(offset);

        type = 0;

        // sourceLength is the "no matching bracket" marker in the balance buffer
        balance[tokenCount] = sourceLength;

        switch (charCodeCategory(code)) {
            // whitespace
            case charCodeCategory.WhiteSpace:
                // Consume as much whitespace as possible. Return a <whitespace-token>.
                type = TYPE.WhiteSpace;
                offset = findWhiteSpaceEnd(source, offset + 1);
                break;

            // U+0022 QUOTATION MARK (")
            case 0x0022:
                // Consume a string token and return it.
                consumeStringToken();
                break;

            // U+0023 NUMBER SIGN (#)
            case 0x0023:
                // If the next input code point is a name code point or the next two input code points are a valid escape, then:
                if (isName(getCharCode(offset + 1)) || isValidEscape(getCharCode(offset + 1), getCharCode(offset + 2))) {
                    // Create a <hash-token>.
                    type = TYPE.Hash;

                    // If the next 3 input code points would start an identifier, set the <hash-token>’s type flag to "id".
                    // if (isIdentifierStart(getCharCode(offset + 1), getCharCode(offset + 2), getCharCode(offset + 3))) {
                    //     // TODO: set id flag
                    // }

                    // Consume a name, and set the <hash-token>’s value to the returned string.
                    offset = consumeName(source, offset + 1);

                    // Return the <hash-token>.
                } else {
                    // Otherwise, return a <delim-token> with its value set to the current input code point.
                    type = TYPE.Delim;
                    offset++;
                }

                break;

            // U+0027 APOSTROPHE (')
            case 0x0027:
                // Consume a string token and return it.
                consumeStringToken();
                break;

            // U+0028 LEFT PARENTHESIS (()
            case 0x0028:
                // Return a <(-token>.
                type = TYPE.LeftParenthesis;
                offset++;
                break;

            // U+0029 RIGHT PARENTHESIS ())
            case 0x0029:
                // Return a <)-token>.
                type = TYPE.RightParenthesis;
                offset++;
                break;

            // U+002B PLUS SIGN (+)
            case 0x002B:
                // If the input stream starts with a number, ...
                if (isNumberStart(code, getCharCode(offset + 1), getCharCode(offset + 2))) {
                    // ... reconsume the current input code point, consume a numeric token, and return it.
                    consumeNumericToken();
                } else {
                    // Otherwise, return a <delim-token> with its value set to the current input code point.
                    type = TYPE.Delim;
                    offset++;
                }
                break;

            // U+002C COMMA (,)
            case 0x002C:
                // Return a <comma-token>.
                type = TYPE.Comma;
                offset++;
                break;

            // U+002D HYPHEN-MINUS (-)
            case 0x002D:
                // If the input stream starts with a number, reconsume the current input code point, consume a numeric token, and return it.
                if (isNumberStart(code, getCharCode(offset + 1), getCharCode(offset + 2))) {
                    consumeNumericToken();
                } else {
                    // Otherwise, if the next 2 input code points are U+002D HYPHEN-MINUS U+003E GREATER-THAN SIGN (->), consume them and return a <CDC-token>.
                    if (getCharCode(offset + 1) === 0x002D &&
                        getCharCode(offset + 2) === 0x003E) {
                        type = TYPE.CDC;
                        offset = offset + 3;
                    } else {
                        // Otherwise, if the input stream starts with an identifier, ...
                        if (isIdentifierStart(code, getCharCode(offset + 1), getCharCode(offset + 2))) {
                            // ... reconsume the current input code point, consume an ident-like token, and return it.
                            consumeIdentLikeToken();
                        } else {
                            // Otherwise, return a <delim-token> with its value set to the current input code point.
                            type = TYPE.Delim;
                            offset++;
                        }
                    }
                }
                break;

            // U+002E FULL STOP (.)
            case 0x002E:
                // If the input stream starts with a number, ...
                if (isNumberStart(code, getCharCode(offset + 1), getCharCode(offset + 2))) {
                    // ... reconsume the current input code point, consume a numeric token, and return it.
                    consumeNumericToken();
                } else {
                    // Otherwise, return a <delim-token> with its value set to the current input code point.
                    type = TYPE.Delim;
                    offset++;
                }

                break;

            // U+002F SOLIDUS (/)
            case 0x002F:
                // If the next input code point is U+002A ASTERISK (*), i.e. the input starts with "/*", ...
                if (getCharCode(offset + 1) === 0x002A) {
                    // ... consume them and all following code points up to and including the first U+002A ASTERISK (*)
                    // followed by a U+002F SOLIDUS (/), or up to an EOF code point.
                    type = TYPE.Comment;
                    offset = source.indexOf('*/', offset + 2) + 2;
                    // indexOf() returned -1 (so -1 + 2 === 1): unterminated comment runs to the end of input
                    if (offset === 1) {
                        offset = sourceLength;
                    }
                } else {
                    type = TYPE.Delim;
                    offset++;
                }
                break;

            // U+003A COLON (:)
            case 0x003A:
                // Return a <colon-token>.
                type = TYPE.Colon;
                offset++;
                break;

            // U+003B SEMICOLON (;)
            case 0x003B:
                // Return a <semicolon-token>.
                type = TYPE.Semicolon;
                offset++;
                break;

            // U+003C LESS-THAN SIGN (<)
            case 0x003C:
                // If the next 3 input code points are U+0021 EXCLAMATION MARK U+002D HYPHEN-MINUS U+002D HYPHEN-MINUS (!--), ...
                if (getCharCode(offset + 1) === 0x0021 &&
                    getCharCode(offset + 2) === 0x002D &&
                    getCharCode(offset + 3) === 0x002D) {
                    // ... consume them and return a <CDO-token>.
                    type = TYPE.CDO;
                    offset = offset + 4;
                } else {
                    // Otherwise, return a <delim-token> with its value set to the current input code point.
                    type = TYPE.Delim;
                    offset++;
                }

                break;

            // U+0040 COMMERCIAL AT (@)
            case 0x0040:
                // If the next 3 input code points would start an identifier, ...
                if (isIdentifierStart(getCharCode(offset + 1), getCharCode(offset + 2), getCharCode(offset + 3))) {
                    // ... consume a name, create an <at-keyword-token> with its value set to the returned value, and return it.
                    type = TYPE.AtKeyword;
                    offset = consumeName(source, offset + 1);
                } else {
                    // Otherwise, return a <delim-token> with its value set to the current input code point.
                    type = TYPE.Delim;
                    offset++;
                }

                break;

            // U+005B LEFT SQUARE BRACKET ([)
            case 0x005B:
                // Return a <[-token>.
                type = TYPE.LeftSquareBracket;
                offset++;
                break;

            // U+005C REVERSE SOLIDUS (\)
            case 0x005C:
                // If the input stream starts with a valid escape, ...
                if (isValidEscape(code, getCharCode(offset + 1))) {
                    // ... reconsume the current input code point, consume an ident-like token, and return it.
                    consumeIdentLikeToken();
                } else {
                    // Otherwise, this is a parse error. Return a <delim-token> with its value set to the current input code point.
                    type = TYPE.Delim;
                    offset++;
                }
                break;

            // U+005D RIGHT SQUARE BRACKET (])
            case 0x005D:
                // Return a <]-token>.
                type = TYPE.RightSquareBracket;
                offset++;
                break;

            // U+007B LEFT CURLY BRACKET ({)
            case 0x007B:
                // Return a <{-token>.
                type = TYPE.LeftCurlyBracket;
                offset++;
                break;

            // U+007D RIGHT CURLY BRACKET (})
            case 0x007D:
                // Return a <}-token>.
                type = TYPE.RightCurlyBracket;
                offset++;
                break;

            // digit
            case charCodeCategory.Digit:
                // Reconsume the current input code point, consume a numeric token, and return it.
                consumeNumericToken();
                break;

            // name-start code point
            case charCodeCategory.NameStart:
                // Reconsume the current input code point, consume an ident-like token, and return it.
                consumeIdentLikeToken();
                break;

            // EOF
            case charCodeCategory.Eof:
                // Return an <EOF-token>.
                break;

            // anything else
            default:
                // Return a <delim-token> with its value set to the current input code point.
                type = TYPE.Delim;
                offset++;
        }

        // Maintain bracket balance links for the token just produced.
        switch (type) {
            // The token closes the innermost open bracket.
            case balanceCloseType:
                // index of the matching opening token
                balancePrev = balanceStart & OFFSET_MASK;
                // restore the enclosing frame that was parked in the opener's slot
                balanceStart = balance[balancePrev];
                balanceCloseType = balanceStart >> TYPE_SHIFT;
                // link closer -> opener and opener -> closer
                balance[tokenCount] = balancePrev;
                balance[balancePrev++] = tokenCount;
                // tokens between the pair that still carry the "no match" marker
                // are pointed at the closing token
                for (; balancePrev < tokenCount; balancePrev++) {
                    if (balance[balancePrev] === sourceLength) {
                        balance[balancePrev] = tokenCount;
                    }
                }
                break;

            // Opening tokens: park the current frame in this token's slot and
            // start a new frame that expects the corresponding closing type.
            case TYPE.LeftParenthesis:
            case TYPE.Function:
                balance[tokenCount] = balanceStart;
                balanceCloseType = TYPE.RightParenthesis;
                balanceStart = (balanceCloseType << TYPE_SHIFT) | tokenCount;
                break;

            case TYPE.LeftSquareBracket:
                balance[tokenCount] = balanceStart;
                balanceCloseType = TYPE.RightSquareBracket;
                balanceStart = (balanceCloseType << TYPE_SHIFT) | tokenCount;
                break;

            case TYPE.LeftCurlyBracket:
                balance[tokenCount] = balanceStart;
                balanceCloseType = TYPE.RightCurlyBracket;
                balanceStart = (balanceCloseType << TYPE_SHIFT) | tokenCount;
                break;
        }

        // Pack type and end offset into one integer; the offset shares the
        // word with the type, so it only has the low TYPE_SHIFT bits available.
        offsetAndType[tokenCount++] = (type << TYPE_SHIFT) | offset;
    }

    // finalize buffers
    offsetAndType[tokenCount] = (TYPE.EOF << TYPE_SHIFT) | offset; // <EOF-token>
    balance[tokenCount] = sourceLength;
    balance[sourceLength] = sourceLength; // prevents false positive balance match with any token
    // Openers that were never closed still hold a parked frame in their slot;
    // walk the chain and reset each one to the "no match" marker.
    while (balanceStart !== 0) {
        balancePrev = balanceStart & OFFSET_MASK;
        balanceStart = balance[balancePrev];
        balance[balancePrev] = sourceLength;
    }

    // update stream
    stream.source = source;
    stream.firstCharOffset = start;
    stream.offsetAndType = offsetAndType;
    stream.tokenCount = tokenCount;
    stream.balance = balance;
    stream.reset();
    stream.next();

    return stream;
}

// extend tokenizer with constants
Object.keys(constants).forEach(function(key) {
    tokenize[key] = constants[key];
});

// extend tokenizer with static methods from char-code-definitions and utils
// (copied after the constants, so on a key clash the later assignment wins)
Object.keys(charCodeDefinitions).forEach(function(key) {
    tokenize[key] = charCodeDefinitions[key];
});
Object.keys(utils).forEach(function(key) {
    tokenize[key] = utils[key];
});

module.exports = tokenize;