250 lines
		
	
	
		
			7.0 KiB
		
	
	
	
		
			JavaScript
		
	
	
	
	
	
			
		
		
	
	
			250 lines
		
	
	
		
			7.0 KiB
		
	
	
	
		
			JavaScript
		
	
	
	
	
	
// Code value that stands for "end of input"; isValidEscape() compares
// its second argument against it.
var EOF = 0;
 | |
| 
 | |
| // https://drafts.csswg.org/css-syntax-3/
 | |
| // § 4.2. Definitions
 | |
| 
 | |
// digit
// A code point in the range U+0030 DIGIT ZERO (0) .. U+0039 DIGIT NINE (9), inclusive.
// Returns a boolean.
function isDigit(code) {
    var DIGIT_ZERO = 0x0030;
    var DIGIT_NINE = 0x0039;

    return DIGIT_ZERO <= code && code <= DIGIT_NINE;
}
 | |
| 
 | |
// hex digit
// A digit, a code point in U+0041 (A) .. U+0046 (F), or a code point in U+0061 (a) .. U+0066 (f).
// Returns a boolean.
function isHexDigit(code) {
    // 0 .. 9
    if (isDigit(code)) {
        return true;
    }

    // A .. F
    if (code >= 0x0041 && code <= 0x0046) {
        return true;
    }

    // a .. f
    return code >= 0x0061 && code <= 0x0066;
}
 | |
| 
 | |
// uppercase letter
// A code point in the range U+0041 LATIN CAPITAL LETTER A (A) .. U+005A LATIN CAPITAL LETTER Z (Z), inclusive.
// Returns a boolean.
function isUppercaseLetter(code) {
    var CAPITAL_A = 0x0041;
    var CAPITAL_Z = 0x005A;

    return CAPITAL_A <= code && code <= CAPITAL_Z;
}
 | |
| 
 | |
// lowercase letter
// A code point in the range U+0061 LATIN SMALL LETTER A (a) .. U+007A LATIN SMALL LETTER Z (z), inclusive.
// Returns a boolean.
function isLowercaseLetter(code) {
    var SMALL_A = 0x0061;
    var SMALL_Z = 0x007A;

    return SMALL_A <= code && code <= SMALL_Z;
}
 | |
| 
 | |
// letter
// Either an uppercase letter or a lowercase letter (ASCII A-Z / a-z).
// Returns a boolean.
function isLetter(code) {
    if (isUppercaseLetter(code)) {
        return true;
    }

    return isLowercaseLetter(code);
}
 | |
| 
 | |
// non-ASCII code point
// Any code point whose value is U+0080 <control> or greater.
// Returns a boolean.
function isNonAscii(code) {
    var FIRST_NON_ASCII = 0x0080;

    return FIRST_NON_ASCII <= code;
}
 | |
| 
 | |
// name-start code point
// A letter, a non-ASCII code point, or U+005F LOW LINE (_).
// Returns a boolean.
function isNameStart(code) {
    if (isLetter(code)) {
        return true;
    }

    if (isNonAscii(code)) {
        return true;
    }

    // U+005F LOW LINE (_)
    return code === 0x005F;
}
 | |
| 
 | |
// name code point
// A name-start code point, a digit, or U+002D HYPHEN-MINUS (-).
// Returns a boolean.
function isName(code) {
    if (isNameStart(code)) {
        return true;
    }

    if (isDigit(code)) {
        return true;
    }

    // U+002D HYPHEN-MINUS (-)
    return code === 0x002D;
}
 | |
| 
 | |
// non-printable code point
// One of:
//   - U+0000 NULL .. U+0008 BACKSPACE
//   - U+000B LINE TABULATION
//   - U+000E SHIFT OUT .. U+001F INFORMATION SEPARATOR ONE
//   - U+007F DELETE
// Returns a boolean.
function isNonPrintable(code) {
    // Single code points: LINE TABULATION and DELETE
    if (code === 0x000B || code === 0x007F) {
        return true;
    }

    // NULL .. BACKSPACE
    if (code >= 0x0000 && code <= 0x0008) {
        return true;
    }

    // SHIFT OUT .. INFORMATION SEPARATOR ONE
    return code >= 0x000E && code <= 0x001F;
}
 | |
| 
 | |
// newline
// The spec defines a newline as U+000A LINE FEED only, because U+000D CARRIAGE RETURN
// and U+000C FORM FEED are converted to U+000A during preprocessing.
// TODO: no preprocessing is done here, so U+000D CARRIAGE RETURN and U+000C FORM FEED
// are checked for explicitly as well.
// Returns a boolean.
function isNewline(code) {
    switch (code) {
        case 0x000A: // LINE FEED
        case 0x000D: // CARRIAGE RETURN
        case 0x000C: // FORM FEED
            return true;

        default:
            return false;
    }
}
 | |
| 
 | |
// whitespace
// A newline (as accepted by isNewline), U+0009 CHARACTER TABULATION, or U+0020 SPACE.
// Returns a boolean.
function isWhiteSpace(code) {
    // CHARACTER TABULATION or SPACE
    if (code === 0x0009 || code === 0x0020) {
        return true;
    }

    return isNewline(code);
}
 | |
| 
 | |
// § 4.3.8. Check if two code points are a valid escape
// Returns true only when `first` is U+005C REVERSE SOLIDUS (\) and `second`
// is neither a newline nor EOF; returns false otherwise.
function isValidEscape(first, second) {
    return (
        first === 0x005C &&     // must start with a backslash
        !isNewline(second) &&   // a backslash followed by a newline is not an escape
        second !== EOF          // neither is a backslash at the end of input
    );
}
 | |
| 
 | |
// § 4.3.9. Check if three code points would start an identifier
// Decides on the first code point:
//   - U+002D HYPHEN-MINUS: true if the second code point is a name-start code point
//     or another U+002D, or if the second and third code points form a valid escape;
//   - U+005C REVERSE SOLIDUS (\): true if the first and second code points form a valid escape;
//   - anything else: true exactly when the first code point is a name-start code point.
// Returns a boolean.
function isIdentifierStart(first, second, third) {
    // U+002D HYPHEN-MINUS
    if (first === 0x002D) {
        if (isNameStart(second) || second === 0x002D) {
            return true;
        }

        return isValidEscape(second, third);
    }

    // U+005C REVERSE SOLIDUS (\)
    // A backslash is never a name-start code point, so it is safe to test it first.
    if (first === 0x005C) {
        return isValidEscape(first, second);
    }

    // name-start code point => true, anything else => false
    return isNameStart(first);
}
 | |
| 
 | |
// § 4.3.10. Check if three code points would start a number
// Unlike the spec algorithm, which answers true/false, this returns a number:
//   0 - the code points do not start a number;
//   1 - `first` is a digit;
//   2 - `first` is "+", "-" or "." and `second` is a digit;
//   3 - `first` is "+" or "-", `second` is "." and `third` is a digit.
// In other words, a non-zero result is the 1-based position of the first digit.
function isNumberStart(first, second, third) {
    var startsWithSign = first === 0x002B || first === 0x002D; // "+" or "-"

    if (startsWithSign) {
        // sign followed by a digit
        if (isDigit(second)) {
            return 2;
        }

        // sign followed by U+002E FULL STOP (.) and a digit
        if (second === 0x002E && isDigit(third)) {
            return 3;
        }

        return 0;
    }

    // U+002E FULL STOP (.) followed by a digit
    if (first === 0x002E) {
        if (isDigit(second)) {
            return 2;
        }

        return 0;
    }

    // a digit starts a number on its own; anything else does not
    if (isDigit(first)) {
        return 1;
    }

    return 0;
}
 | |
| 
 | |
| //
 | |
| // Misc
 | |
| //
 | |
| 
 | |
// detect BOM (https://en.wikipedia.org/wiki/Byte_order_mark)
// Returns 1 when `code` is 0xFEFF (UTF-16BE) or 0xFFFE (UTF-16LE), otherwise 0.
function isBOM(code) {
    var isBigEndianMark = code === 0xFEFF;    // UTF-16BE
    var isLittleEndianMark = code === 0xFFFE; // UTF-16LE

    return isBigEndianMark || isLittleEndianMark ? 1 : 0;
}
 | |
| 
 | |
// Fast code category
//
// https://drafts.csswg.org/css-syntax/#tokenizer-definitions
// > non-ASCII code point
// >   A code point with a value equal to or greater than U+0080 <control>
// > name-start code point
// >   A letter, a non-ASCII code point, or U+005F LOW LINE (_).
// > name code point
// >   A name-start code point, a digit, or U+002D HYPHEN-MINUS (-)
// That means only ASCII code points have a special meaning, so the lookup table
// is defined for codes 0..127 only.
var CATEGORY = new Array(0x80);

// Category ids are attached to the charCodeCategory function (a hoisted declaration).
// They all lie outside the 0..127 range, so they cannot collide with a table entry
// that holds the code point itself.
charCodeCategory.Eof = 0x80;
charCodeCategory.WhiteSpace = 0x82;
charCodeCategory.Digit = 0x83;
charCodeCategory.NameStart = 0x84;
charCodeCategory.NonPrintable = 0x85;

// Fill the table: the first matching class wins; a code point that belongs to no
// class is stored as its own value.
for (var code = 0; code < CATEGORY.length; code++) {
    if (isWhiteSpace(code)) {
        CATEGORY[code] = charCodeCategory.WhiteSpace;
    } else if (isDigit(code)) {
        CATEGORY[code] = charCodeCategory.Digit;
    } else if (isNameStart(code)) {
        CATEGORY[code] = charCodeCategory.NameStart;
    } else if (isNonPrintable(code)) {
        CATEGORY[code] = charCodeCategory.NonPrintable;
    } else {
        // NOTE(review): the Eof fallback only applies to code 0, which
        // isNonPrintable() already claims above, so it appears unreachable.
        CATEGORY[code] = code || charCodeCategory.Eof;
    }
}
 | |
| 
 | |
// Look up the category of a code point.
// Codes below 0x80 are resolved through the CATEGORY table; every other code
// is reported as charCodeCategory.NameStart.
function charCodeCategory(code) {
    if (code < 0x80) {
        return CATEGORY[code];
    }

    return charCodeCategory.NameStart;
}
 | |
| 
 | |
| module.exports = {
 | |
|     isDigit: isDigit,
 | |
|     isHexDigit: isHexDigit,
 | |
|     isUppercaseLetter: isUppercaseLetter,
 | |
|     isLowercaseLetter: isLowercaseLetter,
 | |
|     isLetter: isLetter,
 | |
|     isNonAscii: isNonAscii,
 | |
|     isNameStart: isNameStart,
 | |
|     isName: isName,
 | |
|     isNonPrintable: isNonPrintable,
 | |
|     isNewline: isNewline,
 | |
|     isWhiteSpace: isWhiteSpace,
 | |
|     isValidEscape: isValidEscape,
 | |
|     isIdentifierStart: isIdentifierStart,
 | |
|     isNumberStart: isNumberStart,
 | |
| 
 | |
|     isBOM: isBOM,
 | |
|     charCodeCategory: charCodeCategory
 | |
| };
 |