301 lines
		
	
	
		
			9.9 KiB
		
	
	
	
		
			JavaScript
		
	
	
	
	
	
			
		
		
	
	
			301 lines
		
	
	
		
			9.9 KiB
		
	
	
	
		
			JavaScript
		
	
	
	
	
	
/**
 * @author Toru Nagashima <https://github.com/mysticatea>
 */
 | |
| "use strict";
 | |
| 
 | |
| const { CALL, CONSTRUCT, ReferenceTracker, getStringIfConstant } = require("@eslint-community/eslint-utils");
 | |
| const { RegExpParser, visitRegExpAST } = require("@eslint-community/regexpp");
 | |
| const { isCombiningCharacter, isEmojiModifier, isRegionalIndicatorSymbol, isSurrogatePair } = require("./utils/unicode");
 | |
| const astUtils = require("./utils/ast-utils.js");
 | |
| const { isValidWithUnicodeFlag } = require("./utils/regular-expressions");
 | |
| 
 | |
//------------------------------------------------------------------------------
// Helpers
//------------------------------------------------------------------------------
 | |
| 
 | |
/**
 * @typedef {import('@eslint-community/regexpp').AST.Character} Character
 * @typedef {import('@eslint-community/regexpp').AST.CharacterClassElement} CharacterClassElement
 */
 | |
| 
 | |
/**
 * Yields the runs of adjacent `Character` nodes found in a character class.
 *
 * A `CharacterClassRange` (`a-b`) absorbs the characters on both sides of the
 * hyphen, so its `min` is appended to the run that precedes it and its `max`
 * starts the run that follows it. Any other kind of class element (character
 * set, nested class, string disjunction, class expression) terminates the
 * current run.
 * @param {CharacterClassElement[]} nodes The elements of a character class.
 * @returns {IterableIterator<Character[]>} The character sequences, in source order.
 */
function *iterateCharacterSequence(nodes) {

    /** @type {Character[]} */
    let pending = [];

    for (const element of nodes) {
        const { type } = element;

        if (type === "Character") {
            pending.push(element);
        } else if (type === "CharacterClassRange") {

            // The range boundary splits the run: `min` closes it, `max` opens the next one.
            pending.push(element.min);
            yield pending;
            pending = [element.max];
        } else if (
            type === "CharacterSet" ||
            type === "CharacterClass" || // [[]] nesting character class
            type === "ClassStringDisjunction" || // \q{...}
            type === "ExpressionCharacterClass" // [A--B]
        ) {

            // Non-character elements separate runs; empty runs are never yielded here.
            if (pending.length > 0) {
                yield pending;
                pending = [];
            }
        }

        // Any other element type is ignored.
    }

    if (pending.length > 0) {
        yield pending;
    }
}
 | |
| 
 | |
| 
 | |
/**
 * Tells whether a character node was written as a Unicode code point escape,
 * i.e. its raw source text has the form `\u{...}` with one or more hex digits.
 * @param {Character} char The character node whose `raw` text is inspected.
 * @returns {boolean} `true` if the raw text is exactly a `\u{hex}` escape.
 */
function isUnicodeCodePointEscape(char) {
    const codePointEscapePattern = /^\\u\{[\da-f]+\}$/iu;

    return codePointEscapePattern.test(char.raw);
}
 | |
| 
 | |
/**
 * Tests whether any two neighbouring characters satisfy a predicate.
 * @param {Character[]} chars The character sequence to scan.
 * @param {(previous: Character, current: Character) => boolean} predicate Called with each adjacent pair, left to right.
 * @returns {boolean} `true` as soon as one pair satisfies the predicate.
 */
function someAdjacentPair(chars, predicate) {
    for (let index = 1; index < chars.length; index++) {
        if (predicate(chars[index - 1], chars[index])) {
            return true;
        }
    }

    return false;
}

/**
 * Detectors for each kind of misleading sequence. Every function receives one
 * character sequence and returns `true` if that kind of problem occurs in it.
 * The property order is significant: `kinds` is derived from it.
 * @type {Record<string, (chars: Character[]) => boolean>}
 */
const hasCharacterSequence = {

    // A surrogate pair where neither half is written as a `\u{...}` escape.
    surrogatePairWithoutUFlag(chars) {
        return someAdjacentPair(chars, (previous, current) => (
            isSurrogatePair(previous.value, current.value) &&
            !isUnicodeCodePointEscape(previous) &&
            !isUnicodeCodePointEscape(current)
        ));
    },

    // A surrogate pair where at least one half is written as a `\u{...}` escape.
    surrogatePair(chars) {
        return someAdjacentPair(chars, (previous, current) => (
            isSurrogatePair(previous.value, current.value) &&
            (
                isUnicodeCodePointEscape(previous) ||
                isUnicodeCodePointEscape(current)
            )
        ));
    },

    // A combining character that directly follows a non-combining character.
    combiningClass(chars) {
        return someAdjacentPair(chars, (previous, current) => (
            isCombiningCharacter(current.value) &&
            !isCombiningCharacter(previous.value)
        ));
    },

    // An emoji modifier that directly follows something that is not an emoji modifier.
    emojiModifier(chars) {
        return someAdjacentPair(chars, (previous, current) => (
            isEmojiModifier(current.value) &&
            !isEmojiModifier(previous.value)
        ));
    },

    // Two regional indicator symbols next to each other.
    regionalIndicatorSymbol(chars) {
        return someAdjacentPair(chars, (previous, current) => (
            isRegionalIndicatorSymbol(current.value) &&
            isRegionalIndicatorSymbol(previous.value)
        ));
    },

    // A U+200D that is neither first nor last and has no U+200D on either side.
    zwj(chars) {
        const ZWJ = 0x200d;

        for (let index = 1; index < chars.length - 1; index++) {
            const isJoiner = chars[index].value === ZWJ;

            if (
                isJoiner &&
                chars[index - 1].value !== ZWJ &&
                chars[index + 1].value !== ZWJ
            ) {
                return true;
            }
        }

        return false;
    }
};

// The detector names double as the rule's message ids.
const kinds = Object.keys(hasCharacterSequence);
 | |
| 
 | |
//------------------------------------------------------------------------------
// Rule Definition
//------------------------------------------------------------------------------
 | |
| 
 | |
| /** @type {import('../shared/types').Rule} */
 | |
| module.exports = {
 | |
|     meta: {
 | |
|         type: "problem",
 | |
| 
 | |
|         docs: {
 | |
|             description: "Disallow characters which are made with multiple code points in character class syntax",
 | |
|             recommended: true,
 | |
|             url: "https://eslint.org/docs/latest/rules/no-misleading-character-class"
 | |
|         },
 | |
| 
 | |
|         hasSuggestions: true,
 | |
| 
 | |
|         schema: [],
 | |
| 
 | |
|         messages: {
 | |
|             surrogatePairWithoutUFlag: "Unexpected surrogate pair in character class. Use 'u' flag.",
 | |
|             surrogatePair: "Unexpected surrogate pair in character class.",
 | |
|             combiningClass: "Unexpected combined character in character class.",
 | |
|             emojiModifier: "Unexpected modified Emoji in character class.",
 | |
|             regionalIndicatorSymbol: "Unexpected national flag in character class.",
 | |
|             zwj: "Unexpected joined character sequence in character class.",
 | |
|             suggestUnicodeFlag: "Add unicode 'u' flag to regex."
 | |
|         }
 | |
|     },
 | |
|     create(context) {
 | |
|         const sourceCode = context.sourceCode;
 | |
|         const parser = new RegExpParser();
 | |
| 
 | |
|         /**
 | |
|          * Verify a given regular expression.
 | |
|          * @param {Node} node The node to report.
 | |
|          * @param {string} pattern The regular expression pattern to verify.
 | |
|          * @param {string} flags The flags of the regular expression.
 | |
|          * @param {Function} unicodeFixer Fixer for missing "u" flag.
 | |
|          * @returns {void}
 | |
|          */
 | |
|         function verify(node, pattern, flags, unicodeFixer) {
 | |
|             let patternNode;
 | |
| 
 | |
|             try {
 | |
|                 patternNode = parser.parsePattern(
 | |
|                     pattern,
 | |
|                     0,
 | |
|                     pattern.length,
 | |
|                     {
 | |
|                         unicode: flags.includes("u"),
 | |
|                         unicodeSets: flags.includes("v")
 | |
|                     }
 | |
|                 );
 | |
|             } catch {
 | |
| 
 | |
|                 // Ignore regular expressions with syntax errors
 | |
|                 return;
 | |
|             }
 | |
| 
 | |
|             const foundKinds = new Set();
 | |
| 
 | |
|             visitRegExpAST(patternNode, {
 | |
|                 onCharacterClassEnter(ccNode) {
 | |
|                     for (const chars of iterateCharacterSequence(ccNode.elements)) {
 | |
|                         for (const kind of kinds) {
 | |
|                             if (hasCharacterSequence[kind](chars)) {
 | |
|                                 foundKinds.add(kind);
 | |
|                             }
 | |
|                         }
 | |
|                     }
 | |
|                 }
 | |
|             });
 | |
| 
 | |
|             for (const kind of foundKinds) {
 | |
|                 let suggest;
 | |
| 
 | |
|                 if (kind === "surrogatePairWithoutUFlag") {
 | |
|                     suggest = [{
 | |
|                         messageId: "suggestUnicodeFlag",
 | |
|                         fix: unicodeFixer
 | |
|                     }];
 | |
|                 }
 | |
| 
 | |
|                 context.report({
 | |
|                     node,
 | |
|                     messageId: kind,
 | |
|                     suggest
 | |
|                 });
 | |
|             }
 | |
|         }
 | |
| 
 | |
|         return {
 | |
|             "Literal[regex]"(node) {
 | |
|                 verify(node, node.regex.pattern, node.regex.flags, fixer => {
 | |
|                     if (!isValidWithUnicodeFlag(context.languageOptions.ecmaVersion, node.regex.pattern)) {
 | |
|                         return null;
 | |
|                     }
 | |
| 
 | |
|                     return fixer.insertTextAfter(node, "u");
 | |
|                 });
 | |
|             },
 | |
|             "Program"(node) {
 | |
|                 const scope = sourceCode.getScope(node);
 | |
|                 const tracker = new ReferenceTracker(scope);
 | |
| 
 | |
|                 /*
 | |
|                  * Iterate calls of RegExp.
 | |
|                  * E.g., `new RegExp()`, `RegExp()`, `new window.RegExp()`,
 | |
|                  *       `const {RegExp: a} = window; new a()`, etc...
 | |
|                  */
 | |
|                 for (const { node: refNode } of tracker.iterateGlobalReferences({
 | |
|                     RegExp: { [CALL]: true, [CONSTRUCT]: true }
 | |
|                 })) {
 | |
|                     const [patternNode, flagsNode] = refNode.arguments;
 | |
|                     const pattern = getStringIfConstant(patternNode, scope);
 | |
|                     const flags = getStringIfConstant(flagsNode, scope);
 | |
| 
 | |
|                     if (typeof pattern === "string") {
 | |
|                         verify(refNode, pattern, flags || "", fixer => {
 | |
| 
 | |
|                             if (!isValidWithUnicodeFlag(context.languageOptions.ecmaVersion, pattern)) {
 | |
|                                 return null;
 | |
|                             }
 | |
| 
 | |
|                             if (refNode.arguments.length === 1) {
 | |
|                                 const penultimateToken = sourceCode.getLastToken(refNode, { skip: 1 }); // skip closing parenthesis
 | |
| 
 | |
|                                 return fixer.insertTextAfter(
 | |
|                                     penultimateToken,
 | |
|                                     astUtils.isCommaToken(penultimateToken)
 | |
|                                         ? ' "u",'
 | |
|                                         : ', "u"'
 | |
|                                 );
 | |
|                             }
 | |
| 
 | |
|                             if ((flagsNode.type === "Literal" && typeof flagsNode.value === "string") || flagsNode.type === "TemplateLiteral") {
 | |
|                                 const range = [flagsNode.range[0], flagsNode.range[1] - 1];
 | |
| 
 | |
|                                 return fixer.insertTextAfterRange(range, "u");
 | |
|                             }
 | |
| 
 | |
|                             return null;
 | |
|                         });
 | |
|                     }
 | |
|                 }
 | |
|             }
 | |
|         };
 | |
|     }
 | |
| };
 |