// Tokenizer.d.ts: TypeScript declaration file (182 lines, 5.6 KiB according to the file-viewer listing).
/**
 * All the states the tokenizer can be in.
 *
 * This is an ambient `const enum`: the compiler substitutes the numeric values
 * at use sites, so the numbers below are part of the contract and must not be
 * renumbered. The group headings are derived from the member names.
 */
declare const enum State {
    // Plain text and tag names.
    Text = 1,
    BeforeTagName = 2,
    InTagName = 3,
    InSelfClosingTag = 4,
    BeforeClosingTagName = 5,
    InClosingTagName = 6,
    AfterClosingTagName = 7,

    // Attributes. Dq / Sq / Nq line up with the DoubleQuotes / SingleQuotes /
    // NoQuotes attribute-value handlers declared on `Tokenizer`.
    BeforeAttributeName = 8,
    InAttributeName = 9,
    AfterAttributeName = 10,
    BeforeAttributeValue = 11,
    InAttributeValueDq = 12,
    InAttributeValueSq = 13,
    InAttributeValueNq = 14,

    // Declarations and processing instructions.
    BeforeDeclaration = 15,
    InDeclaration = 16,
    InProcessingInstruction = 17,

    // Comments.
    BeforeComment = 18,
    InComment = 19,
    InSpecialComment = 20,
    AfterComment1 = 21,
    AfterComment2 = 22,

    // CDATA sections. The numbered states presumably track progress through
    // the opening / closing delimiter one character at a time (TODO confirm).
    BeforeCdata1 = 23,
    BeforeCdata2 = 24,
    BeforeCdata3 = 25,
    BeforeCdata4 = 26,
    BeforeCdata5 = 27,
    BeforeCdata6 = 28,
    InCdata = 29,
    AfterCdata1 = 30,
    AfterCdata2 = 31,

    // Special tags, "S" family: script and style.
    BeforeSpecialS = 32,
    BeforeSpecialSEnd = 33,
    BeforeScript1 = 34,
    BeforeScript2 = 35,
    BeforeScript3 = 36,
    BeforeScript4 = 37,
    BeforeScript5 = 38,
    AfterScript1 = 39,
    AfterScript2 = 40,
    AfterScript3 = 41,
    AfterScript4 = 42,
    AfterScript5 = 43,
    BeforeStyle1 = 44,
    BeforeStyle2 = 45,
    BeforeStyle3 = 46,
    BeforeStyle4 = 47,
    AfterStyle1 = 48,
    AfterStyle2 = 49,
    AfterStyle3 = 50,
    AfterStyle4 = 51,

    // Special tags, "T" family: title.
    BeforeSpecialT = 52,
    BeforeSpecialTEnd = 53,
    BeforeTitle1 = 54,
    BeforeTitle2 = 55,
    BeforeTitle3 = 56,
    BeforeTitle4 = 57,
    AfterTitle1 = 58,
    AfterTitle2 = 59,
    AfterTitle3 = 60,
    AfterTitle4 = 61,

    // Entities.
    BeforeEntity = 62,
    BeforeNumericEntity = 63,
    InNamedEntity = 64,
    InNumericEntity = 65,
    InHexEntity = 66
}
/**
 * Handlers a `Tokenizer` is constructed with (the `cbs` constructor argument).
 *
 * NOTE(review): the per-member descriptions are derived from the member and
 * parameter names only; the code that invokes them is not part of this
 * declaration file, so confirm exact timing and payloads against the
 * implementation.
 */
export interface Callbacks {
    /** Receives attribute value text. */
    onattribdata(value: string): void;
    /** Signals the end of an attribute; `quote` may be a string, `undefined` or `null`. */
    onattribend(quote: string | undefined | null): void;
    /** Receives an attribute name. */
    onattribname(name: string): void;
    /** Receives CDATA content. */
    oncdata(data: string): void;
    /** Receives the name of a closing tag. */
    onclosetag(name: string): void;
    /** Receives comment content. */
    oncomment(data: string): void;
    /** Receives declaration content. */
    ondeclaration(content: string): void;
    /** Signals the end of tokenization. */
    onend(): void;
    /** Receives an error, optionally together with a tokenizer `State`. */
    onerror(error: Error, state?: State): void;
    /** Signals the end of an opening tag. */
    onopentagend(): void;
    /** Receives the name of an opening tag. */
    onopentagname(name: string): void;
    /** Receives a processing instruction. */
    onprocessinginstruction(instruction: string): void;
    /** Signals a self-closing tag. */
    onselfclosingtag(): void;
    /** Receives text content. */
    ontext(value: string): void;
}
/**
 * Declaration of the tokenizer class: it is constructed with parsing options
 * and a set of `Callbacks`, and is fed string chunks through `write` / `end`.
 *
 * Only the shape is declared here. Private members appear without types, as
 * is usual for declaration output; they are listed so the names stay reserved.
 */
export default class Tokenizer {
    /** The current state the tokenizer is in. */
    _state: State;
    /** The read buffer. */
    private buffer;
    /** The beginning of the section that is currently being read. */
    sectionStart: number;
    /** The index within the buffer that we are currently looking at. */
    _index: number;
    /**
     * Data that has already been processed will be removed from the buffer occasionally.
     * `bufferOffset` keeps track of how many characters have been removed, to make sure position information is accurate.
     */
    private bufferOffset;
    /** Some behavior, e.g. when decoding entities, is done while we are in another state. This keeps track of the other state type. */
    private baseState;
    /**
     * For special parsing behavior inside of script and style tags.
     * NOTE(review): `State` also lists title states, so title tags are presumably covered too (confirm).
     */
    private special;
    /** Indicates whether the tokenizer has been paused. */
    private running;
    /** Indicates whether the tokenizer has finished running / `.end` has been called. */
    private ended;
    /** The callbacks passed to the constructor. */
    private readonly cbs;
    /** Presumably mirrors the `xmlMode` constructor option (confirm). */
    private readonly xmlMode;
    /** Presumably mirrors the `decodeEntities` constructor option (confirm). */
    private readonly decodeEntities;

    /**
     * @param options Optional flags `xmlMode` and `decodeEntities`; `null` is accepted in place of an options object.
     * @param cbs Handlers that receive the tokenizer's output.
     */
    constructor(options: {
        xmlMode?: boolean;
        decodeEntities?: boolean;
    } | null, cbs: Callbacks);

    /** Resets the tokenizer (presumably back to its initial state; confirm against the implementation). */
    reset(): void;
    /**
     * Supplies a chunk of input.
     * @param chunk The string data to tokenize.
     */
    write(chunk: string): void;
    /**
     * Signals the end of input (see the `ended` flag).
     * @param chunk Optional final chunk of string data.
     */
    end(chunk?: string): void;
    /** Pauses the tokenizer (see the `running` flag). */
    pause(): void;
    /** Resumes a paused tokenizer (see the `running` flag). */
    resume(): void;
    /**
     * The current index within all of the written data.
     */
    getAbsoluteIndex(): number;

    // Private per-state handlers and helpers. Names only; no types are exposed.
    private stateText;
    /**
     * HTML only allows ASCII alpha characters (a-z and A-Z) at the beginning of a tag name.
     *
     * XML allows a lot more characters here (@see https://www.w3.org/TR/REC-xml/#NT-NameStartChar).
     * We allow anything that wouldn't end the tag.
     */
    private isTagStartChar;
    private stateBeforeTagName;
    private stateInTagName;
    private stateBeforeClosingTagName;
    private stateInClosingTagName;
    private stateAfterClosingTagName;
    private stateBeforeAttributeName;
    private stateInSelfClosingTag;
    private stateInAttributeName;
    private stateAfterAttributeName;
    private stateBeforeAttributeValue;
    private handleInAttributeValue;
    private stateInAttributeValueDoubleQuotes;
    private stateInAttributeValueSingleQuotes;
    private stateInAttributeValueNoQuotes;
    private stateBeforeDeclaration;
    private stateInDeclaration;
    private stateInProcessingInstruction;
    private stateBeforeComment;
    private stateInComment;
    private stateInSpecialComment;
    private stateAfterComment1;
    private stateAfterComment2;
    private stateBeforeCdata6;
    private stateInCdata;
    private stateAfterCdata1;
    private stateAfterCdata2;
    private stateBeforeSpecialS;
    private stateBeforeSpecialSEnd;
    private stateBeforeSpecialLast;
    private stateAfterSpecialLast;
    private parseFixedEntity;
    private parseLegacyEntity;
    private stateInNamedEntity;
    private decodeNumericEntity;
    private stateInNumericEntity;
    private stateInHexEntity;
    private cleanup;
    /**
     * Iterates through the buffer, calling the function corresponding to the current state.
     *
     * States that are more likely to be hit are higher up, as a performance improvement.
     */
    private parse;
    private finish;
    private handleTrailingData;
    private getSection;
    private emitToken;
    private emitPartial;
}
// Empty export: ensures the file is treated as a module.
export {};
//# sourceMappingURL=Tokenizer.d.ts.map