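// TokenStream provides cursor-style access to a pre-tokenized source.
// Each offsetAndType entry packs a token's end offset into the lower 24 bits
// (OFFSET_MASK) and its type into the upper 8 bits (TYPE_SHIFT); a token's
// start offset is the end offset of the preceding token. The balance array
// pairs bracket tokens by index so balanced blocks can be skipped in one step.
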
var constants = require('../tokenizer/const');
var TYPE = constants.TYPE;
var NAME = constants.NAME;

var utils = require('../tokenizer/utils');
var cmpStr = utils.cmpStr;

var EOF = TYPE.EOF;
var WHITESPACE = TYPE.WhiteSpace;
var COMMENT = TYPE.Comment;

var OFFSET_MASK = 0x00FFFFFF;
var TYPE_SHIFT = 24;

var TokenStream = function() {
    this.offsetAndType = null;
    this.balance = null;

    this.reset();
};

TokenStream.prototype = {
    reset: function() {
        this.eof = false;
        this.tokenIndex = -1;
        this.tokenType = 0;
        this.tokenStart = this.firstCharOffset;
        this.tokenEnd = this.firstCharOffset;
    },

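    // Type of the token `offset` positions ahead of the current one,
    // or EOF when the position is past the end of the stream.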
    lookupType: function(offset) {
        offset += this.tokenIndex;

        if (offset < this.tokenCount) {
            return this.offsetAndType[offset] >> TYPE_SHIFT;
        }

        return EOF;
    },
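    // Start offset of the token `offset` positions ahead (the end offset of
    // the token before it), or the source length when past the end.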
    lookupOffset: function(offset) {
        offset += this.tokenIndex;

        if (offset < this.tokenCount) {
            return this.offsetAndType[offset - 1] & OFFSET_MASK;
        }

        return this.source.length;
    },
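    // Compare the source text of the token `offset` positions ahead
    // against referenceStr using cmpStr; false when past the end.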
    lookupValue: function(offset, referenceStr) {
        offset += this.tokenIndex;

        if (offset < this.tokenCount) {
            return cmpStr(
                this.source,
                this.offsetAndType[offset - 1] & OFFSET_MASK,
                this.offsetAndType[offset] & OFFSET_MASK,
                referenceStr
            );
        }

        return false;
    },
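    // Start offset of the token at the given index.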
    getTokenStart: function(tokenIndex) {
        if (tokenIndex === this.tokenIndex) {
            return this.tokenStart;
        }

        if (tokenIndex > 0) {
            return tokenIndex < this.tokenCount
                ? this.offsetAndType[tokenIndex - 1] & OFFSET_MASK
                : this.offsetAndType[this.tokenCount] & OFFSET_MASK;
        }

        return this.firstCharOffset;
    },

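    // Scan forward from startToken and return the number of tokens from the
    // current tokenIndex up to the stop point. mode(type, source, offset)
    // decides per token: 1 stops before the token, 2 stops after it
    // (includes it), any other value continues the scan. Scanning also stops
    // on a balance edge that points before startToken.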
    // TODO: -> skipUntilBalanced
    getRawLength: function(startToken, mode) {
        var cursor = startToken;
        var balanceEnd;
        var offset = this.offsetAndType[Math.max(cursor - 1, 0)] & OFFSET_MASK;
        var type;

        loop:
        for (; cursor < this.tokenCount; cursor++) {
            balanceEnd = this.balance[cursor];

            // stop scanning on balance edge that points to offset before start token
            if (balanceEnd < startToken) {
                break loop;
            }

            type = this.offsetAndType[cursor] >> TYPE_SHIFT;

            // check token is stop type
            switch (mode(type, this.source, offset)) {
                case 1:
                    break loop;

                case 2:
                    cursor++;
                    break loop;

                default:
                    // fast forward to the end of balanced block
                    if (this.balance[balanceEnd] === cursor) {
                        cursor = balanceEnd;
                    }

                    offset = this.offsetAndType[cursor] & OFFSET_MASK;
            }
        }

        return cursor - this.tokenIndex;
    },
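    // True when the balance index of the current token points before pos.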
    isBalanceEdge: function(pos) {
        return this.balance[this.tokenIndex] < pos;
    },
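    // True when the token at the given lookahead offset (or the current token
    // when offset is falsy) is a Delim token with the given character code.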
    isDelim: function(code, offset) {
        if (offset) {
            return (
                this.lookupType(offset) === TYPE.Delim &&
                this.source.charCodeAt(this.lookupOffset(offset)) === code
            );
        }

        return (
            this.tokenType === TYPE.Delim &&
            this.source.charCodeAt(this.tokenStart) === code
        );
    },

    getTokenValue: function() {
        return this.source.substring(this.tokenStart, this.tokenEnd);
    },
    getTokenLength: function() {
        return this.tokenEnd - this.tokenStart;
    },
    substrToCursor: function(start) {
        return this.source.substring(start, this.tokenStart);
    },

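    // Skip a run of consecutive WhiteSpace tokens starting at the current one.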
    skipWS: function() {
        for (var i = this.tokenIndex, skipTokenCount = 0; i < this.tokenCount; i++, skipTokenCount++) {
            if ((this.offsetAndType[i] >> TYPE_SHIFT) !== WHITESPACE) {
                break;
            }
        }

        if (skipTokenCount > 0) {
            this.skip(skipTokenCount);
        }
    },
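    // Skip a run of WhiteSpace and Comment tokens ("SC" = spaces and comments).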
    skipSC: function() {
        while (this.tokenType === WHITESPACE || this.tokenType === COMMENT) {
            this.next();
        }
    },
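    // Advance the cursor by tokenCount tokens; moves to the EOF state
    // when the target index is past the last token.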
    skip: function(tokenCount) {
        var next = this.tokenIndex + tokenCount;

        if (next < this.tokenCount) {
            this.tokenIndex = next;
            this.tokenStart = this.offsetAndType[next - 1] & OFFSET_MASK;
            next = this.offsetAndType[next];
            this.tokenType = next >> TYPE_SHIFT;
            this.tokenEnd = next & OFFSET_MASK;
        } else {
            this.tokenIndex = this.tokenCount;
            this.next();
        }
    },
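    // Advance the cursor to the next token; switches to the EOF state
    // when the end of the token list is reached.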
    next: function() {
        var next = this.tokenIndex + 1;

        if (next < this.tokenCount) {
            this.tokenIndex = next;
            this.tokenStart = this.tokenEnd;
            next = this.offsetAndType[next];
            this.tokenType = next >> TYPE_SHIFT;
            this.tokenEnd = next & OFFSET_MASK;
        } else {
            this.tokenIndex = this.tokenCount;
            this.eof = true;
            this.tokenType = EOF;
            this.tokenStart = this.tokenEnd = this.source.length;
        }
    },

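    // Invoke fn(type, start, end, index) for every token in the stream.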
    forEachToken(fn) {
        for (var i = 0, offset = this.firstCharOffset; i < this.tokenCount; i++) {
            var start = offset;
            var item = this.offsetAndType[i];
            var end = item & OFFSET_MASK;
            var type = item >> TYPE_SHIFT;

            offset = end;

            fn(type, start, end, i);
        }
    },

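    // Materialize all tokens as an array of { idx, type, chunk, balance } objects.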
    dump() {
        var tokens = new Array(this.tokenCount);

        this.forEachToken((type, start, end, index) => {
            tokens[index] = {
                idx: index,
                type: NAME[type],
                chunk: this.source.substring(start, end),
                balance: this.balance[index]
            };
        });

        return tokens;
    }
};

module.exports = TokenStream;