32 #include "../include/v8stdint.h"
42 : unicode_cache_(unicode_cache),
44 harmony_scoping_(
false),
45 harmony_modules_(
false) { }
55 has_line_terminator_before_next_ =
true;
61 uc32 Scanner::ScanHexNumber(
int expected_length) {
62 ASSERT(expected_length <= 4);
64 uc32 digits[4] = { 0, 0, 0, 0 };
66 for (
int i = 0; i < expected_length; i++) {
75 for (
int j = i-1; j >= 0; j--) {
92 static const byte one_char_tokens[] = {
226 has_line_terminator_before_next_ =
false;
227 has_multiline_comment_before_next_ =
false;
228 if (static_cast<unsigned>(c0_) <= 0x7f) {
230 if (token != Token::ILLEGAL) {
231 int pos = source_pos();
233 next_.location.beg_pos = pos;
234 next_.location.end_pos = pos + 1;
236 return current_.token;
240 return current_.token;
244 static inline bool IsByteOrderMark(
uc32 c) {
252 return c == 0xFEFF || c == 0xFFFE;
256 bool Scanner::SkipWhiteSpace() {
257 int start_position = source_pos();
262 while (unicode_cache_->
IsWhiteSpace(c0_) || IsByteOrderMark(c0_)) {
267 has_line_terminator_before_next_ =
true;
276 if (c0_ ==
'-' && has_line_terminator_before_next_) {
282 SkipSingleLineComment();
291 return source_pos() != start_position;
308 return Token::WHITESPACE;
322 has_multiline_comment_before_next_ =
true;
327 if (ch ==
'*' && c0_ ==
'/') {
329 return Token::WHITESPACE;
334 return Token::ILLEGAL;
344 if (c0_ ==
'-')
return SkipSingleLineComment();
353 void Scanner::Scan() {
354 next_.literal_chars =
NULL;
358 next_.location.beg_pos = source_pos();
364 token = Token::WHITESPACE;
369 has_line_terminator_before_next_ =
true;
370 token = Token::WHITESPACE;
374 token = ScanString();
381 token = Select(Token::LTE);
382 }
else if (c0_ ==
'<') {
383 token = Select(
'=', Token::ASSIGN_SHL, Token::SHL);
384 }
else if (c0_ ==
'!') {
385 token = ScanHtmlComment();
395 token = Select(Token::GTE);
396 }
else if (c0_ ==
'>') {
400 token = Select(Token::ASSIGN_SAR);
401 }
else if (c0_ ==
'>') {
402 token = Select(
'=', Token::ASSIGN_SHR, Token::SHR);
415 token = Select(
'=', Token::EQ_STRICT,
Token::EQ);
417 token = Token::ASSIGN;
425 token = Select(
'=', Token::NE_STRICT, Token::NE);
435 token = Select(Token::INC);
436 }
else if (c0_ ==
'=') {
437 token = Select(Token::ASSIGN_ADD);
448 if (c0_ ==
'>' && has_line_terminator_before_next_) {
451 token = SkipSingleLineComment();
455 }
else if (c0_ ==
'=') {
456 token = Select(Token::ASSIGN_SUB);
464 token = Select(
'=', Token::ASSIGN_MUL,
Token::MUL);
469 token = Select(
'=', Token::ASSIGN_MOD, Token::MOD);
476 token = SkipSingleLineComment();
477 }
else if (c0_ ==
'*') {
478 token = SkipMultiLineComment();
479 }
else if (c0_ ==
'=') {
480 token = Select(Token::ASSIGN_DIV);
491 }
else if (c0_ ==
'=') {
492 token = Select(Token::ASSIGN_BIT_AND);
494 token = Token::BIT_AND;
503 }
else if (c0_ ==
'=') {
504 token = Select(Token::ASSIGN_BIT_OR);
506 token = Token::BIT_OR;
512 token = Select(
'=', Token::ASSIGN_BIT_XOR, Token::BIT_XOR);
519 token = ScanNumber(
true);
521 token = Token::PERIOD;
526 token = Select(Token::COLON);
530 token = Select(Token::SEMICOLON);
534 token = Select(Token::COMMA);
538 token = Select(Token::LPAREN);
542 token = Select(Token::RPAREN);
546 token = Select(Token::LBRACK);
550 token = Select(Token::RBRACK);
554 token = Select(Token::LBRACE);
558 token = Select(Token::RBRACE);
562 token = Select(Token::CONDITIONAL);
566 token = Select(Token::BIT_NOT);
571 token = ScanIdentifierOrKeyword();
573 token = ScanNumber(
false);
574 }
else if (SkipWhiteSpace()) {
575 token = Token::WHITESPACE;
576 }
else if (c0_ < 0) {
579 token = Select(Token::ILLEGAL);
586 }
while (token == Token::WHITESPACE);
588 next_.location.end_pos = source_pos();
596 if (pos == next_.location.beg_pos)
return;
597 int current_pos = source_pos();
598 ASSERT_EQ(next_.location.end_pos, current_pos);
600 ASSERT(pos >= current_pos);
601 if (pos != current_pos) {
607 has_line_terminator_before_next_ =
false;
608 has_multiline_comment_before_next_ =
false;
614 bool Scanner::ScanEscape() {
631 case 'b' : c =
'\b';
break;
632 case 'f' : c =
'\f';
break;
633 case 'n' : c =
'\n';
break;
634 case 'r' : c =
'\r';
break;
635 case 't' : c =
'\t';
break;
637 c = ScanHexNumber(4);
638 if (c < 0)
return false;
641 case 'v' : c =
'\v';
break;
643 c = ScanHexNumber(2);
644 if (c < 0)
return false;
670 for (; i < length; i++) {
672 if (d < 0 || d > 7)
break;
674 if (nx >= 256)
break;
683 if (c !=
'0' || i > 0) {
684 octal_pos_ =
Location(source_pos() - i - 1, source_pos() - 1);
694 LiteralScope literal(
this);
695 while (c0_ != quote && c0_ >= 0
700 if (c0_ < 0 || !ScanEscape())
return Token::ILLEGAL;
705 if (c0_ != quote)
return Token::ILLEGAL;
709 return Token::STRING;
713 void Scanner::ScanDecimalDigits() {
715 AddLiteralCharAdvance();
722 enum { DECIMAL, HEX, OCTAL } kind = DECIMAL;
724 LiteralScope literal(
this);
733 int start_pos = source_pos();
734 AddLiteralCharAdvance();
737 if (c0_ ==
'x' || c0_ ==
'X') {
740 AddLiteralCharAdvance();
743 return Token::ILLEGAL;
746 AddLiteralCharAdvance();
748 }
else if (
'0' <= c0_ && c0_ <=
'7') {
752 if (c0_ ==
'8' || c0_ ==
'9') {
756 if (c0_ <
'0' ||
'7' < c0_) {
758 octal_pos_ = Location(start_pos, source_pos());
761 AddLiteralCharAdvance();
767 if (kind == DECIMAL) {
770 AddLiteralCharAdvance();
777 if (c0_ ==
'e' || c0_ ==
'E') {
779 if (kind == OCTAL)
return Token::ILLEGAL;
781 AddLiteralCharAdvance();
782 if (c0_ ==
'+' || c0_ ==
'-')
783 AddLiteralCharAdvance();
786 return Token::ILLEGAL;
796 return Token::ILLEGAL;
800 return Token::NUMBER;
804 uc32 Scanner::ScanIdentifierUnicodeEscape() {
806 if (c0_ !=
'u')
return -1;
808 uc32 result = ScanHexNumber(4);
809 if (result < 0) PushBack(
'u');
817 #define KEYWORDS(KEYWORD_GROUP, KEYWORD) \
819 KEYWORD("break", Token::BREAK) \
821 KEYWORD("case", Token::CASE) \
822 KEYWORD("catch", Token::CATCH) \
823 KEYWORD("class", Token::FUTURE_RESERVED_WORD) \
824 KEYWORD("const", Token::CONST) \
825 KEYWORD("continue", Token::CONTINUE) \
827 KEYWORD("debugger", Token::DEBUGGER) \
828 KEYWORD("default", Token::DEFAULT) \
829 KEYWORD("delete", Token::DELETE) \
830 KEYWORD("do", Token::DO) \
832 KEYWORD("else", Token::ELSE) \
833 KEYWORD("enum", Token::FUTURE_RESERVED_WORD) \
834 KEYWORD("export", harmony_modules \
835 ? Token::EXPORT : Token::FUTURE_RESERVED_WORD) \
836 KEYWORD("extends", Token::FUTURE_RESERVED_WORD) \
838 KEYWORD("false", Token::FALSE_LITERAL) \
839 KEYWORD("finally", Token::FINALLY) \
840 KEYWORD("for", Token::FOR) \
841 KEYWORD("function", Token::FUNCTION) \
843 KEYWORD("if", Token::IF) \
844 KEYWORD("implements", Token::FUTURE_STRICT_RESERVED_WORD) \
845 KEYWORD("import", harmony_modules \
846 ? Token::IMPORT : Token::FUTURE_RESERVED_WORD) \
847 KEYWORD("in", Token::IN) \
848 KEYWORD("instanceof", Token::INSTANCEOF) \
849 KEYWORD("interface", Token::FUTURE_STRICT_RESERVED_WORD) \
851 KEYWORD("let", harmony_scoping \
852 ? Token::LET : Token::FUTURE_STRICT_RESERVED_WORD) \
854 KEYWORD("new", Token::NEW) \
855 KEYWORD("null", Token::NULL_LITERAL) \
857 KEYWORD("package", Token::FUTURE_STRICT_RESERVED_WORD) \
858 KEYWORD("private", Token::FUTURE_STRICT_RESERVED_WORD) \
859 KEYWORD("protected", Token::FUTURE_STRICT_RESERVED_WORD) \
860 KEYWORD("public", Token::FUTURE_STRICT_RESERVED_WORD) \
862 KEYWORD("return", Token::RETURN) \
864 KEYWORD("static", Token::FUTURE_STRICT_RESERVED_WORD) \
865 KEYWORD("super", Token::FUTURE_RESERVED_WORD) \
866 KEYWORD("switch", Token::SWITCH) \
868 KEYWORD("this", Token::THIS) \
869 KEYWORD("throw", Token::THROW) \
870 KEYWORD("true", Token::TRUE_LITERAL) \
871 KEYWORD("try", Token::TRY) \
872 KEYWORD("typeof", Token::TYPEOF) \
874 KEYWORD("var", Token::VAR) \
875 KEYWORD("void", Token::VOID) \
877 KEYWORD("while", Token::WHILE) \
878 KEYWORD("with", Token::WITH) \
880 KEYWORD("yield", Token::FUTURE_STRICT_RESERVED_WORD)
883 static Token::Value KeywordOrIdentifierToken(
const char* input,
885 bool harmony_scoping,
886 bool harmony_modules) {
887 ASSERT(input_length >= 1);
888 const int kMinLength = 2;
889 const int kMaxLength = 10;
890 if (input_length < kMinLength || input_length > kMaxLength) {
891 return Token::IDENTIFIER;
895 #define KEYWORD_GROUP_CASE(ch) \
898 #define KEYWORD(keyword, token) \
902 const int keyword_length = sizeof(keyword) - 1; \
903 STATIC_ASSERT(keyword_length >= kMinLength); \
904 STATIC_ASSERT(keyword_length <= kMaxLength); \
905 if (input_length == keyword_length && \
906 input[1] == keyword[1] && \
907 (keyword_length <= 2 || input[2] == keyword[2]) && \
908 (keyword_length <= 3 || input[3] == keyword[3]) && \
909 (keyword_length <= 4 || input[4] == keyword[4]) && \
910 (keyword_length <= 5 || input[5] == keyword[5]) && \
911 (keyword_length <= 6 || input[6] == keyword[6]) && \
912 (keyword_length <= 7 || input[7] == keyword[7]) && \
913 (keyword_length <= 8 || input[8] == keyword[8]) && \
914 (keyword_length <= 9 || input[9] == keyword[9])) { \
920 return Token::IDENTIFIER;
926 LiteralScope literal(
this);
929 uc32 c = ScanIdentifierUnicodeEscape();
934 return Token::ILLEGAL;
937 return ScanIdentifierSuffix(&literal);
940 uc32 first_char = c0_;
942 AddLiteralChar(first_char);
947 uc32 next_char = c0_;
949 AddLiteralChar(next_char);
953 return ScanIdentifierSuffix(&literal);
958 if (next_.literal_chars->is_ascii()) {
959 Vector<const char> chars = next_.literal_chars->ascii_literal();
960 return KeywordOrIdentifierToken(chars.start(),
966 return Token::IDENTIFIER;
970 Token::Value Scanner::ScanIdentifierSuffix(LiteralScope* literal) {
974 uc32 c = ScanIdentifierUnicodeEscape();
979 return Token::ILLEGAL;
989 return Token::IDENTIFIER;
995 bool in_character_class =
false;
999 next_.location.beg_pos = source_pos() - (seen_equal ? 2 : 1);
1000 next_.location.end_pos = source_pos() - (seen_equal ? 1 : 0);
1007 AddLiteralChar(
'=');
1010 while (c0_ !=
'/' || in_character_class) {
1013 AddLiteralCharAdvance();
1015 AddLiteralCharAdvance();
1027 if (c0_ ==
'[') in_character_class =
true;
1028 if (c0_ ==
']') in_character_class =
false;
1029 AddLiteralCharAdvance();
1040 bool Scanner::ScanLiteralUnicodeEscape() {
1042 uc32 chars_read[6] = {
'\\',
'u', 0, 0, 0, 0};
1050 chars_read[i] = c0_;
1058 PushBack(chars_read[i]);
1063 for (
int i = 0; i < 6; i++) {
1064 AddLiteralChar(chars_read[i]);
1075 AddLiteralCharAdvance();
1077 if (!ScanLiteralUnicodeEscape()) {
1085 next_.location.end_pos = source_pos() - 1;
bool IsIdentifierPart(unibrow::uchar c)
uc32 ScanOctalEscape(uc32 c, int length)
bool IsWhiteSpace(unibrow::uchar c)
Scanner(UnicodeCache *scanner_contants)
#define ASSERT(condition)
#define KEYWORD(keyword, token)
STATIC_ASSERT((FixedDoubleArray::kHeaderSize &kDoubleAlignmentMask)==0)
void SeekForward(int pos)
#define KEYWORD_GROUP_CASE(ch)
#define KEYWORDS(KEYWORD_GROUP, KEYWORD)
activate correct semantics for inheriting readonliness false
void Initialize(Utf16CharacterStream *source)
bool IsLineTerminator(unibrow::uchar c)
bool IsCarriageReturn(uc32 c)
#define ASSERT_EQ(v1, v2)
activate correct semantics for inheriting readonliness enable harmony semantics for typeof enable harmony enable harmony proxies enable all harmony harmony_scoping harmony_proxies harmony_scoping tracks arrays with only smi values automatically unbox arrays of doubles use crankshaft use hydrogen range analysis use hydrogen global value numbering use function inlining maximum number of AST nodes considered for a single inlining loop invariant code motion print statistics for hydrogen trace generated IR for specified phases trace register allocator trace range analysis trace representation types environment for every instruction put a break point before deoptimizing polymorphic inlining perform array bounds checks elimination use dead code elimination trace on stack replacement optimize closures cache optimized code for closures functions with arguments object loop weight for representation inference allow uint32 values on optimize frames if they are used only in safe operations track parallel recompilation enable all profiler experiments number of stack frames inspected by the profiler call recompile stub directly when self optimizing trigger profiler ticks based on counting instead of timing weight back edges by jump distance for interrupt triggering percentage of ICs that must have type info to allow optimization watch_ic_patching retry_self_opt interrupt_at_exit extra verbose compilation tracing generate extra emit comments in code disassembly enable use of SSE3 instructions if available enable use of CMOV instruction if available enable use of SAHF instruction if enable use of VFP3 instructions if available this implies enabling ARMv7 and VFP2 enable use of VFP2 instructions if available enable use of SDIV and UDIV instructions if enable loading bit constant by means of movw movt instruction enable unaligned accesses for enable use of MIPS FPU instructions if NULL
bool IsIdentifierStart(unibrow::uchar c)
unsigned SeekForward(unsigned code_unit_count)
bool ScanRegExpPattern(bool seen_equal)
bool IsDecimalDigit(uc32 c)