v8  3.11.10(node0.8.26)
V8 is Google's open source JavaScript engine
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
dateparser.h
Go to the documentation of this file.
1 // Copyright 2011 the V8 project authors. All rights reserved.
2 // Redistribution and use in source and binary forms, with or without
3 // modification, are permitted provided that the following conditions are
4 // met:
5 //
6 // * Redistributions of source code must retain the above copyright
7 // notice, this list of conditions and the following disclaimer.
8 // * Redistributions in binary form must reproduce the above
9 // copyright notice, this list of conditions and the following
10 // disclaimer in the documentation and/or other materials provided
11 // with the distribution.
12 // * Neither the name of Google Inc. nor the names of its
13 // contributors may be used to endorse or promote products derived
14 // from this software without specific prior written permission.
15 //
16 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
18 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
19 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
20 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
22 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
26 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 
28 #ifndef V8_DATEPARSER_H_
29 #define V8_DATEPARSER_H_
30 
31 #include "allocation.h"
32 #include "char-predicates-inl.h"
33 
34 namespace v8 {
35 namespace internal {
36 
37 class DateParser : public AllStatic {
38  public:
39 // Parse the string as a date. If parsing succeeds, return true after
40 // filling out the output array as follows (all integers are Smis):
41 // [0]: year
42 // [1]: month (0 = Jan, 1 = Feb, ...)
43 // [2]: day
44 // [3]: hour
45 // [4]: minute
46 // [5]: second
47 // [6]: millisecond
48 // [7]: UTC offset in seconds, or null value if no timezone specified
49 // If parsing fails, return false (content of output array is not defined).
// Char is the character type of the elements of the input vector `str`.
// NOTE(review): `cache` is presumably used for Unicode white-space
// classification (InputReader takes a UnicodeCache*) - confirm in the
// definition, which is not part of this header.
50 template <typename Char>
51 static bool Parse(Vector<Char> str, FixedArray* output, UnicodeCache* cache);
52 
// Indices of the slots in the output array filled in by Parse(), in the
// order documented there; OUTPUT_SIZE is the total number of slots.
// NOTE(review): the enumerator line was missing from this listing (an empty
// unnamed enum declares nothing); the names below were restored to mirror
// the documented slot order - confirm against the users of this enum.
enum {
  YEAR,
  MONTH,
  DAY,
  HOUR,
  MINUTE,
  SECOND,
  MILLISECOND,
  UTC_OFFSET,
  OUTPUT_SIZE
};
56 
57  private:
// Range testing: returns true iff lo <= x <= hi. Requires lo <= hi.
//
// Both bounds are checked with a single unsigned comparison: if x < lo the
// offset wraps around to a huge value and the test fails. The subtractions
// are carried out on unsigned operands so that they wrap (well defined)
// instead of overflowing a signed int (undefined behaviour) for extreme
// arguments such as kMaxInt or INT_MIN.
static inline bool Between(int x, int lo, int hi) {
  const unsigned offset = static_cast<unsigned>(x) - static_cast<unsigned>(lo);
  const unsigned width = static_cast<unsigned>(hi) - static_cast<unsigned>(lo);
  return offset <= width;
}
62 
63 // Sentinel indicating a missing value. The composer classes in this header
// initialise their not-yet-seen fields (time-zone sign/hour/minute, hour
// offset, named month) to it.
64 static const int kNone = kMaxInt;
65 
66 // Maximal number of digits used to build the value of a numeral.
67 // Remaining digits are consumed but ignored (see ReadUnsignedNumeral).
68 static const int kMaxSignificantDigits = 9;
69 
70  // InputReader provides basic string parsing and character classification.
71  template <typename Char>
72  class InputReader BASE_EMBEDDED {
73  public:
74  InputReader(UnicodeCache* unicode_cache, Vector<Char> s)
75  : index_(0),
76  buffer_(s),
77  unicode_cache_(unicode_cache) {
78  Next();
79  }
80 
81  int position() { return index_; }
82 
83  // Advance to the next character of the string.
84  void Next() {
85  ch_ = (index_ < buffer_.length()) ? buffer_[index_] : 0;
86  index_++;
87  }
88 
89  // Read a string of digits as an unsigned number. Cap value at
90  // kMaxSignificantDigits, but skip remaining digits if the numeral
91  // is longer.
92  int ReadUnsignedNumeral() {
93  int n = 0;
94  int i = 0;
95  while (IsAsciiDigit()) {
96  if (i < kMaxSignificantDigits) n = n * 10 + ch_ - '0';
97  i++;
98  Next();
99  }
100  return n;
101  }
102 
103  // Read a word (sequence of chars. >= 'A'), fill the given buffer with a
104  // lower-case prefix, and pad any remainder of the buffer with zeroes.
105  // Return word length.
106  int ReadWord(uint32_t* prefix, int prefix_size) {
107  int len;
108  for (len = 0; IsAsciiAlphaOrAbove(); Next(), len++) {
109  if (len < prefix_size) prefix[len] = AsciiAlphaToLower(ch_);
110  }
111  for (int i = len; i < prefix_size; i++) prefix[i] = 0;
112  return len;
113  }
114 
115  // The skip methods return whether they actually skipped something.
116  bool Skip(uint32_t c) {
117  if (ch_ == c) {
118  Next();
119  return true;
120  }
121  return false;
122  }
123 
124  bool SkipWhiteSpace() {
125  if (unicode_cache_->IsWhiteSpace(ch_)) {
126  Next();
127  return true;
128  }
129  return false;
130  }
131 
132  bool SkipParentheses() {
133  if (ch_ != '(') return false;
134  int balance = 0;
135  do {
136  if (ch_ == ')') --balance;
137  else if (ch_ == '(') ++balance;
138  Next();
139  } while (balance > 0 && ch_);
140  return true;
141  }
142 
143  // Character testing/classification. Non-ASCII digits are not supported.
144  bool Is(uint32_t c) const { return ch_ == c; }
145  bool IsEnd() const { return ch_ == 0; }
146  bool IsAsciiDigit() const { return IsDecimalDigit(ch_); }
147  bool IsAsciiAlphaOrAbove() const { return ch_ >= 'A'; }
148  bool IsAsciiSign() const { return ch_ == '+' || ch_ == '-'; }
149 
150  // Return 1 for '+' and -1 for '-'.
151  int GetAsciiSignValue() const { return 44 - static_cast<int>(ch_); }
152 
153  private:
154  int index_;
155  Vector<Char> buffer_;
156  uint32_t ch_;
157  UnicodeCache* unicode_cache_;
158  };
159 
// Categories of keyword recognised by the date parser. The numeric values
// double as DateToken tags for keyword tokens.
enum KeywordType {
  INVALID,
  MONTH_NAME,
  TIME_ZONE_NAME,
  TIME_SEPARATOR,
  AM_PM
};
163 
164  struct DateToken {
165  public:
166  bool IsInvalid() { return tag_ == kInvalidTokenTag; }
167  bool IsUnknown() { return tag_ == kUnknownTokenTag; }
168  bool IsNumber() { return tag_ == kNumberTag; }
169  bool IsSymbol() { return tag_ == kSymbolTag; }
170  bool IsWhiteSpace() { return tag_ == kWhiteSpaceTag; }
171  bool IsEndOfInput() { return tag_ == kEndOfInputTag; }
172  bool IsKeyword() { return tag_ >= kKeywordTagStart; }
173 
174  int length() { return length_; }
175 
176  int number() {
177  ASSERT(IsNumber());
178  return value_;
179  }
180  KeywordType keyword_type() {
181  ASSERT(IsKeyword());
182  return static_cast<KeywordType>(tag_);
183  }
184  int keyword_value() {
185  ASSERT(IsKeyword());
186  return value_;
187  }
188  char symbol() {
189  ASSERT(IsSymbol());
190  return static_cast<char>(value_);
191  }
192  bool IsSymbol(char symbol) {
193  return IsSymbol() && this->symbol() == symbol;
194  }
195  bool IsKeywordType(KeywordType tag) {
196  return tag_ == tag;
197  }
198  bool IsFixedLengthNumber(int length) {
199  return IsNumber() && length_ == length;
200  }
201  bool IsAsciiSign() {
202  return tag_ == kSymbolTag && (value_ == '-' || value_ == '+');
203  }
204  int ascii_sign() {
205  ASSERT(IsAsciiSign());
206  return 44 - value_;
207  }
208  bool IsKeywordZ() {
209  return IsKeywordType(TIME_ZONE_NAME) && length_ == 1 && value_ == 0;
210  }
211  bool IsUnknown(int character) {
212  return IsUnknown() && value_ == character;
213  }
214  // Factory functions.
215  static DateToken Keyword(KeywordType tag, int value, int length) {
216  return DateToken(tag, length, value);
217  }
218  static DateToken Number(int value, int length) {
219  return DateToken(kNumberTag, length, value);
220  }
221  static DateToken Symbol(char symbol) {
222  return DateToken(kSymbolTag, 1, symbol);
223  }
224  static DateToken EndOfInput() {
225  return DateToken(kEndOfInputTag, 0, -1);
226  }
227  static DateToken WhiteSpace(int length) {
228  return DateToken(kWhiteSpaceTag, length, -1);
229  }
230  static DateToken Unknown() {
231  return DateToken(kUnknownTokenTag, 1, -1);
232  }
233  static DateToken Invalid() {
234  return DateToken(kInvalidTokenTag, 0, -1);
235  }
236 
237  private:
238  enum TagType {
239  kInvalidTokenTag = -6,
240  kUnknownTokenTag = -5,
241  kWhiteSpaceTag = -4,
242  kNumberTag = -3,
243  kSymbolTag = -2,
244  kEndOfInputTag = -1,
245  kKeywordTagStart = 0
246  };
247  DateToken(int tag, int length, int value)
248  : tag_(tag),
249  length_(length),
250  value_(value) { }
251 
252  int tag_;
253  int length_; // Number of characters.
254  int value_;
255  };
256 
257  template <typename Char>
258  class DateStringTokenizer {
259  public:
260  explicit DateStringTokenizer(InputReader<Char>* in)
261  : in_(in), next_(Scan()) { }
262  DateToken Next() {
263  DateToken result = next_;
264  next_ = Scan();
265  return result;
266  }
267 
268  DateToken Peek() {
269  return next_;
270  }
271  bool SkipSymbol(char symbol) {
272  if (next_.IsSymbol(symbol)) {
273  next_ = Scan();
274  return true;
275  }
276  return false;
277  }
278 
279  private:
280  DateToken Scan();
281 
282  InputReader<Char>* in_;
283  DateToken next_;
284  };
285 
// NOTE(review): defined outside this header. Presumably converts a number
// token holding the digits of a fractional second into milliseconds -
// confirm against the definition.
286 static int ReadMilliseconds(DateToken number);
287 
288  // KeywordTable maps names of months, time zones, am/pm to numbers.
289  class KeywordTable : public AllStatic {
290  public:
291  // Look up a word in the keyword table and return an index.
292  // 'pre' contains a prefix of the word, zero-padded to size kPrefixLength
293  // and 'len' is the word length.
294  static int Lookup(const uint32_t* pre, int len);
295  // Get the type of the keyword at index i.
296  static KeywordType GetType(int i) {
297  return static_cast<KeywordType>(array[i][kTypeOffset]);
298  }
299  // Get the value of the keyword at index i.
300  static int GetValue(int i) { return array[i][kValueOffset]; }
301 
302  static const int kPrefixLength = 3;
303  static const int kTypeOffset = kPrefixLength;
304  static const int kValueOffset = kTypeOffset + 1;
305  static const int kEntrySize = kValueOffset + 1;
306  static const int8_t array[][kEntrySize];
307  };
308 
309  class TimeZoneComposer BASE_EMBEDDED {
310  public:
311  TimeZoneComposer() : sign_(kNone), hour_(kNone), minute_(kNone) {}
312  void Set(int offset_in_hours) {
313  sign_ = offset_in_hours < 0 ? -1 : 1;
314  hour_ = offset_in_hours * sign_;
315  minute_ = 0;
316  }
317  void SetSign(int sign) { sign_ = sign < 0 ? -1 : 1; }
318  void SetAbsoluteHour(int hour) { hour_ = hour; }
319  void SetAbsoluteMinute(int minute) { minute_ = minute; }
320  bool IsExpecting(int n) const {
321  return hour_ != kNone && minute_ == kNone && TimeComposer::IsMinute(n);
322  }
323  bool IsUTC() const { return hour_ == 0 && minute_ == 0; }
324  bool Write(FixedArray* output);
325  bool IsEmpty() { return hour_ == kNone; }
326  private:
327  int sign_;
328  int hour_;
329  int minute_;
330  };
331 
332  class TimeComposer BASE_EMBEDDED {
333  public:
334  TimeComposer() : index_(0), hour_offset_(kNone) {}
335  bool IsEmpty() const { return index_ == 0; }
336  bool IsExpecting(int n) const {
337  return (index_ == 1 && IsMinute(n)) ||
338  (index_ == 2 && IsSecond(n)) ||
339  (index_ == 3 && IsMillisecond(n));
340  }
341  bool Add(int n) {
342  return index_ < kSize ? (comp_[index_++] = n, true) : false;
343  }
344  bool AddFinal(int n) {
345  if (!Add(n)) return false;
346  while (index_ < kSize) comp_[index_++] = 0;
347  return true;
348  }
349  void SetHourOffset(int n) { hour_offset_ = n; }
350  bool Write(FixedArray* output);
351 
352  static bool IsMinute(int x) { return Between(x, 0, 59); }
353  static bool IsHour(int x) { return Between(x, 0, 23); }
354  static bool IsSecond(int x) { return Between(x, 0, 59); }
355 
356  private:
357  static bool IsHour12(int x) { return Between(x, 0, 12); }
358  static bool IsMillisecond(int x) { return Between(x, 0, 999); }
359 
360  static const int kSize = 4;
361  int comp_[kSize];
362  int index_;
363  int hour_offset_;
364  };
365 
366  class DayComposer BASE_EMBEDDED {
367  public:
368  DayComposer() : index_(0), named_month_(kNone), is_iso_date_(false) {}
369  bool IsEmpty() const { return index_ == 0; }
370  bool Add(int n) {
371  if (index_ < kSize) {
372  comp_[index_] = n;
373  index_++;
374  return true;
375  }
376  return false;
377  }
378  void SetNamedMonth(int n) { named_month_ = n; }
379  bool Write(FixedArray* output);
380  void set_iso_date() { is_iso_date_ = true; }
381  static bool IsMonth(int x) { return Between(x, 1, 12); }
382  static bool IsDay(int x) { return Between(x, 1, 31); }
383 
384  private:
385  static const int kSize = 3;
386  int comp_[kSize];
387  int index_;
388  int named_month_;
389  // If set, ensures that data is always parsed in year-month-date order.
390  bool is_iso_date_;
391  };
392 
393 // Tries to parse an ES5 Date Time String. Returns the next token
394 // to continue with in the legacy date string parser. If parsing is
395 // complete, returns DateToken::EndOfInput(). If terminally unsuccessful,
396 // returns DateToken::Invalid(). Otherwise parsing continues in the
397 // legacy parser.
// Parameters (roles inferred from their types - confirm against the
// definition, which is not part of this header):
//   scanner: tokenizer supplying the tokens of the date string.
//   day, time, tz: composers that presumably receive the parsed date,
//                  time and time-zone components.
398 template <typename Char>
399 static DateParser::DateToken ParseES5DateTime(
400 DateStringTokenizer<Char>* scanner,
401 DayComposer* day,
402 TimeComposer* time,
403 TimeZoneComposer* tz);
404 };
405 
406 
407 } } // namespace v8::internal
408 
409 #endif // V8_DATEPARSER_H_
int AsciiAlphaToLower(uc32 c)
static bool Parse(Vector< Char > str, FixedArray *output, UnicodeCache *cache)
const int kMaxInt
Definition: globals.h:224
#define ASSERT(condition)
Definition: checks.h:270
StringInputBuffer *const buffer_
#define BASE_EMBEDDED
Definition: allocation.h:68
IN DWORD64 OUT PDWORD64 OUT PIMAGEHLP_SYMBOL64 Symbol
const uint32_t kSymbolTag
Definition: objects.h:445
bool IsDecimalDigit(uc32 c)