v8  3.14.5(node0.10.28)
V8 is Google's open source JavaScript engine
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
dateparser-inl.h
Go to the documentation of this file.
1 // Copyright 2011 the V8 project authors. All rights reserved.
2 // Redistribution and use in source and binary forms, with or without
3 // modification, are permitted provided that the following conditions are
4 // met:
5 //
6 // * Redistributions of source code must retain the above copyright
7 // notice, this list of conditions and the following disclaimer.
8 // * Redistributions in binary form must reproduce the above
9 // copyright notice, this list of conditions and the following
10 // disclaimer in the documentation and/or other materials provided
11 // with the distribution.
12 // * Neither the name of Google Inc. nor the names of its
13 // contributors may be used to endorse or promote products derived
14 // from this software without specific prior written permission.
15 //
16 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
18 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
19 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
20 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
22 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
26 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 
28 #ifndef V8_DATEPARSER_INL_H_
29 #define V8_DATEPARSER_INL_H_
30 
31 #include "dateparser.h"
32 
33 namespace v8 {
34 namespace internal {
35 
36 template <typename Char>
38  FixedArray* out,
39  UnicodeCache* unicode_cache) {
40  ASSERT(out->length() >= OUTPUT_SIZE);
41  InputReader<Char> in(unicode_cache, str);
42  DateStringTokenizer<Char> scanner(&in);
43  TimeZoneComposer tz;
44  TimeComposer time;
45  DayComposer day;
46 
47  // Specification:
48  // Accept ES5 ISO 8601 date-time-strings or legacy dates compatible
49  // with Safari.
50  // ES5 ISO 8601 dates:
51  // [('-'|'+')yy]yyyy[-MM[-DD]][THH:mm[:ss[.sss]][Z|(+|-)hh:mm]]
52  // where yyyy is in the range 0000..9999 and
53  // +/-yyyyyy is in the range -999999..+999999 -
54  // but -000000 is invalid (year zero must be positive),
55  // MM is in the range 01..12,
56  // DD is in the range 01..31,
57  // MM and DD defaults to 01 if missing,,
58  // HH is generally in the range 00..23, but can be 24 if mm, ss
59  // and sss are zero (or missing), representing midnight at the
60  // end of a day,
61  // mm and ss are in the range 00..59,
62  // sss is in the range 000..999,
63  // hh is in the range 00..23,
64  // mm, ss, and sss default to 00 if missing, and
65  // timezone defaults to Z if missing
66  // (following Safari, ISO actually demands local time).
67  // Extensions:
68  // We also allow sss to have more or less than three digits (but at
69  // least one).
70  // We allow hh:mm to be specified as hhmm.
71  // Legacy dates:
72  // Any unrecognized word before the first number is ignored.
73  // Parenthesized text is ignored.
74  // An unsigned number followed by ':' is a time value, and is
75  // added to the TimeComposer. A number followed by '::' adds a second
76  // zero as well. A number followed by '.' is also a time and must be
77  // followed by milliseconds.
78  // Any other number is a date component and is added to DayComposer.
79  // A month name (or really: any word having the same first three letters
80  // as a month name) is recorded as a named month in the Day composer.
81  // A word recognizable as a time-zone is recorded as such, as is
82  // '(+|-)(hhmm|hh:)'.
83  // Legacy dates don't allow extra signs ('+' or '-') or umatched ')'
84  // after a number has been read (before the first number, any garbage
85  // is allowed).
86  // Intersection of the two:
87  // A string that matches both formats (e.g. 1970-01-01) will be
88  // parsed as an ES5 date-time string - which means it will default
89  // to UTC time-zone. That's unavoidable if following the ES5
90  // specification.
91  // After a valid "T" has been read while scanning an ES5 datetime string,
92  // the input can no longer be a valid legacy date, since the "T" is a
93  // garbage string after a number has been read.
94 
95  // First try getting as far as possible with as ES5 Date Time String.
96  DateToken next_unhandled_token = ParseES5DateTime(&scanner, &day, &time, &tz);
97  if (next_unhandled_token.IsInvalid()) return false;
98  bool has_read_number = !day.IsEmpty();
99  // If there's anything left, continue with the legacy parser.
100  for (DateToken token = next_unhandled_token;
101  !token.IsEndOfInput();
102  token = scanner.Next()) {
103  if (token.IsNumber()) {
104  has_read_number = true;
105  int n = token.number();
106  if (scanner.SkipSymbol(':')) {
107  if (scanner.SkipSymbol(':')) {
108  // n + "::"
109  if (!time.IsEmpty()) return false;
110  time.Add(n);
111  time.Add(0);
112  } else {
113  // n + ":"
114  if (!time.Add(n)) return false;
115  if (scanner.Peek().IsSymbol('.')) scanner.Next();
116  }
117  } else if (scanner.SkipSymbol('.') && time.IsExpecting(n)) {
118  time.Add(n);
119  if (!scanner.Peek().IsNumber()) return false;
120  int n = ReadMilliseconds(scanner.Next());
121  if (n < 0) return false;
122  time.AddFinal(n);
123  } else if (tz.IsExpecting(n)) {
124  tz.SetAbsoluteMinute(n);
125  } else if (time.IsExpecting(n)) {
126  time.AddFinal(n);
127  // Require end, white space, "Z", "+" or "-" immediately after
128  // finalizing time.
129  DateToken peek = scanner.Peek();
130  if (!peek.IsEndOfInput() &&
131  !peek.IsWhiteSpace() &&
132  !peek.IsKeywordZ() &&
133  !peek.IsAsciiSign()) return false;
134  } else {
135  if (!day.Add(n)) return false;
136  scanner.SkipSymbol('-');
137  }
138  } else if (token.IsKeyword()) {
139  // Parse a "word" (sequence of chars. >= 'A').
140  KeywordType type = token.keyword_type();
141  int value = token.keyword_value();
142  if (type == AM_PM && !time.IsEmpty()) {
143  time.SetHourOffset(value);
144  } else if (type == MONTH_NAME) {
145  day.SetNamedMonth(value);
146  scanner.SkipSymbol('-');
147  } else if (type == TIME_ZONE_NAME && has_read_number) {
148  tz.Set(value);
149  } else {
150  // Garbage words are illegal if a number has been read.
151  if (has_read_number) return false;
152  // The first number has to be separated from garbage words by
153  // whitespace or other separators.
154  if (scanner.Peek().IsNumber()) return false;
155  }
156  } else if (token.IsAsciiSign() && (tz.IsUTC() || !time.IsEmpty())) {
157  // Parse UTC offset (only after UTC or time).
158  tz.SetSign(token.ascii_sign());
159  // The following number may be empty.
160  int n = 0;
161  if (scanner.Peek().IsNumber()) {
162  n = scanner.Next().number();
163  }
164  has_read_number = true;
165 
166  if (scanner.Peek().IsSymbol(':')) {
167  tz.SetAbsoluteHour(n);
168  tz.SetAbsoluteMinute(kNone);
169  } else {
170  tz.SetAbsoluteHour(n / 100);
171  tz.SetAbsoluteMinute(n % 100);
172  }
173  } else if ((token.IsAsciiSign() || token.IsSymbol(')')) &&
174  has_read_number) {
175  // Extra sign or ')' is illegal if a number has been read.
176  return false;
177  } else {
178  // Ignore other characters and whitespace.
179  }
180  }
181 
182  return day.Write(out) && time.Write(out) && tz.Write(out);
183 }
184 
185 
186 template<typename CharType>
187 DateParser::DateToken DateParser::DateStringTokenizer<CharType>::Scan() {
188  int pre_pos = in_->position();
189  if (in_->IsEnd()) return DateToken::EndOfInput();
190  if (in_->IsAsciiDigit()) {
191  int n = in_->ReadUnsignedNumeral();
192  int length = in_->position() - pre_pos;
193  return DateToken::Number(n, length);
194  }
195  if (in_->Skip(':')) return DateToken::Symbol(':');
196  if (in_->Skip('-')) return DateToken::Symbol('-');
197  if (in_->Skip('+')) return DateToken::Symbol('+');
198  if (in_->Skip('.')) return DateToken::Symbol('.');
199  if (in_->Skip(')')) return DateToken::Symbol(')');
200  if (in_->IsAsciiAlphaOrAbove()) {
201  ASSERT(KeywordTable::kPrefixLength == 3);
202  uint32_t buffer[3] = {0, 0, 0};
203  int length = in_->ReadWord(buffer, 3);
204  int index = KeywordTable::Lookup(buffer, length);
205  return DateToken::Keyword(KeywordTable::GetType(index),
206  KeywordTable::GetValue(index),
207  length);
208  }
209  if (in_->SkipWhiteSpace()) {
210  return DateToken::WhiteSpace(in_->position() - pre_pos);
211  }
212  if (in_->SkipParentheses()) {
213  return DateToken::Unknown();
214  }
215  in_->Next();
216  return DateToken::Unknown();
217 }
218 
219 
220 template <typename Char>
221 DateParser::DateToken DateParser::ParseES5DateTime(
222  DateStringTokenizer<Char>* scanner,
223  DayComposer* day,
224  TimeComposer* time,
225  TimeZoneComposer* tz) {
226  ASSERT(day->IsEmpty());
227  ASSERT(time->IsEmpty());
228  ASSERT(tz->IsEmpty());
229 
230  // Parse mandatory date string: [('-'|'+')yy]yyyy[':'MM[':'DD]]
231  if (scanner->Peek().IsAsciiSign()) {
232  // Keep the sign token, so we can pass it back to the legacy
233  // parser if we don't use it.
234  DateToken sign_token = scanner->Next();
235  if (!scanner->Peek().IsFixedLengthNumber(6)) return sign_token;
236  int sign = sign_token.ascii_sign();
237  int year = scanner->Next().number();
238  if (sign < 0 && year == 0) return sign_token;
239  day->Add(sign * year);
240  } else if (scanner->Peek().IsFixedLengthNumber(4)) {
241  day->Add(scanner->Next().number());
242  } else {
243  return scanner->Next();
244  }
245  if (scanner->SkipSymbol('-')) {
246  if (!scanner->Peek().IsFixedLengthNumber(2) ||
247  !DayComposer::IsMonth(scanner->Peek().number())) return scanner->Next();
248  day->Add(scanner->Next().number());
249  if (scanner->SkipSymbol('-')) {
250  if (!scanner->Peek().IsFixedLengthNumber(2) ||
251  !DayComposer::IsDay(scanner->Peek().number())) return scanner->Next();
252  day->Add(scanner->Next().number());
253  }
254  }
255  // Check for optional time string: 'T'HH':'mm[':'ss['.'sss]]Z
256  if (!scanner->Peek().IsKeywordType(TIME_SEPARATOR)) {
257  if (!scanner->Peek().IsEndOfInput()) return scanner->Next();
258  } else {
259  // ES5 Date Time String time part is present.
260  scanner->Next();
261  if (!scanner->Peek().IsFixedLengthNumber(2) ||
262  !Between(scanner->Peek().number(), 0, 24)) {
263  return DateToken::Invalid();
264  }
265  // Allow 24:00[:00[.000]], but no other time starting with 24.
266  bool hour_is_24 = (scanner->Peek().number() == 24);
267  time->Add(scanner->Next().number());
268  if (!scanner->SkipSymbol(':')) return DateToken::Invalid();
269  if (!scanner->Peek().IsFixedLengthNumber(2) ||
270  !TimeComposer::IsMinute(scanner->Peek().number()) ||
271  (hour_is_24 && scanner->Peek().number() > 0)) {
272  return DateToken::Invalid();
273  }
274  time->Add(scanner->Next().number());
275  if (scanner->SkipSymbol(':')) {
276  if (!scanner->Peek().IsFixedLengthNumber(2) ||
277  !TimeComposer::IsSecond(scanner->Peek().number()) ||
278  (hour_is_24 && scanner->Peek().number() > 0)) {
279  return DateToken::Invalid();
280  }
281  time->Add(scanner->Next().number());
282  if (scanner->SkipSymbol('.')) {
283  if (!scanner->Peek().IsNumber() ||
284  (hour_is_24 && scanner->Peek().number() > 0)) {
285  return DateToken::Invalid();
286  }
287  // Allow more or less than the mandated three digits.
288  time->Add(ReadMilliseconds(scanner->Next()));
289  }
290  }
291  // Check for optional timezone designation: 'Z' | ('+'|'-')hh':'mm
292  if (scanner->Peek().IsKeywordZ()) {
293  scanner->Next();
294  tz->Set(0);
295  } else if (scanner->Peek().IsSymbol('+') ||
296  scanner->Peek().IsSymbol('-')) {
297  tz->SetSign(scanner->Next().symbol() == '+' ? 1 : -1);
298  if (scanner->Peek().IsFixedLengthNumber(4)) {
299  // hhmm extension syntax.
300  int hourmin = scanner->Next().number();
301  int hour = hourmin / 100;
302  int min = hourmin % 100;
303  if (!TimeComposer::IsHour(hour) || !TimeComposer::IsMinute(min)) {
304  return DateToken::Invalid();
305  }
306  tz->SetAbsoluteHour(hour);
307  tz->SetAbsoluteMinute(min);
308  } else {
309  // hh:mm standard syntax.
310  if (!scanner->Peek().IsFixedLengthNumber(2) ||
311  !TimeComposer::IsHour(scanner->Peek().number())) {
312  return DateToken::Invalid();
313  }
314  tz->SetAbsoluteHour(scanner->Next().number());
315  if (!scanner->SkipSymbol(':')) return DateToken::Invalid();
316  if (!scanner->Peek().IsFixedLengthNumber(2) ||
317  !TimeComposer::IsMinute(scanner->Peek().number())) {
318  return DateToken::Invalid();
319  }
320  tz->SetAbsoluteMinute(scanner->Next().number());
321  }
322  }
323  if (!scanner->Peek().IsEndOfInput()) return DateToken::Invalid();
324  }
325  // Successfully parsed ES5 Date Time String. Default to UTC if no TZ given.
326  if (tz->IsEmpty()) tz->Set(0);
327  day->set_iso_date();
328  return DateToken::EndOfInput();
329 }
330 
331 
332 } } // namespace v8::internal
333 
334 #endif // V8_DATEPARSER_INL_H_
static bool Parse(Vector< Char > str, FixedArray *output, UnicodeCache *cache)
#define ASSERT(condition)
Definition: checks.h:270
IN DWORD64 OUT PDWORD64 OUT PIMAGEHLP_SYMBOL64 Symbol