v8 3.25.30 (node 0.11.13)
V8 is Google's open source JavaScript engine
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
uri.h
Go to the documentation of this file.
1 // Copyright 2013 the V8 project authors. All rights reserved.
2 // Redistribution and use in source and binary forms, with or without
3 // modification, are permitted provided that the following conditions are
4 // met:
5 //
6 // * Redistributions of source code must retain the above copyright
7 // notice, this list of conditions and the following disclaimer.
8 // * Redistributions in binary form must reproduce the above
9 // copyright notice, this list of conditions and the following
10 // disclaimer in the documentation and/or other materials provided
11 // with the distribution.
12 // * Neither the name of Google Inc. nor the names of its
13 // contributors may be used to endorse or promote products derived
14 // from this software without specific prior written permission.
15 //
16 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
18 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
19 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
20 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
22 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
26 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 
28 #ifndef V8_URI_H_
29 #define V8_URI_H_
30 
31 #include "v8.h"
32 
33 #include "string-search.h"
34 #include "v8utils.h"
35 #include "v8conversions.h"
36 
37 namespace v8 {
38 namespace internal {
39 
40 
41 template <typename Char>
42 static INLINE(Vector<const Char> GetCharVector(Handle<String> string));
43 
44 
45 template <>
47  String::FlatContent flat = string->GetFlatContent();
48  ASSERT(flat.IsAscii());
49  return flat.ToOneByteVector();
50 }
51 
52 
53 template <>
55  String::FlatContent flat = string->GetFlatContent();
56  ASSERT(flat.IsTwoByte());
57  return flat.ToUC16Vector();
58 }
59 
60 
61 class URIUnescape : public AllStatic {
62  public:
63  template<typename Char>
64  static Handle<String> Unescape(Isolate* isolate, Handle<String> source);
65 
66  private:
67  static const signed char kHexValue['g'];
68 
69  template<typename Char>
70  static Handle<String> UnescapeSlow(
71  Isolate* isolate, Handle<String> string, int start_index);
72 
73  static INLINE(int TwoDigitHex(uint16_t character1, uint16_t character2));
74 
75  template <typename Char>
76  static INLINE(int UnescapeChar(Vector<const Char> vector,
77  int i,
78  int length,
79  int* step));
80 };
81 
82 
83 const signed char URIUnescape::kHexValue[] = {
84  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
85  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
86  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
87  -0, 1, 2, 3, 4, 5, 6, 7, 8, 9, -1, -1, -1, -1, -1, -1,
88  -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1,
89  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
90  -1, 10, 11, 12, 13, 14, 15 };
91 
92 
93 template<typename Char>
95  int index;
96  { DisallowHeapAllocation no_allocation;
98  index = search.Search(GetCharVector<Char>(source), 0);
99  if (index < 0) return source;
100  }
101  return UnescapeSlow<Char>(isolate, source, index);
102 }
103 
104 
105 template <typename Char>
106 Handle<String> URIUnescape::UnescapeSlow(
107  Isolate* isolate, Handle<String> string, int start_index) {
108  bool one_byte = true;
109  int length = string->length();
110 
111  int unescaped_length = 0;
112  { DisallowHeapAllocation no_allocation;
113  Vector<const Char> vector = GetCharVector<Char>(string);
114  for (int i = start_index; i < length; unescaped_length++) {
115  int step;
116  if (UnescapeChar(vector, i, length, &step) >
118  one_byte = false;
119  }
120  i += step;
121  }
122  }
123 
124  ASSERT(start_index < length);
125  Handle<String> first_part =
126  isolate->factory()->NewProperSubString(string, 0, start_index);
127 
128  int dest_position = 0;
129  Handle<String> second_part;
130  ASSERT(unescaped_length <= String::kMaxLength);
131  if (one_byte) {
132  Handle<SeqOneByteString> dest =
133  isolate->factory()->NewRawOneByteString(unescaped_length);
134  ASSERT(!dest.is_null());
135  DisallowHeapAllocation no_allocation;
136  Vector<const Char> vector = GetCharVector<Char>(string);
137  for (int i = start_index; i < length; dest_position++) {
138  int step;
139  dest->SeqOneByteStringSet(dest_position,
140  UnescapeChar(vector, i, length, &step));
141  i += step;
142  }
143  second_part = dest;
144  } else {
145  Handle<SeqTwoByteString> dest =
146  isolate->factory()->NewRawTwoByteString(unescaped_length);
147  ASSERT(!dest.is_null());
148  DisallowHeapAllocation no_allocation;
149  Vector<const Char> vector = GetCharVector<Char>(string);
150  for (int i = start_index; i < length; dest_position++) {
151  int step;
152  dest->SeqTwoByteStringSet(dest_position,
153  UnescapeChar(vector, i, length, &step));
154  i += step;
155  }
156  second_part = dest;
157  }
158  return isolate->factory()->NewConsString(first_part, second_part);
159 }
160 
161 
162 int URIUnescape::TwoDigitHex(uint16_t character1, uint16_t character2) {
163  if (character1 > 'f') return -1;
164  int hi = kHexValue[character1];
165  if (hi == -1) return -1;
166  if (character2 > 'f') return -1;
167  int lo = kHexValue[character2];
168  if (lo == -1) return -1;
169  return (hi << 4) + lo;
170 }
171 
172 
173 template <typename Char>
174 int URIUnescape::UnescapeChar(Vector<const Char> vector,
175  int i,
176  int length,
177  int* step) {
178  uint16_t character = vector[i];
179  int32_t hi = 0;
180  int32_t lo = 0;
181  if (character == '%' &&
182  i <= length - 6 &&
183  vector[i + 1] == 'u' &&
184  (hi = TwoDigitHex(vector[i + 2],
185  vector[i + 3])) != -1 &&
186  (lo = TwoDigitHex(vector[i + 4],
187  vector[i + 5])) != -1) {
188  *step = 6;
189  return (hi << 8) + lo;
190  } else if (character == '%' &&
191  i <= length - 3 &&
192  (lo = TwoDigitHex(vector[i + 1],
193  vector[i + 2])) != -1) {
194  *step = 3;
195  return lo;
196  } else {
197  *step = 1;
198  return character;
199  }
200 }
201 
202 
203 class URIEscape : public AllStatic {
204  public:
205  template<typename Char>
206  static Handle<String> Escape(Isolate* isolate, Handle<String> string);
207 
208  private:
209  static const char kHexChars[17];
210  static const char kNotEscaped[256];
211 
212  static bool IsNotEscaped(uint16_t c) { return kNotEscaped[c] != 0; }
213 };
214 
215 
216 const char URIEscape::kHexChars[] = "0123456789ABCDEF";
217 
218 
219 // kNotEscaped is generated by the following:
220 //
221 // #!/bin/perl
222 // for (my $i = 0; $i < 256; $i++) {
223 // print "\n" if $i % 16 == 0;
224 // my $c = chr($i);
225 // my $escaped = 1;
226 // $escaped = 0 if $c =~ m#[A-Za-z0-9@*_+./-]#;
227 // print $escaped ? "0, " : "1, ";
228 // }
229 
230 const char URIEscape::kNotEscaped[] = {
231  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
232  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
233  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 1,
234  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
235  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
236  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1,
237  0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
238  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
239  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
240  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
241  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
242  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
243  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
244  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
245  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
246  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
247 
248 
249 template<typename Char>
251  ASSERT(string->IsFlat());
252  int escaped_length = 0;
253  int length = string->length();
254 
255  { DisallowHeapAllocation no_allocation;
256  Vector<const Char> vector = GetCharVector<Char>(string);
257  for (int i = 0; i < length; i++) {
258  uint16_t c = vector[i];
259  if (c >= 256) {
260  escaped_length += 6;
261  } else if (IsNotEscaped(c)) {
262  escaped_length++;
263  } else {
264  escaped_length += 3;
265  }
266 
267  // We don't allow strings that are longer than a maximal length.
268  ASSERT(String::kMaxLength < 0x7fffffff - 6); // Cannot overflow.
269  if (escaped_length > String::kMaxLength) break; // Provoke exception.
270  }
271  }
272 
273  // No length change implies no change. Return original string if no change.
274  if (escaped_length == length) return string;
275 
277  isolate->factory()->NewRawOneByteString(escaped_length);
279  int dest_position = 0;
280 
281  { DisallowHeapAllocation no_allocation;
282  Vector<const Char> vector = GetCharVector<Char>(string);
283  for (int i = 0; i < length; i++) {
284  uint16_t c = vector[i];
285  if (c >= 256) {
286  dest->SeqOneByteStringSet(dest_position, '%');
287  dest->SeqOneByteStringSet(dest_position+1, 'u');
288  dest->SeqOneByteStringSet(dest_position+2, kHexChars[c >> 12]);
289  dest->SeqOneByteStringSet(dest_position+3, kHexChars[(c >> 8) & 0xf]);
290  dest->SeqOneByteStringSet(dest_position+4, kHexChars[(c >> 4) & 0xf]);
291  dest->SeqOneByteStringSet(dest_position+5, kHexChars[c & 0xf]);
292  dest_position += 6;
293  } else if (IsNotEscaped(c)) {
294  dest->SeqOneByteStringSet(dest_position, c);
295  dest_position++;
296  } else {
297  dest->SeqOneByteStringSet(dest_position, '%');
298  dest->SeqOneByteStringSet(dest_position+1, kHexChars[c >> 4]);
299  dest->SeqOneByteStringSet(dest_position+2, kHexChars[c & 0xf]);
300  dest_position += 3;
301  }
302  }
303  }
304 
305  return dest;
306 }
307 
308 } } // namespace v8::internal
309 
310 #endif // V8_URI_H_
Vector< const uint8_t > GetCharVector(Handle< String > string)
Definition: uri.h:46
static Handle< String > Unescape(Isolate *isolate, Handle< String > source)
Definition: uri.h:94
int int32_t
Definition: unicode.cc:47
int Search(Vector< const SubjectChar > subject, int index)
#define ASSERT(condition)
Definition: checks.h:329
unsigned short uint16_t
Definition: unicode.cc:46
Factory * factory()
Definition: isolate.h:995
#define RETURN_IF_EMPTY_HANDLE_VALUE(isolate, call, value)
Definition: isolate.h:137
Handle< SeqTwoByteString > NewRawTwoByteString(int length, PretenureFlag pretenure=NOT_TENURED)
Definition: factory.cc:300
Vector< const uc16 > ToUC16Vector()
Definition: objects.h:8757
static Handle< String > Escape(Isolate *isolate, Handle< String > string)
Definition: uri.h:250
#define STATIC_ASCII_VECTOR(x)
Definition: utils.h:570
Handle< String > NewProperSubString(Handle< String > str, int begin, int end)
Definition: factory.cc:475
INLINE(static HeapObject *EnsureDoubleAligned(Heap *heap, HeapObject *object, int size))
Handle< SeqOneByteString > NewRawOneByteString(int length, PretenureFlag pretenure=NOT_TENURED)
Definition: factory.cc:291
PerThreadAssertScopeDebugOnly< HEAP_ALLOCATION_ASSERT, false > DisallowHeapAllocation
Definition: assert-scope.h:214
Handle< String > NewConsString(Handle< String > left, Handle< String > right)
Definition: factory.cc:370
static const int kMaxLength
Definition: objects.h:8922
Vector< const uint8_t > ToOneByteVector()
Definition: objects.h:8751
static const int32_t kMaxOneByteCharCode
Definition: objects.h:8914