Node.js  v8.x
Node.js is a JavaScript runtime built on Chrome's V8 JavaScript engine
node_url.cc
Go to the documentation of this file.
1 #include "node_url.h"
2 #include "node.h"
3 #include "node_internals.h"
4 #include "env.h"
5 #include "env-inl.h"
6 #include "util.h"
7 #include "util-inl.h"
8 #include "v8.h"
9 #include "base-object.h"
10 #include "base-object-inl.h"
11 #include "node_i18n.h"
12 
13 #include <string>
14 #include <vector>
15 #include <stdio.h>
16 #include <cmath>
17 
18 namespace node {
19 
20 using v8::Array;
21 using v8::Context;
22 using v8::Function;
23 using v8::FunctionCallbackInfo;
24 using v8::HandleScope;
25 using v8::Integer;
26 using v8::Isolate;
27 using v8::Local;
28 using v8::MaybeLocal;
29 using v8::Null;
30 using v8::Object;
31 using v8::String;
32 using v8::TryCatch;
33 using v8::Undefined;
34 using v8::Value;
35 
// Looks up property `name` on the V8 object `obj` in `env`'s context and
// unwraps the result with ToLocalChecked(). The `env` and `obj` arguments are
// parenthesised so that arbitrary expressions may be passed.
#define GET(env, obj, name)                                                   \
  (obj)->Get((env)->context(),                                                \
             OneByteString((env)->isolate(), name)).ToLocalChecked()
39 
// If property `name` of `obj` is a string, stores its UTF-8 form in
// `data->name` and ORs `flag` into `data->flags`; otherwise `data` is left
// untouched. The value is copied with its explicit length (as Copy() does),
// so an embedded NUL no longer truncates it. The do/while (0) wrapper makes
// the expansion a single statement that is safe in an unbraced if/else.
#define GET_AND_SET(env, obj, name, data, flag)                               \
  do {                                                                        \
    Local<Value> val = GET(env, obj, #name);                                  \
    if (val->IsString()) {                                                    \
      Utf8Value value((env)->isolate(), val.As<String>());                    \
      (data)->name = std::string(*value, value.length());                     \
      (data)->flags |= (flag);                                                \
    }                                                                         \
  } while (0)
49 
// Creates a V8 string from the std::string expression `str`. Only c_str() is
// handed to V8 (no explicit length). `str` is parenthesised so that an
// expression argument cannot bind `.c_str()` to just its last operand.
#define UTF8STRING(isolate, str)                                              \
  String::NewFromUtf8((isolate), (str).c_str(), v8::NewStringType::kNormal)   \
      .ToLocalChecked()
53 
54 namespace url {
55 
// Sentinel the parsers substitute for "no more input", standing in for the
// EOF code point: https://url.spec.whatwg.org/#eof-code-point
static constexpr char kEOL = -1;

// U+FFFD as a UTF-16 code unit. Used in ToUSVString().
static constexpr char16_t kUnicodeReplacementCharacter = 0xFFFD;
61 
62 // https://url.spec.whatwg.org/#concept-host
64  std::string domain;
65  uint32_t ipv4;
66  uint16_t ipv6[8];
67  std::string opaque;
69 };
70 
77 };
78 
79 struct url_host {
81  enum url_host_type type;
82 };
83 
// X-macro list: expanding ARGS(XX) with an XX(name) that emits `name,`
// yields these nine identifiers in order, starting at ARG_FLAGS.
#define ARGS(XX)                                                              \
  XX(ARG_FLAGS)                                                               \
  XX(ARG_PROTOCOL)                                                            \
  XX(ARG_USERNAME)                                                            \
  XX(ARG_PASSWORD)                                                            \
  XX(ARG_HOST)                                                                \
  XX(ARG_PORT)                                                                \
  XX(ARG_PATH)                                                                \
  XX(ARG_QUERY)                                                               \
  XX(ARG_FRAGMENT)

// Same pattern for the two ERR_ARG_* identifiers. The final entry carries no
// line-continuation backslash, so the macro no longer depends on the next
// source line being blank.
#define ERR_ARGS(XX)                                                          \
  XX(ERR_ARG_FLAGS)                                                           \
  XX(ERR_ARG_INPUT)
98 
100 #define XX(name) name,
101  ARGS(XX)
102 #undef XX
103 };
104 
106 #define XX(name) name,
107  ERR_ARGS(XX)
108 #undef XX
109 };
110 
// Generates `static inline bool name(ch)`: a predicate template usable with
// any character type at least `bits` bits wide (checked at compile time).
// `expr` is the predicate body and refers to its argument as `ch`.
#define CHAR_TEST(bits, name, expr)                                           \
  template <typename CharT>                                                   \
  static inline bool name(const CharT ch) {                                   \
    static_assert(sizeof(ch) >= (bits) / 8,                                   \
                  "Character must be wider than " #bits " bits");             \
    return (expr);                                                            \
  }

// Generates two overloads of `name`: one taking a pair of characters
// (`ch1`, `ch2`, as referenced by `expr`) and one taking a string, which is
// true only when the string has at least two characters and its first two
// satisfy the pair overload.
#define TWO_CHAR_STRING_TEST(bits, name, expr)                                \
  template <typename CharT>                                                   \
  static inline bool name(const CharT ch1, const CharT ch2) {                 \
    static_assert(sizeof(ch1) >= (bits) / 8,                                  \
                  "Character must be wider than " #bits " bits");             \
    return (expr);                                                            \
  }                                                                           \
  template <typename CharT>                                                   \
  static inline bool name(const std::basic_string<CharT>& str) {              \
    static_assert(sizeof(str[0]) >= (bits) / 8,                               \
                  "Character must be wider than " #bits " bits");             \
    return str.length() >= 2 && name(str[0], str[1]);                         \
  }

// https://infra.spec.whatwg.org/#ascii-tab-or-newline
CHAR_TEST(8, IsASCIITabOrNewline, (ch == '\n' || ch == '\r' || ch == '\t'))

// https://infra.spec.whatwg.org/#c0-control-or-space
CHAR_TEST(8, IsC0ControlOrSpace, (ch <= ' ' && ch >= '\0'))

// https://infra.spec.whatwg.org/#ascii-digit
CHAR_TEST(8, IsASCIIDigit, ('0' <= ch && ch <= '9'))

// https://infra.spec.whatwg.org/#ascii-hex-digit
CHAR_TEST(8, IsASCIIHexDigit, (IsASCIIDigit(ch) ||
                               ('a' <= ch && ch <= 'f') ||
                               ('A' <= ch && ch <= 'F')))

// https://infra.spec.whatwg.org/#ascii-alpha
CHAR_TEST(8, IsASCIIAlpha, (('a' <= ch && ch <= 'z') ||
                            ('A' <= ch && ch <= 'Z')))

// https://infra.spec.whatwg.org/#ascii-alphanumeric
CHAR_TEST(8, IsASCIIAlphanumeric, (IsASCIIAlpha(ch) || IsASCIIDigit(ch)))

// https://infra.spec.whatwg.org/#ascii-lowercase
// Letters get bit 0x20 set (which maps 'A'-'Z' onto 'a'-'z'); every other
// character is returned unchanged.
template <typename T>
static inline T ASCIILowercase(T ch) {
  if (!IsASCIIAlpha(ch))
    return ch;
  return ch | 0x20;
}

// https://url.spec.whatwg.org/#forbidden-host-code-point
CHAR_TEST(8, IsForbiddenHostCodePoint,
          (ch == '\0' || ch == '\t' || ch == '\n' || ch == '\r' ||
           ch == ' ' || ch == '#' || ch == '%' || ch == '/' ||
           ch == ':' || ch == '?' || ch == '@' || ch == '[' ||
           ch == '\\' || ch == ']'))

// https://url.spec.whatwg.org/#windows-drive-letter
TWO_CHAR_STRING_TEST(8, IsWindowsDriveLetter,
                     ((ch2 == ':' || ch2 == '|') && IsASCIIAlpha(ch1)))

// https://url.spec.whatwg.org/#normalized-windows-drive-letter
TWO_CHAR_STRING_TEST(8, IsNormalizedWindowsDriveLetter,
                     (ch2 == ':' && IsASCIIAlpha(ch1)))

// If a UTF-16 character is a low/trailing surrogate.
CHAR_TEST(16, IsUnicodeTrail, ((ch & 0xFC00) == 0xDC00))

// If a UTF-16 character is a surrogate.
CHAR_TEST(16, IsUnicodeSurrogate, ((ch & 0xF800) == 0xD800))

// If a UTF-16 surrogate is a low/trailing one.
CHAR_TEST(16, IsUnicodeSurrogateTrail, ((ch & 0x400) != 0))

#undef CHAR_TEST
#undef TWO_CHAR_STRING_TEST
186 
// hex[b] is the percent-encoded spelling ("%XX", upper-case hex digits) of
// byte value b. Both the strings and the pointer table itself are const, so
// the table cannot be modified at run time.
static const char* const hex[256] = {
  "%00", "%01", "%02", "%03", "%04", "%05", "%06", "%07",
  "%08", "%09", "%0A", "%0B", "%0C", "%0D", "%0E", "%0F",
  "%10", "%11", "%12", "%13", "%14", "%15", "%16", "%17",
  "%18", "%19", "%1A", "%1B", "%1C", "%1D", "%1E", "%1F",
  "%20", "%21", "%22", "%23", "%24", "%25", "%26", "%27",
  "%28", "%29", "%2A", "%2B", "%2C", "%2D", "%2E", "%2F",
  "%30", "%31", "%32", "%33", "%34", "%35", "%36", "%37",
  "%38", "%39", "%3A", "%3B", "%3C", "%3D", "%3E", "%3F",
  "%40", "%41", "%42", "%43", "%44", "%45", "%46", "%47",
  "%48", "%49", "%4A", "%4B", "%4C", "%4D", "%4E", "%4F",
  "%50", "%51", "%52", "%53", "%54", "%55", "%56", "%57",
  "%58", "%59", "%5A", "%5B", "%5C", "%5D", "%5E", "%5F",
  "%60", "%61", "%62", "%63", "%64", "%65", "%66", "%67",
  "%68", "%69", "%6A", "%6B", "%6C", "%6D", "%6E", "%6F",
  "%70", "%71", "%72", "%73", "%74", "%75", "%76", "%77",
  "%78", "%79", "%7A", "%7B", "%7C", "%7D", "%7E", "%7F",
  "%80", "%81", "%82", "%83", "%84", "%85", "%86", "%87",
  "%88", "%89", "%8A", "%8B", "%8C", "%8D", "%8E", "%8F",
  "%90", "%91", "%92", "%93", "%94", "%95", "%96", "%97",
  "%98", "%99", "%9A", "%9B", "%9C", "%9D", "%9E", "%9F",
  "%A0", "%A1", "%A2", "%A3", "%A4", "%A5", "%A6", "%A7",
  "%A8", "%A9", "%AA", "%AB", "%AC", "%AD", "%AE", "%AF",
  "%B0", "%B1", "%B2", "%B3", "%B4", "%B5", "%B6", "%B7",
  "%B8", "%B9", "%BA", "%BB", "%BC", "%BD", "%BE", "%BF",
  "%C0", "%C1", "%C2", "%C3", "%C4", "%C5", "%C6", "%C7",
  "%C8", "%C9", "%CA", "%CB", "%CC", "%CD", "%CE", "%CF",
  "%D0", "%D1", "%D2", "%D3", "%D4", "%D5", "%D6", "%D7",
  "%D8", "%D9", "%DA", "%DB", "%DC", "%DD", "%DE", "%DF",
  "%E0", "%E1", "%E2", "%E3", "%E4", "%E5", "%E6", "%E7",
  "%E8", "%E9", "%EA", "%EB", "%EC", "%ED", "%EE", "%EF",
  "%F0", "%F1", "%F2", "%F3", "%F4", "%F5", "%F6", "%F7",
  "%F8", "%F9", "%FA", "%FB", "%FC", "%FD", "%FE", "%FF"
};
221 
// 256-bit bitmap indexed by byte value c: bit (c & 7) of element (c >> 3) is
// set when AppendOrEscape() must percent-encode c. Each row below covers 32
// consecutive byte values.
static const uint8_t C0_CONTROL_ENCODE_SET[32] = {
  0xFF, 0xFF, 0xFF, 0xFF,  // 0x00-0x1F: all
  0x00, 0x00, 0x00, 0x00,  // 0x20-0x3F: none
  0x00, 0x00, 0x00, 0x00,  // 0x40-0x5F: none
  0x00, 0x00, 0x00, 0x80,  // 0x60-0x7F: 0x7F
  0xFF, 0xFF, 0xFF, 0xFF,  // 0x80-0x9F: all
  0xFF, 0xFF, 0xFF, 0xFF,  // 0xA0-0xBF: all
  0xFF, 0xFF, 0xFF, 0xFF,  // 0xC0-0xDF: all
  0xFF, 0xFF, 0xFF, 0xFF   // 0xE0-0xFF: all
};
288 
// 256-bit bitmap indexed by byte value c: bit (c & 7) of element (c >> 3) is
// set when AppendOrEscape() must percent-encode c. Each row below covers 32
// consecutive byte values.
static const uint8_t PATH_ENCODE_SET[32] = {
  0xFF, 0xFF, 0xFF, 0xFF,  // 0x00-0x1F: all
  0x0D, 0x00, 0x00, 0xD0,  // 0x20-0x3F: space " # < > ?
  0x00, 0x00, 0x00, 0x00,  // 0x40-0x5F: none
  0x01, 0x00, 0x00, 0xA8,  // 0x60-0x7F: ` { } 0x7F
  0xFF, 0xFF, 0xFF, 0xFF,  // 0x80-0x9F: all
  0xFF, 0xFF, 0xFF, 0xFF,  // 0xA0-0xBF: all
  0xFF, 0xFF, 0xFF, 0xFF,  // 0xC0-0xDF: all
  0xFF, 0xFF, 0xFF, 0xFF   // 0xE0-0xFF: all
};
355 
// 256-bit bitmap indexed by byte value c: bit (c & 7) of element (c >> 3) is
// set when AppendOrEscape() must percent-encode c. Each row below covers 32
// consecutive byte values.
static const uint8_t USERINFO_ENCODE_SET[32] = {
  0xFF, 0xFF, 0xFF, 0xFF,  // 0x00-0x1F: all
  0x0D, 0x80, 0x00, 0xFC,  // 0x20-0x3F: space " # / : ; < = > ?
  0x01, 0x00, 0x00, 0x78,  // 0x40-0x5F: @ [ \ ] ^
  0x01, 0x00, 0x00, 0xB8,  // 0x60-0x7F: ` { | } 0x7F
  0xFF, 0xFF, 0xFF, 0xFF,  // 0x80-0x9F: all
  0xFF, 0xFF, 0xFF, 0xFF,  // 0xA0-0xBF: all
  0xFF, 0xFF, 0xFF, 0xFF,  // 0xC0-0xDF: all
  0xFF, 0xFF, 0xFF, 0xFF   // 0xE0-0xFF: all
};
422 
// 256-bit bitmap indexed by byte value c: bit (c & 7) of element (c >> 3) is
// set when AppendOrEscape() must percent-encode c. Each row below covers 32
// consecutive byte values.
static const uint8_t QUERY_ENCODE_SET[32] = {
  0xFF, 0xFF, 0xFF, 0xFF,  // 0x00-0x1F: all
  0x0D, 0x00, 0x00, 0x50,  // 0x20-0x3F: space " # < >
  0x00, 0x00, 0x00, 0x00,  // 0x40-0x5F: none
  0x00, 0x00, 0x00, 0x80,  // 0x60-0x7F: 0x7F
  0xFF, 0xFF, 0xFF, 0xFF,  // 0x80-0x9F: all
  0xFF, 0xFF, 0xFF, 0xFF,  // 0xA0-0xBF: all
  0xFF, 0xFF, 0xFF, 0xFF,  // 0xC0-0xDF: all
  0xFF, 0xFF, 0xFF, 0xFF   // 0xE0-0xFF: all
};
489 
// Returns whether bit `i` is set in the bitmap `a`. Bits are numbered from
// the least significant bit of a[0]; eight bits per array element.
static inline bool BitAt(const uint8_t a[], const uint8_t i) {
  const unsigned byte_index = i / 8;
  const unsigned mask = 1u << (i % 8);
  return (a[byte_index] & mask) != 0;
}
493 
494 // Appends ch to str. If ch position in encode_set is set, the ch will
495 // be percent-encoded then appended.
496 static inline void AppendOrEscape(std::string* str,
497  const unsigned char ch,
498  const uint8_t encode_set[]) {
499  if (BitAt(encode_set, ch))
500  *str += hex[ch];
501  else
502  *str += ch;
503 }
504 
// Maps an ASCII hex digit ('0'-'9', 'A'-'F', 'a'-'f') to its value 0-15.
// Any other character yields static_cast<unsigned>(-1).
template <typename T>
static inline unsigned hex2bin(const T ch) {
  if (ch >= '0' && ch <= '9')
    return ch - '0';
  if (ch >= 'A' && ch <= 'F')
    return 10 + (ch - 'A');
  if (ch >= 'a' && ch <= 'f')
    return 10 + (ch - 'a');
  return static_cast<unsigned>(-1);
}

// Appends the percent-decoded form of input[0, len) to *dest.
// A '%' followed by two hex digits that both lie inside the buffer becomes
// the single byte they encode; every other byte -- including a '%' that
// starts a malformed or truncated escape -- is copied through unchanged.
// Nothing at or beyond input + len is ever read, so the buffer does not need
// to be NUL-terminated. *dest is left untouched when len is 0.
static inline void PercentDecode(const char* input,
                                 size_t len,
                                 std::string* dest) {
  if (len == 0)
    return;
  dest->reserve(len);
  const char* pointer = input;
  const char* const end = input + len;
  while (pointer < end) {
    const char ch = pointer[0];
    // Number of bytes that follow `ch` inside the buffer.
    const size_t remaining = end - pointer - 1;
    if (ch == '%' && remaining >= 2) {
      const unsigned hi = hex2bin(pointer[1]);
      const unsigned lo = hex2bin(pointer[2]);
      // hex2bin() reports a non-hex character as a value above 15.
      if (hi < 16 && lo < 16) {
        *dest += static_cast<char>(hi * 16 + lo);
        pointer += 3;
        continue;
      }
    }
    *dest += ch;
    pointer++;
  }
}
544 
// X-macro table of (scheme including its trailing ':', port) pairs consulted
// by IsSpecial() and NormalizePort(). "file:" is listed with port -1.
#define SPECIALS(XX)                                                          \
  XX("ftp:", 21)                                                              \
  XX("file:", -1)                                                             \
  XX("gopher:", 70)                                                           \
  XX("http:", 80)                                                             \
  XX("https:", 443)                                                           \
  XX("ws:", 80)                                                               \
  XX("wss:", 443)

// Returns true when `scheme` (trailing ':' included) is listed in SPECIALS.
// The argument is taken by const reference so no copy is made per call.
static inline bool IsSpecial(const std::string& scheme) {
#define XX(name, _) if (scheme == name) return true;
  SPECIALS(XX);
#undef XX
  return false;
}

// Returns -1 when `p` is the port listed for `scheme` in SPECIALS, and `p`
// unchanged otherwise.
static inline int NormalizePort(const std::string& scheme, int p) {
#define XX(name, port) if (scheme == name && p == port) return -1;
  SPECIALS(XX);
#undef XX
  return p;
}
567 
#if defined(NODE_HAVE_I18N_SUPPORT)
// Runs *input through i18n::ToUnicode() and stores the converted text in
// *output. Returns false, leaving *output untouched, when the conversion
// reports a negative result.
static inline bool ToUnicode(std::string* input, std::string* output) {
  MaybeStackBuffer<char> converted;
  const bool failed =
      i18n::ToUnicode(&converted, input->c_str(), input->length()) < 0;
  if (failed)
    return false;
  output->assign(*converted, converted.length());
  return true;
}

// Runs *input through i18n::ToASCII() and stores the converted text in
// *output. Returns false, leaving *output untouched, when the conversion
// reports a negative result.
static inline bool ToASCII(std::string* input, std::string* output) {
  MaybeStackBuffer<char> converted;
  const bool failed =
      i18n::ToASCII(&converted, input->c_str(), input->length()) < 0;
  if (failed)
    return false;
  output->assign(*converted, converted.length());
  return true;
}
#else
// Intentional non-ops if ICU is not present: the input is copied to the
// output verbatim and the call always succeeds.
static inline bool ToUnicode(std::string* input, std::string* output) {
  output->assign(*input);
  return true;
}

static inline bool ToASCII(std::string* input, std::string* output) {
  output->assign(*input);
  return true;
}
#endif
596 
597 static url_host_type ParseIPv6Host(url_host* host,
598  const char* input,
599  size_t length) {
601  for (unsigned n = 0; n < 8; n++)
602  host->value.ipv6[n] = 0;
603  uint16_t* piece_pointer = &host->value.ipv6[0];
604  uint16_t* last_piece = piece_pointer + 8;
605  uint16_t* compress_pointer = nullptr;
606  const char* pointer = input;
607  const char* end = pointer + length;
608  unsigned value, len, swaps, numbers_seen;
609  char ch = pointer < end ? pointer[0] : kEOL;
610  if (ch == ':') {
611  if (length < 2 || pointer[1] != ':')
612  goto end;
613  pointer += 2;
614  ch = pointer < end ? pointer[0] : kEOL;
615  piece_pointer++;
616  compress_pointer = piece_pointer;
617  }
618  while (ch != kEOL) {
619  if (piece_pointer > last_piece)
620  goto end;
621  if (ch == ':') {
622  if (compress_pointer != nullptr)
623  goto end;
624  pointer++;
625  ch = pointer < end ? pointer[0] : kEOL;
626  piece_pointer++;
627  compress_pointer = piece_pointer;
628  continue;
629  }
630  value = 0;
631  len = 0;
632  while (len < 4 && IsASCIIHexDigit(ch)) {
633  value = value * 0x10 + hex2bin(ch);
634  pointer++;
635  ch = pointer < end ? pointer[0] : kEOL;
636  len++;
637  }
638  switch (ch) {
639  case '.':
640  if (len == 0)
641  goto end;
642  pointer -= len;
643  ch = pointer < end ? pointer[0] : kEOL;
644  if (piece_pointer > last_piece - 2)
645  goto end;
646  numbers_seen = 0;
647  while (ch != kEOL) {
648  value = 0xffffffff;
649  if (numbers_seen > 0) {
650  if (ch == '.' && numbers_seen < 4) {
651  pointer++;
652  ch = pointer < end ? pointer[0] : kEOL;
653  } else {
654  goto end;
655  }
656  }
657  if (!IsASCIIDigit(ch))
658  goto end;
659  while (IsASCIIDigit(ch)) {
660  unsigned number = ch - '0';
661  if (value == 0xffffffff) {
662  value = number;
663  } else if (value == 0) {
664  goto end;
665  } else {
666  value = value * 10 + number;
667  }
668  if (value > 255)
669  goto end;
670  pointer++;
671  ch = pointer < end ? pointer[0] : kEOL;
672  }
673  *piece_pointer = *piece_pointer * 0x100 + value;
674  numbers_seen++;
675  if (numbers_seen == 2 || numbers_seen == 4)
676  piece_pointer++;
677  }
678  if (numbers_seen != 4)
679  goto end;
680  continue;
681  case ':':
682  pointer++;
683  ch = pointer < end ? pointer[0] : kEOL;
684  if (ch == kEOL)
685  goto end;
686  break;
687  case kEOL:
688  break;
689  default:
690  goto end;
691  }
692  *piece_pointer = value;
693  piece_pointer++;
694  }
695 
696  if (compress_pointer != nullptr) {
697  swaps = piece_pointer - compress_pointer;
698  piece_pointer = last_piece - 1;
699  while (piece_pointer != &host->value.ipv6[0] && swaps > 0) {
700  uint16_t temp = *piece_pointer;
701  uint16_t* swap_piece = compress_pointer + swaps - 1;
702  *piece_pointer = *swap_piece;
703  *swap_piece = temp;
704  piece_pointer--;
705  swaps--;
706  }
707  } else if (compress_pointer == nullptr &&
708  piece_pointer != last_piece) {
709  goto end;
710  }
711  type = HOST_TYPE_IPV6;
712  end:
713  host->type = type;
714  return type;
715 }
716 
// Parses the characters in [start, end) as a non-negative integer.
// A "0x"/"0X" prefix selects hexadecimal; otherwise a leading '0' followed by
// at least one more character selects octal; otherwise decimal.
// Returns 0 when no digits remain after the prefix (including an empty
// range), -1 when any character is not a digit of the selected radix, and the
// largest int64_t value when the number does not fit (the value strtoll()
// would saturate to). Only bytes inside [start, end) are read, so the range
// does not have to be followed by a terminator.
static inline int64_t ParseNumber(const char* start, const char* end) {
  const int64_t kSaturated =
      static_cast<int64_t>((static_cast<uint64_t>(1) << 63) - 1);
  int64_t radix = 10;
  if (end - start >= 2 && start[0] == '0' && (start[1] | 0x20) == 'x') {
    start += 2;
    radix = 16;
  } else if (end - start > 1 && start[0] == '0') {
    start++;
    radix = 8;
  }

  int64_t value = 0;
  bool overflow = false;
  for (const char* p = start; p < end; p++) {
    const char ch = p[0];
    int64_t digit;
    if (ch >= '0' && ch <= '9')
      digit = ch - '0';
    else if (ch >= 'a' && ch <= 'f')
      digit = 10 + (ch - 'a');
    else if (ch >= 'A' && ch <= 'F')
      digit = 10 + (ch - 'A');
    else
      return -1;
    if (digit >= radix)
      return -1;
    // Keep validating the remaining characters even after overflow so that
    // an invalid digit is still reported as -1.
    if (overflow)
      continue;
    if (value > (kSaturated - digit) / radix)
      overflow = true;
    else
      value = value * radix + digit;
  }
  return overflow ? kSaturated : value;
}
751 
752 static url_host_type ParseIPv4Host(url_host* host,
753  const char* input,
754  size_t length) {
756  const char* pointer = input;
757  const char* mark = input;
758  const char* end = pointer + length;
759  int parts = 0;
760  uint32_t val = 0;
761  uint64_t numbers[4];
762  int tooBigNumbers = 0;
763  if (length == 0)
764  goto end;
765 
766  while (pointer <= end) {
767  const char ch = pointer < end ? pointer[0] : kEOL;
768  const int remaining = end - pointer - 1;
769  if (ch == '.' || ch == kEOL) {
770  if (++parts > 4)
771  goto end;
772  if (pointer == mark)
773  goto end;
774  int64_t n = ParseNumber(mark, pointer);
775  if (n < 0)
776  goto end;
777 
778  if (n > 255) {
779  tooBigNumbers++;
780  }
781  numbers[parts - 1] = n;
782  mark = pointer + 1;
783  if (ch == '.' && remaining == 0)
784  break;
785  }
786  pointer++;
787  }
788  CHECK_GT(parts, 0);
789 
790  // If any but the last item in numbers is greater than 255, return failure.
791  // If the last item in numbers is greater than or equal to
792  // 256^(5 - the number of items in numbers), return failure.
793  if (tooBigNumbers > 1 ||
794  (tooBigNumbers == 1 && numbers[parts - 1] <= 255) ||
795  numbers[parts - 1] >= pow(256, static_cast<double>(5 - parts))) {
796  type = HOST_TYPE_FAILED;
797  goto end;
798  }
799 
800  type = HOST_TYPE_IPV4;
801  val = numbers[parts - 1];
802  for (int n = 0; n < parts - 1; n++) {
803  double b = 3 - n;
804  val += numbers[n] * pow(256, b);
805  }
806 
807  host->value.ipv4 = val;
808  end:
809  host->type = type;
810  return type;
811 }
812 
813 static url_host_type ParseOpaqueHost(url_host* host,
814  const char* input,
815  size_t length) {
817  std::string output;
818  output.reserve(length * 3);
819  for (size_t i = 0; i < length; i++) {
820  const char ch = input[i];
821  if (ch != '%' && IsForbiddenHostCodePoint(ch)) {
822  type = HOST_TYPE_FAILED;
823  goto end;
824  } else {
825  AppendOrEscape(&output, ch, C0_CONTROL_ENCODE_SET);
826  }
827  }
828 
829  host->value.opaque = output;
830  end:
831  host->type = type;
832  return type;
833 }
834 
835 static url_host_type ParseHost(url_host* host,
836  const char* input,
837  size_t length,
838  bool is_special,
839  bool unicode = false) {
841  const char* pointer = input;
842  std::string decoded;
843 
844  if (length == 0)
845  goto end;
846 
847  if (pointer[0] == '[') {
848  if (pointer[length - 1] != ']')
849  goto end;
850  return ParseIPv6Host(host, ++pointer, length - 2);
851  }
852 
853  if (!is_special)
854  return ParseOpaqueHost(host, input, length);
855 
856  // First, we have to percent decode
857  PercentDecode(input, length, &decoded);
858 
859  // Then we have to punycode toASCII
860  if (!ToASCII(&decoded, &decoded))
861  goto end;
862 
863  // If any of the following characters are still present, we have to fail
864  for (size_t n = 0; n < decoded.size(); n++) {
865  const char ch = decoded[n];
866  if (IsForbiddenHostCodePoint(ch)) {
867  goto end;
868  }
869  }
870 
871  // Check to see if it's an IPv4 IP address
872  type = ParseIPv4Host(host, decoded.c_str(), decoded.length());
873  if (type == HOST_TYPE_IPV4 || type == HOST_TYPE_FAILED)
874  goto end;
875 
876  // If the unicode flag is set, run the result through punycode ToUnicode
877  if (unicode && !ToUnicode(&decoded, &decoded))
878  goto end;
879 
880  // It's not an IPv4 or IPv6 address, it must be a domain
881  type = HOST_TYPE_DOMAIN;
882  host->value.domain = decoded;
883 
884  end:
885  host->type = type;
886  return type;
887 }
888 
// Finds the longest run of zero elements in values[0, len), for use with the
// "::" compression when serializing an IPv6 address.
// Returns a pointer to the first element of that run, or nullptr when no run
// is at least two elements long. Among runs of equal length the earliest one
// is returned.
static inline uint16_t* FindLongestZeroSequence(uint16_t* values,
                                                size_t len) {
  uint16_t* best = nullptr;
  size_t best_len = 1;  // A run has to beat this to be reported.
  size_t run_start = 0;
  size_t run_len = 0;

  // i == len acts as a virtual non-zero element that closes a trailing run.
  for (size_t i = 0; i <= len; i++) {
    if (i < len && values[i] == 0) {
      if (run_len == 0)
        run_start = i;
      run_len++;
      continue;
    }
    if (run_len > best_len) {
      best_len = run_len;
      best = values + run_start;
    }
    run_len = 0;
  }
  return best;
}
919 
920 static url_host_type WriteHost(url_host* host, std::string* dest) {
921  dest->clear();
922  switch (host->type) {
923  case HOST_TYPE_DOMAIN:
924  *dest = host->value.domain;
925  break;
926  case HOST_TYPE_IPV4: {
927  dest->reserve(15);
928  uint32_t value = host->value.ipv4;
929  for (int n = 0; n < 4; n++) {
930  char buf[4];
931  char* buffer = buf;
932  snprintf(buffer, sizeof(buf), "%d", value % 256);
933  dest->insert(0, buf);
934  if (n < 3)
935  dest->insert(0, 1, '.');
936  value /= 256;
937  }
938  break;
939  }
940  case HOST_TYPE_IPV6: {
941  dest->reserve(41);
942  *dest+= '[';
943  uint16_t* start = &host->value.ipv6[0];
944  uint16_t* compress_pointer =
945  FindLongestZeroSequence(start, 8);
946  bool ignore0 = false;
947  for (int n = 0; n <= 7; n++) {
948  uint16_t* piece = &host->value.ipv6[n];
949  if (ignore0 && *piece == 0)
950  continue;
951  else if (ignore0)
952  ignore0 = false;
953  if (compress_pointer == piece) {
954  *dest += n == 0 ? "::" : ":";
955  ignore0 = true;
956  continue;
957  }
958  char buf[5];
959  char* buffer = buf;
960  snprintf(buffer, sizeof(buf), "%x", *piece);
961  *dest += buf;
962  if (n < 7)
963  *dest += ':';
964  }
965  *dest += ']';
966  break;
967  }
968  case HOST_TYPE_OPAQUE:
969  *dest = host->value.opaque;
970  break;
971  case HOST_TYPE_FAILED:
972  break;
973  }
974  return host->type;
975 }
976 
977 static bool ParseHost(std::string* input,
978  std::string* output,
979  bool is_special,
980  bool unicode = false) {
981  if (input->length() == 0) {
982  output->clear();
983  return true;
984  }
985  url_host host{{""}, HOST_TYPE_DOMAIN};
986  ParseHost(&host, input->c_str(), input->length(), is_special, unicode);
987  if (host.type == HOST_TYPE_FAILED)
988  return false;
989  WriteHost(&host, output);
990  return true;
991 }
992 
993 static inline void Copy(Environment* env,
994  Local<Array> ary,
995  std::vector<std::string>* vec) {
996  const int32_t len = ary->Length();
997  if (len == 0)
998  return; // nothing to copy
999  vec->reserve(len);
1000  for (int32_t n = 0; n < len; n++) {
1001  Local<Value> val = ary->Get(env->context(), n).ToLocalChecked();
1002  if (val->IsString()) {
1003  Utf8Value value(env->isolate(), val.As<String>());
1004  vec->push_back(std::string(*value, value.length()));
1005  }
1006  }
1007 }
1008 
1009 static inline Local<Array> Copy(Environment* env,
1010  const std::vector<std::string>& vec) {
1011  Isolate* isolate = env->isolate();
1012  Local<Array> ary = Array::New(isolate, vec.size());
1013  for (size_t n = 0; n < vec.size(); n++)
1014  ary->Set(env->context(), n, UTF8STRING(isolate, vec[n])).FromJust();
1015  return ary;
1016 }
1017 
1018 static inline void HarvestBase(Environment* env,
1019  struct url_data* base,
1020  Local<Object> base_obj) {
1021  Local<Context> context = env->context();
1022  Local<Value> flags = GET(env, base_obj, "flags");
1023  if (flags->IsInt32())
1024  base->flags = flags->Int32Value(context).FromJust();
1025 
1026  Local<Value> scheme = GET(env, base_obj, "scheme");
1027  base->scheme = Utf8Value(env->isolate(), scheme).out();
1028 
1029  GET_AND_SET(env, base_obj, username, base, URL_FLAGS_HAS_USERNAME);
1030  GET_AND_SET(env, base_obj, password, base, URL_FLAGS_HAS_PASSWORD);
1031  GET_AND_SET(env, base_obj, host, base, URL_FLAGS_HAS_HOST);
1032  GET_AND_SET(env, base_obj, query, base, URL_FLAGS_HAS_QUERY);
1033  GET_AND_SET(env, base_obj, fragment, base, URL_FLAGS_HAS_FRAGMENT);
1034  Local<Value> port = GET(env, base_obj, "port");
1035  if (port->IsInt32())
1036  base->port = port->Int32Value(context).FromJust();
1037  Local<Value> path = GET(env, base_obj, "path");
1038  if (path->IsArray()) {
1039  base->flags |= URL_FLAGS_HAS_PATH;
1040  Copy(env, path.As<Array>(), &(base->path));
1041  }
1042 }
1043 
1044 static inline void HarvestContext(Environment* env,
1045  struct url_data* context,
1046  Local<Object> context_obj) {
1047  Local<Value> flags = GET(env, context_obj, "flags");
1048  if (flags->IsInt32()) {
1049  int32_t _flags = flags->Int32Value(env->context()).FromJust();
1050  if (_flags & URL_FLAGS_SPECIAL)
1051  context->flags |= URL_FLAGS_SPECIAL;
1052  if (_flags & URL_FLAGS_CANNOT_BE_BASE)
1053  context->flags |= URL_FLAGS_CANNOT_BE_BASE;
1054  if (_flags & URL_FLAGS_HAS_USERNAME)
1055  context->flags |= URL_FLAGS_HAS_USERNAME;
1056  if (_flags & URL_FLAGS_HAS_PASSWORD)
1057  context->flags |= URL_FLAGS_HAS_PASSWORD;
1058  if (_flags & URL_FLAGS_HAS_HOST)
1059  context->flags |= URL_FLAGS_HAS_HOST;
1060  }
1061  Local<Value> scheme = GET(env, context_obj, "scheme");
1062  if (scheme->IsString()) {
1063  Utf8Value value(env->isolate(), scheme);
1064  context->scheme.assign(*value, value.length());
1065  }
1066  Local<Value> port = GET(env, context_obj, "port");
1067  if (port->IsInt32())
1068  context->port = port->Int32Value(env->context()).FromJust();
1069  if (context->flags & URL_FLAGS_HAS_USERNAME) {
1070  Local<Value> username = GET(env, context_obj, "username");
1071  CHECK(username->IsString());
1072  Utf8Value value(env->isolate(), username);
1073  context->username.assign(*value, value.length());
1074  }
1075  if (context->flags & URL_FLAGS_HAS_PASSWORD) {
1076  Local<Value> password = GET(env, context_obj, "password");
1077  CHECK(password->IsString());
1078  Utf8Value value(env->isolate(), password);
1079  context->password.assign(*value, value.length());
1080  }
1081  Local<Value> host = GET(env, context_obj, "host");
1082  if (host->IsString()) {
1083  Utf8Value value(env->isolate(), host);
1084  context->host.assign(*value, value.length());
1085  }
1086 }
1087 
// Returns true when `str` is a single-dot path segment, i.e. exactly one of
// ".", "%2e" or "%2E". The segment is taken by const reference so that no
// copy is made per call.
static inline bool IsSingleDotSegment(const std::string& str) {
  switch (str.size()) {
    case 1:
      return str[0] == '.';
    case 3:
      // Percent-encoded dot; the hex digit 'e' is matched case-insensitively.
      return str[0] == '%' &&
             str[1] == '2' &&
             (str[2] == 'e' || str[2] == 'E');
    default:
      return false;
  }
}
1101 
// Returns true when `str` is a double-dot path segment, i.e. one of
// "..", ".%2e", ".%2E", "%2e.", "%2E.",
// "%2e%2e", "%2E%2E", "%2e%2E", or "%2E%2e".
// The segment is taken by const reference so that no copy is made per call.
static inline bool IsDoubleDotSegment(const std::string& str) {
  // True when the three characters starting at `pos` are a percent-encoded
  // dot ("%2e" with a case-insensitive 'e'). Callers only pass positions for
  // which pos + 2 is inside the string.
  const auto is_encoded_dot = [&str](size_t pos) {
    return str[pos] == '%' &&
           str[pos + 1] == '2' &&
           (str[pos + 2] == 'e' || str[pos + 2] == 'E');
  };

  switch (str.size()) {
    case 2:
      return str[0] == '.' && str[1] == '.';
    case 4:
      return (str[0] == '.' && is_encoded_dot(1)) ||
             (is_encoded_dot(0) && str[3] == '.');
    case 6:
      return is_encoded_dot(0) && is_encoded_dot(3);
    default:
      return false;
  }
}
1131 
1132 static inline void ShortenUrlPath(struct url_data* url) {
1133  if (url->path.empty()) return;
1134  if (url->path.size() == 1 && url->scheme == "file:" &&
1135  IsNormalizedWindowsDriveLetter(url->path[0])) return;
1136  url->path.pop_back();
1137 }
1138 
1139 void URL::Parse(const char* input,
1140  size_t len,
1141  enum url_parse_state state_override,
1142  struct url_data* url,
1143  bool has_url,
1144  const struct url_data* base,
1145  bool has_base) {
1146  const char* p = input;
1147  const char* end = input + len;
1148 
1149  if (!has_url) {
1150  for (const char* ptr = p; ptr < end; ptr++) {
1151  if (IsC0ControlOrSpace(*ptr))
1152  p++;
1153  else
1154  break;
1155  }
1156  for (const char* ptr = end - 1; ptr >= p; ptr--) {
1157  if (IsC0ControlOrSpace(*ptr))
1158  end--;
1159  else
1160  break;
1161  }
1162  len = end - p;
1163  }
1164 
1165  std::string whitespace_stripped;
1166  whitespace_stripped.reserve(len);
1167  for (const char* ptr = p; ptr < end; ptr++)
1168  if (!IsASCIITabOrNewline(*ptr))
1169  whitespace_stripped += *ptr;
1170 
1171  input = whitespace_stripped.c_str();
1172  len = whitespace_stripped.size();
1173  p = input;
1174  end = input + len;
1175 
1176  bool atflag = false;
1177  bool sbflag = false;
1178  bool uflag = false;
1179 
1180  std::string buffer;
1181  url->scheme.reserve(len);
1182  url->username.reserve(len);
1183  url->password.reserve(len);
1184  url->host.reserve(len);
1185  url->path.reserve(len);
1186  url->query.reserve(len);
1187  url->fragment.reserve(len);
1188  buffer.reserve(len);
1189 
1190  // Set the initial parse state.
1191  const bool has_state_override = state_override != kUnknownState;
1192  enum url_parse_state state = has_state_override ? state_override :
1193  kSchemeStart;
1194 
1195  if (state < kSchemeStart || state > kFragment) {
1196  url->flags |= URL_FLAGS_INVALID_PARSE_STATE;
1197  return;
1198  }
1199 
1200  while (p <= end) {
1201  const char ch = p < end ? p[0] : kEOL;
1202  const size_t remaining = end == p ? 0 : (end - p - 1);
1203 
1204  bool special = (url->flags & URL_FLAGS_SPECIAL);
1205  bool cannot_be_base;
1206  const bool special_back_slash = (special && ch == '\\');
1207  switch (state) {
1208  case kSchemeStart:
1209  if (IsASCIIAlpha(ch)) {
1210  buffer += ASCIILowercase(ch);
1211  state = kScheme;
1212  } else if (!has_state_override) {
1213  state = kNoScheme;
1214  continue;
1215  } else {
1216  url->flags |= URL_FLAGS_FAILED;
1217  return;
1218  }
1219  break;
1220  case kScheme:
1221  if (IsASCIIAlphanumeric(ch) || ch == '+' || ch == '-' || ch == '.') {
1222  buffer += ASCIILowercase(ch);
1223  } else if (ch == ':' || (has_state_override && ch == kEOL)) {
1224  if (has_state_override && buffer.size() == 0) {
1225  url->flags |= URL_FLAGS_TERMINATED;
1226  return;
1227  }
1228  buffer += ':';
1229 
1230  bool new_is_special = IsSpecial(buffer);
1231 
1232  if (has_state_override) {
1233  if ((special != new_is_special) ||
1234  ((buffer == "file:") &&
1235  ((url->flags & URL_FLAGS_HAS_USERNAME) ||
1236  (url->flags & URL_FLAGS_HAS_PASSWORD) ||
1237  (url->port != -1)))) {
1238  url->flags |= URL_FLAGS_TERMINATED;
1239  return;
1240  }
1241 
1242  // File scheme && (host == empty or null) check left to JS-land
1243  // as it can be done before even entering C++ binding.
1244  }
1245 
1246  url->scheme = buffer;
1247  url->port = NormalizePort(url->scheme, url->port);
1248  if (new_is_special) {
1249  url->flags |= URL_FLAGS_SPECIAL;
1250  special = true;
1251  } else {
1252  url->flags &= ~URL_FLAGS_SPECIAL;
1253  special = false;
1254  }
1255  buffer.clear();
1256  if (has_state_override)
1257  return;
1258  if (url->scheme == "file:") {
1259  state = kFile;
1260  } else if (special &&
1261  has_base &&
1262  url->scheme == base->scheme) {
1263  state = kSpecialRelativeOrAuthority;
1264  } else if (special) {
1265  state = kSpecialAuthoritySlashes;
1266  } else if (p[1] == '/') {
1267  state = kPathOrAuthority;
1268  p++;
1269  } else {
1270  url->flags |= URL_FLAGS_CANNOT_BE_BASE;
1271  url->flags |= URL_FLAGS_HAS_PATH;
1272  url->path.push_back("");
1273  state = kCannotBeBase;
1274  }
1275  } else if (!has_state_override) {
1276  buffer.clear();
1277  state = kNoScheme;
1278  p = input;
1279  continue;
1280  } else {
1281  url->flags |= URL_FLAGS_FAILED;
1282  return;
1283  }
1284  break;
1285  case kNoScheme:
1286  cannot_be_base = has_base && (base->flags & URL_FLAGS_CANNOT_BE_BASE);
1287  if (!has_base || (cannot_be_base && ch != '#')) {
1288  url->flags |= URL_FLAGS_FAILED;
1289  return;
1290  } else if (cannot_be_base && ch == '#') {
1291  url->scheme = base->scheme;
1292  if (IsSpecial(url->scheme)) {
1293  url->flags |= URL_FLAGS_SPECIAL;
1294  special = true;
1295  } else {
1296  url->flags &= ~URL_FLAGS_SPECIAL;
1297  special = false;
1298  }
1299  if (base->flags & URL_FLAGS_HAS_PATH) {
1300  url->flags |= URL_FLAGS_HAS_PATH;
1301  url->path = base->path;
1302  }
1303  if (base->flags & URL_FLAGS_HAS_QUERY) {
1304  url->flags |= URL_FLAGS_HAS_QUERY;
1305  url->query = base->query;
1306  }
1307  if (base->flags & URL_FLAGS_HAS_FRAGMENT) {
1308  url->flags |= URL_FLAGS_HAS_FRAGMENT;
1309  url->fragment = base->fragment;
1310  }
1311  url->flags |= URL_FLAGS_CANNOT_BE_BASE;
1312  state = kFragment;
1313  } else if (has_base &&
1314  base->scheme != "file:") {
1315  state = kRelative;
1316  continue;
1317  } else {
1318  url->scheme = "file:";
1319  url->flags |= URL_FLAGS_SPECIAL;
1320  special = true;
1321  state = kFile;
1322  continue;
1323  }
1324  break;
1325  case kSpecialRelativeOrAuthority:
1326  if (ch == '/' && p[1] == '/') {
1327  state = kSpecialAuthorityIgnoreSlashes;
1328  p++;
1329  } else {
1330  state = kRelative;
1331  continue;
1332  }
1333  break;
1334  case kPathOrAuthority:
1335  if (ch == '/') {
1336  state = kAuthority;
1337  } else {
1338  state = kPath;
1339  continue;
1340  }
1341  break;
1342  case kRelative:
1343  url->scheme = base->scheme;
1344  if (IsSpecial(url->scheme)) {
1345  url->flags |= URL_FLAGS_SPECIAL;
1346  special = true;
1347  } else {
1348  url->flags &= ~URL_FLAGS_SPECIAL;
1349  special = false;
1350  }
1351  switch (ch) {
1352  case kEOL:
1353  if (base->flags & URL_FLAGS_HAS_USERNAME) {
1354  url->flags |= URL_FLAGS_HAS_USERNAME;
1355  url->username = base->username;
1356  }
1357  if (base->flags & URL_FLAGS_HAS_PASSWORD) {
1358  url->flags |= URL_FLAGS_HAS_PASSWORD;
1359  url->password = base->password;
1360  }
1361  if (base->flags & URL_FLAGS_HAS_HOST) {
1362  url->flags |= URL_FLAGS_HAS_HOST;
1363  url->host = base->host;
1364  }
1365  if (base->flags & URL_FLAGS_HAS_QUERY) {
1366  url->flags |= URL_FLAGS_HAS_QUERY;
1367  url->query = base->query;
1368  }
1369  if (base->flags & URL_FLAGS_HAS_PATH) {
1370  url->flags |= URL_FLAGS_HAS_PATH;
1371  url->path = base->path;
1372  }
1373  url->port = base->port;
1374  break;
1375  case '/':
1376  state = kRelativeSlash;
1377  break;
1378  case '?':
1379  if (base->flags & URL_FLAGS_HAS_USERNAME) {
1380  url->flags |= URL_FLAGS_HAS_USERNAME;
1381  url->username = base->username;
1382  }
1383  if (base->flags & URL_FLAGS_HAS_PASSWORD) {
1384  url->flags |= URL_FLAGS_HAS_PASSWORD;
1385  url->password = base->password;
1386  }
1387  if (base->flags & URL_FLAGS_HAS_HOST) {
1388  url->flags |= URL_FLAGS_HAS_HOST;
1389  url->host = base->host;
1390  }
1391  if (base->flags & URL_FLAGS_HAS_PATH) {
1392  url->flags |= URL_FLAGS_HAS_PATH;
1393  url->path = base->path;
1394  }
1395  url->port = base->port;
1396  state = kQuery;
1397  break;
1398  case '#':
1399  if (base->flags & URL_FLAGS_HAS_USERNAME) {
1400  url->flags |= URL_FLAGS_HAS_USERNAME;
1401  url->username = base->username;
1402  }
1403  if (base->flags & URL_FLAGS_HAS_PASSWORD) {
1404  url->flags |= URL_FLAGS_HAS_PASSWORD;
1405  url->password = base->password;
1406  }
1407  if (base->flags & URL_FLAGS_HAS_HOST) {
1408  url->flags |= URL_FLAGS_HAS_HOST;
1409  url->host = base->host;
1410  }
1411  if (base->flags & URL_FLAGS_HAS_QUERY) {
1412  url->flags |= URL_FLAGS_HAS_QUERY;
1413  url->query = base->query;
1414  }
1415  if (base->flags & URL_FLAGS_HAS_PATH) {
1416  url->flags |= URL_FLAGS_HAS_PATH;
1417  url->path = base->path;
1418  }
1419  url->port = base->port;
1420  state = kFragment;
1421  break;
1422  default:
1423  if (special_back_slash) {
1424  state = kRelativeSlash;
1425  } else {
1426  if (base->flags & URL_FLAGS_HAS_USERNAME) {
1427  url->flags |= URL_FLAGS_HAS_USERNAME;
1428  url->username = base->username;
1429  }
1430  if (base->flags & URL_FLAGS_HAS_PASSWORD) {
1431  url->flags |= URL_FLAGS_HAS_PASSWORD;
1432  url->password = base->password;
1433  }
1434  if (base->flags & URL_FLAGS_HAS_HOST) {
1435  url->flags |= URL_FLAGS_HAS_HOST;
1436  url->host = base->host;
1437  }
1438  if (base->flags & URL_FLAGS_HAS_PATH) {
1439  url->flags |= URL_FLAGS_HAS_PATH;
1440  url->path = base->path;
1441  ShortenUrlPath(url);
1442  }
1443  url->port = base->port;
1444  state = kPath;
1445  continue;
1446  }
1447  }
1448  break;
1449  case kRelativeSlash:
1450  if (IsSpecial(url->scheme) && (ch == '/' || ch == '\\')) {
1451  state = kSpecialAuthorityIgnoreSlashes;
1452  } else if (ch == '/') {
1453  state = kAuthority;
1454  } else {
1455  if (base->flags & URL_FLAGS_HAS_USERNAME) {
1456  url->flags |= URL_FLAGS_HAS_USERNAME;
1457  url->username = base->username;
1458  }
1459  if (base->flags & URL_FLAGS_HAS_PASSWORD) {
1460  url->flags |= URL_FLAGS_HAS_PASSWORD;
1461  url->password = base->password;
1462  }
1463  if (base->flags & URL_FLAGS_HAS_HOST) {
1464  url->flags |= URL_FLAGS_HAS_HOST;
1465  url->host = base->host;
1466  }
1467  url->port = base->port;
1468  state = kPath;
1469  continue;
1470  }
1471  break;
1472  case kSpecialAuthoritySlashes:
1473  state = kSpecialAuthorityIgnoreSlashes;
1474  if (ch == '/' && p[1] == '/') {
1475  p++;
1476  } else {
1477  continue;
1478  }
1479  break;
1480  case kSpecialAuthorityIgnoreSlashes:
1481  if (ch != '/' && ch != '\\') {
1482  state = kAuthority;
1483  continue;
1484  }
1485  break;
1486  case kAuthority:
1487  if (ch == '@') {
1488  if (atflag) {
1489  buffer.reserve(buffer.size() + 3);
1490  buffer.insert(0, "%40");
1491  }
1492  atflag = true;
1493  const size_t blen = buffer.size();
1494  if (blen > 0 && buffer[0] != ':') {
1495  url->flags |= URL_FLAGS_HAS_USERNAME;
1496  }
1497  for (size_t n = 0; n < blen; n++) {
1498  const char bch = buffer[n];
1499  if (bch == ':') {
1500  url->flags |= URL_FLAGS_HAS_PASSWORD;
1501  if (!uflag) {
1502  uflag = true;
1503  continue;
1504  }
1505  }
1506  if (uflag) {
1507  AppendOrEscape(&url->password, bch, USERINFO_ENCODE_SET);
1508  } else {
1509  AppendOrEscape(&url->username, bch, USERINFO_ENCODE_SET);
1510  }
1511  }
1512  buffer.clear();
1513  } else if (ch == kEOL ||
1514  ch == '/' ||
1515  ch == '?' ||
1516  ch == '#' ||
1517  special_back_slash) {
1518  if (atflag && buffer.size() == 0) {
1519  url->flags |= URL_FLAGS_FAILED;
1520  return;
1521  }
1522  p -= buffer.size() + 1;
1523  buffer.clear();
1524  state = kHost;
1525  } else {
1526  buffer += ch;
1527  }
1528  break;
1529  case kHost:
1530  case kHostname:
1531  if (has_state_override && url->scheme == "file:") {
1532  state = kFileHost;
1533  continue;
1534  } else if (ch == ':' && !sbflag) {
1535  if (buffer.size() == 0) {
1536  url->flags |= URL_FLAGS_FAILED;
1537  return;
1538  }
1539  url->flags |= URL_FLAGS_HAS_HOST;
1540  if (!ParseHost(&buffer, &url->host, special)) {
1541  url->flags |= URL_FLAGS_FAILED;
1542  return;
1543  }
1544  buffer.clear();
1545  state = kPort;
1546  if (state_override == kHostname) {
1547  return;
1548  }
1549  } else if (ch == kEOL ||
1550  ch == '/' ||
1551  ch == '?' ||
1552  ch == '#' ||
1553  special_back_slash) {
1554  p--;
1555  if (special && buffer.size() == 0) {
1556  url->flags |= URL_FLAGS_FAILED;
1557  return;
1558  }
1559  if (has_state_override &&
1560  buffer.size() == 0 &&
1561  ((url->username.size() > 0 || url->password.size() > 0) ||
1562  url->port != -1)) {
1563  url->flags |= URL_FLAGS_TERMINATED;
1564  return;
1565  }
1566  url->flags |= URL_FLAGS_HAS_HOST;
1567  if (!ParseHost(&buffer, &url->host, special)) {
1568  url->flags |= URL_FLAGS_FAILED;
1569  return;
1570  }
1571  buffer.clear();
1572  state = kPathStart;
1573  if (has_state_override) {
1574  return;
1575  }
1576  } else {
1577  if (ch == '[')
1578  sbflag = true;
1579  if (ch == ']')
1580  sbflag = false;
1581  buffer += ch;
1582  }
1583  break;
1584  case kPort:
1585  if (IsASCIIDigit(ch)) {
1586  buffer += ch;
1587  } else if (has_state_override ||
1588  ch == kEOL ||
1589  ch == '/' ||
1590  ch == '?' ||
1591  ch == '#' ||
1592  special_back_slash) {
1593  if (buffer.size() > 0) {
1594  int port = 0;
1595  for (size_t i = 0; i < buffer.size(); i++)
1596  port = port * 10 + buffer[i] - '0';
1597  if (port < 0 || port > 0xffff) {
1598  // TODO(TimothyGu): This hack is currently needed for the host
1599  // setter since it needs access to hostname if it is valid, and
1600  // if the FAILED flag is set the entire response to JS layer
1601  // will be empty.
1602  if (state_override == kHost)
1603  url->port = -1;
1604  else
1605  url->flags |= URL_FLAGS_FAILED;
1606  return;
1607  }
1608  url->port = NormalizePort(url->scheme, port);
1609  buffer.clear();
1610  } else if (has_state_override) {
1611  // TODO(TimothyGu): Similar case as above.
1612  if (state_override == kHost)
1613  url->port = -1;
1614  else
1615  url->flags |= URL_FLAGS_TERMINATED;
1616  return;
1617  }
1618  state = kPathStart;
1619  continue;
1620  } else {
1621  url->flags |= URL_FLAGS_FAILED;
1622  return;
1623  }
1624  break;
1625  case kFile:
1626  url->scheme = "file:";
1627  if (ch == '/' || ch == '\\') {
1628  state = kFileSlash;
1629  } else if (has_base && base->scheme == "file:") {
1630  switch (ch) {
1631  case kEOL:
1632  if (base->flags & URL_FLAGS_HAS_HOST) {
1633  url->flags |= URL_FLAGS_HAS_HOST;
1634  url->host = base->host;
1635  }
1636  if (base->flags & URL_FLAGS_HAS_PATH) {
1637  url->flags |= URL_FLAGS_HAS_PATH;
1638  url->path = base->path;
1639  }
1640  if (base->flags & URL_FLAGS_HAS_QUERY) {
1641  url->flags |= URL_FLAGS_HAS_QUERY;
1642  url->query = base->query;
1643  }
1644  break;
1645  case '?':
1646  if (base->flags & URL_FLAGS_HAS_HOST) {
1647  url->flags |= URL_FLAGS_HAS_HOST;
1648  url->host = base->host;
1649  }
1650  if (base->flags & URL_FLAGS_HAS_PATH) {
1651  url->flags |= URL_FLAGS_HAS_PATH;
1652  url->path = base->path;
1653  }
1654  url->flags |= URL_FLAGS_HAS_QUERY;
1655  url->query.clear();
1656  state = kQuery;
1657  break;
1658  case '#':
1659  if (base->flags & URL_FLAGS_HAS_HOST) {
1660  url->flags |= URL_FLAGS_HAS_HOST;
1661  url->host = base->host;
1662  }
1663  if (base->flags & URL_FLAGS_HAS_PATH) {
1664  url->flags |= URL_FLAGS_HAS_PATH;
1665  url->path = base->path;
1666  }
1667  if (base->flags & URL_FLAGS_HAS_QUERY) {
1668  url->flags |= URL_FLAGS_HAS_QUERY;
1669  url->query = base->query;
1670  }
1671  url->flags |= URL_FLAGS_HAS_FRAGMENT;
1672  url->fragment.clear();
1673  state = kFragment;
1674  break;
1675  default:
1676  if ((remaining == 0 ||
1677  !IsWindowsDriveLetter(ch, p[1]) ||
1678  (remaining >= 2 &&
1679  p[2] != '/' &&
1680  p[2] != '\\' &&
1681  p[2] != '?' &&
1682  p[2] != '#'))) {
1683  if (base->flags & URL_FLAGS_HAS_HOST) {
1684  url->flags |= URL_FLAGS_HAS_HOST;
1685  url->host = base->host;
1686  }
1687  if (base->flags & URL_FLAGS_HAS_PATH) {
1688  url->flags |= URL_FLAGS_HAS_PATH;
1689  url->path = base->path;
1690  }
1691  ShortenUrlPath(url);
1692  }
1693  state = kPath;
1694  continue;
1695  }
1696  } else {
1697  state = kPath;
1698  continue;
1699  }
1700  break;
1701  case kFileSlash:
1702  if (ch == '/' || ch == '\\') {
1703  state = kFileHost;
1704  } else {
1705  if (has_base &&
1706  base->scheme == "file:") {
1707  if (IsNormalizedWindowsDriveLetter(base->path[0])) {
1708  url->flags |= URL_FLAGS_HAS_PATH;
1709  url->path.push_back(base->path[0]);
1710  } else {
1711  if (base->flags & URL_FLAGS_HAS_HOST) {
1712  url->flags |= URL_FLAGS_HAS_HOST;
1713  url->host = base->host;
1714  } else {
1715  url->flags &= ~URL_FLAGS_HAS_HOST;
1716  url->host.clear();
1717  }
1718  }
1719  }
1720  state = kPath;
1721  continue;
1722  }
1723  break;
1724  case kFileHost:
1725  if (ch == kEOL ||
1726  ch == '/' ||
1727  ch == '\\' ||
1728  ch == '?' ||
1729  ch == '#') {
1730  if (!has_state_override &&
1731  buffer.size() == 2 &&
1732  IsWindowsDriveLetter(buffer)) {
1733  state = kPath;
1734  } else if (buffer.size() == 0) {
1735  url->flags |= URL_FLAGS_HAS_HOST;
1736  url->host.clear();
1737  if (has_state_override)
1738  return;
1739  state = kPathStart;
1740  } else {
1741  std::string host;
1742  if (!ParseHost(&buffer, &host, special)) {
1743  url->flags |= URL_FLAGS_FAILED;
1744  return;
1745  }
1746  if (host == "localhost")
1747  host.clear();
1748  url->flags |= URL_FLAGS_HAS_HOST;
1749  url->host = host;
1750  if (has_state_override)
1751  return;
1752  buffer.clear();
1753  state = kPathStart;
1754  }
1755  continue;
1756  } else {
1757  buffer += ch;
1758  }
1759  break;
1760  case kPathStart:
1761  if (IsSpecial(url->scheme)) {
1762  state = kPath;
1763  if (ch != '/' && ch != '\\') {
1764  continue;
1765  }
1766  } else if (!has_state_override && ch == '?') {
1767  url->flags |= URL_FLAGS_HAS_QUERY;
1768  url->query.clear();
1769  state = kQuery;
1770  } else if (!has_state_override && ch == '#') {
1771  url->flags |= URL_FLAGS_HAS_FRAGMENT;
1772  url->fragment.clear();
1773  state = kFragment;
1774  } else if (ch != kEOL) {
1775  state = kPath;
1776  if (ch != '/') {
1777  continue;
1778  }
1779  }
1780  break;
1781  case kPath:
1782  if (ch == kEOL ||
1783  ch == '/' ||
1784  special_back_slash ||
1785  (!has_state_override && (ch == '?' || ch == '#'))) {
1786  if (IsDoubleDotSegment(buffer)) {
1787  ShortenUrlPath(url);
1788  if (ch != '/' && !special_back_slash) {
1789  url->flags |= URL_FLAGS_HAS_PATH;
1790  url->path.push_back("");
1791  }
1792  } else if (IsSingleDotSegment(buffer) &&
1793  ch != '/' && !special_back_slash) {
1794  url->flags |= URL_FLAGS_HAS_PATH;
1795  url->path.push_back("");
1796  } else if (!IsSingleDotSegment(buffer)) {
1797  if (url->scheme == "file:" &&
1798  url->path.empty() &&
1799  buffer.size() == 2 &&
1800  IsWindowsDriveLetter(buffer)) {
1801  if ((url->flags & URL_FLAGS_HAS_HOST) &&
1802  !url->host.empty()) {
1803  url->host.clear();
1804  url->flags |= URL_FLAGS_HAS_HOST;
1805  }
1806  buffer[1] = ':';
1807  }
1808  url->flags |= URL_FLAGS_HAS_PATH;
1809  std::string segment(buffer.c_str(), buffer.size());
1810  url->path.push_back(segment);
1811  }
1812  buffer.clear();
1813  if (url->scheme == "file:" &&
1814  (ch == kEOL ||
1815  ch == '?' ||
1816  ch == '#')) {
1817  while (url->path.size() > 1 && url->path[0].length() == 0) {
1818  url->path.erase(url->path.begin());
1819  }
1820  }
1821  if (ch == '?') {
1822  url->flags |= URL_FLAGS_HAS_QUERY;
1823  state = kQuery;
1824  } else if (ch == '#') {
1825  state = kFragment;
1826  }
1827  } else {
1828  AppendOrEscape(&buffer, ch, PATH_ENCODE_SET);
1829  }
1830  break;
1831  case kCannotBeBase:
1832  switch (ch) {
1833  case '?':
1834  state = kQuery;
1835  break;
1836  case '#':
1837  state = kFragment;
1838  break;
1839  default:
1840  if (url->path.size() == 0)
1841  url->path.push_back("");
1842  if (url->path.size() > 0 && ch != kEOL)
1843  AppendOrEscape(&url->path[0], ch, C0_CONTROL_ENCODE_SET);
1844  }
1845  break;
1846  case kQuery:
1847  if (ch == kEOL || (!has_state_override && ch == '#')) {
1848  url->flags |= URL_FLAGS_HAS_QUERY;
1849  url->query = buffer;
1850  buffer.clear();
1851  if (ch == '#')
1852  state = kFragment;
1853  } else {
1854  AppendOrEscape(&buffer, ch, QUERY_ENCODE_SET);
1855  }
1856  break;
1857  case kFragment:
1858  switch (ch) {
1859  case kEOL:
1860  url->flags |= URL_FLAGS_HAS_FRAGMENT;
1861  url->fragment = buffer;
1862  break;
1863  case 0:
1864  break;
1865  default:
1866  AppendOrEscape(&buffer, ch, C0_CONTROL_ENCODE_SET);
1867  }
1868  break;
1869  default:
1870  url->flags |= URL_FLAGS_INVALID_PARSE_STATE;
1871  return;
1872  }
1873 
1874  p++;
1875  }
1876 } // NOLINT(readability/fn_size)
1877 
1878 static inline void SetArgs(Environment* env,
1879  Local<Value> argv[],
1880  const struct url_data* url) {
1881  Isolate* isolate = env->isolate();
1882  argv[ARG_FLAGS] = Integer::NewFromUnsigned(isolate, url->flags);
1883  argv[ARG_PROTOCOL] = OneByteString(isolate, url->scheme.c_str());
1884  if (url->flags & URL_FLAGS_HAS_USERNAME)
1885  argv[ARG_USERNAME] = UTF8STRING(isolate, url->username);
1886  if (url->flags & URL_FLAGS_HAS_PASSWORD)
1887  argv[ARG_PASSWORD] = UTF8STRING(isolate, url->password);
1888  if (url->flags & URL_FLAGS_HAS_HOST)
1889  argv[ARG_HOST] = UTF8STRING(isolate, url->host);
1890  if (url->flags & URL_FLAGS_HAS_QUERY)
1891  argv[ARG_QUERY] = UTF8STRING(isolate, url->query);
1892  if (url->flags & URL_FLAGS_HAS_FRAGMENT)
1893  argv[ARG_FRAGMENT] = UTF8STRING(isolate, url->fragment);
1894  if (url->port > -1)
1895  argv[ARG_PORT] = Integer::New(isolate, url->port);
1896  if (url->flags & URL_FLAGS_HAS_PATH)
1897  argv[ARG_PATH] = Copy(env, url->path);
1898 }
1899 
1900 static void Parse(Environment* env,
1901  Local<Value> recv,
1902  const char* input,
1903  const size_t len,
1904  enum url_parse_state state_override,
1905  Local<Value> base_obj,
1906  Local<Value> context_obj,
1907  Local<Function> cb,
1908  Local<Value> error_cb) {
1909  Isolate* isolate = env->isolate();
1910  Local<Context> context = env->context();
1911  HandleScope handle_scope(isolate);
1912  Context::Scope context_scope(context);
1913 
1914  const bool has_context = context_obj->IsObject();
1915  const bool has_base = base_obj->IsObject();
1916 
1917  struct url_data base;
1918  struct url_data url;
1919  if (has_context)
1920  HarvestContext(env, &url, context_obj.As<Object>());
1921  if (has_base)
1922  HarvestBase(env, &base, base_obj.As<Object>());
1923 
1924  URL::Parse(input, len, state_override, &url, has_context, &base, has_base);
1925  if ((url.flags & URL_FLAGS_INVALID_PARSE_STATE) ||
1926  ((state_override != kUnknownState) &&
1927  (url.flags & URL_FLAGS_TERMINATED)))
1928  return;
1929 
1930  // Define the return value placeholders
1931  const Local<Value> undef = Undefined(isolate);
1932  const Local<Value> null = Null(isolate);
1933  if (!(url.flags & URL_FLAGS_FAILED)) {
1934  Local<Value> argv[9] = {
1935  undef,
1936  undef,
1937  undef,
1938  undef,
1939  null, // host defaults to null
1940  null, // port defaults to null
1941  undef,
1942  null, // query defaults to null
1943  null, // fragment defaults to null
1944  };
1945  SetArgs(env, argv, &url);
1946  cb->Call(context, recv, arraysize(argv), argv).FromMaybe(Local<Value>());
1947  } else if (error_cb->IsFunction()) {
1948  Local<Value> argv[2] = { undef, undef };
1949  argv[ERR_ARG_FLAGS] = Integer::NewFromUnsigned(isolate, url.flags);
1950  argv[ERR_ARG_INPUT] =
1951  String::NewFromUtf8(env->isolate(),
1952  input,
1953  v8::NewStringType::kNormal).ToLocalChecked();
1954  error_cb.As<Function>()->Call(context, recv, arraysize(argv), argv)
1955  .FromMaybe(Local<Value>());
1956  }
1957 }
1958 
1959 static void Parse(const FunctionCallbackInfo<Value>& args) {
1960  Environment* env = Environment::GetCurrent(args);
1961  CHECK_GE(args.Length(), 5);
1962  CHECK(args[0]->IsString()); // input
1963  CHECK(args[2]->IsUndefined() || // base context
1964  args[2]->IsNull() ||
1965  args[2]->IsObject());
1966  CHECK(args[3]->IsUndefined() || // context
1967  args[3]->IsNull() ||
1968  args[3]->IsObject());
1969  CHECK(args[4]->IsFunction()); // complete callback
1970  CHECK(args[5]->IsUndefined() || args[5]->IsFunction()); // error callback
1971 
1972  Utf8Value input(env->isolate(), args[0]);
1973  enum url_parse_state state_override = kUnknownState;
1974  if (args[1]->IsNumber()) {
1975  state_override = static_cast<enum url_parse_state>(
1976  args[1]->Uint32Value(env->context()).FromJust());
1977  }
1978 
1979  Parse(env, args.This(),
1980  *input, input.length(),
1981  state_override,
1982  args[2],
1983  args[3],
1984  args[4].As<Function>(),
1985  args[5]);
1986 }
1987 
1988 static void EncodeAuthSet(const FunctionCallbackInfo<Value>& args) {
1989  Environment* env = Environment::GetCurrent(args);
1990  CHECK_GE(args.Length(), 1);
1991  CHECK(args[0]->IsString());
1992  Utf8Value value(env->isolate(), args[0]);
1993  std::string output;
1994  const size_t len = value.length();
1995  output.reserve(len);
1996  for (size_t n = 0; n < len; n++) {
1997  const char ch = (*value)[n];
1998  AppendOrEscape(&output, ch, USERINFO_ENCODE_SET);
1999  }
2000  args.GetReturnValue().Set(
2001  String::NewFromUtf8(env->isolate(),
2002  output.c_str(),
2003  v8::NewStringType::kNormal).ToLocalChecked());
2004 }
2005 
2006 static void ToUSVString(const FunctionCallbackInfo<Value>& args) {
2007  Environment* env = Environment::GetCurrent(args);
2008  CHECK_GE(args.Length(), 2);
2009  CHECK(args[0]->IsString());
2010  CHECK(args[1]->IsNumber());
2011 
2012  TwoByteValue value(env->isolate(), args[0]);
2013  const size_t n = value.length();
2014 
2015  const int64_t start = args[1]->IntegerValue(env->context()).FromJust();
2016  CHECK_GE(start, 0);
2017 
2018  for (size_t i = start; i < n; i++) {
2019  char16_t c = value[i];
2020  if (!IsUnicodeSurrogate(c)) {
2021  continue;
2022  } else if (IsUnicodeSurrogateTrail(c) || i == n - 1) {
2023  value[i] = kUnicodeReplacementCharacter;
2024  } else {
2025  char16_t d = value[i + 1];
2026  if (IsUnicodeTrail(d)) {
2027  i++;
2028  } else {
2029  value[i] = kUnicodeReplacementCharacter;
2030  }
2031  }
2032  }
2033 
2034  args.GetReturnValue().Set(
2035  String::NewFromTwoByte(env->isolate(),
2036  *value,
2037  v8::NewStringType::kNormal,
2038  n).ToLocalChecked());
2039 }
2040 
2041 static void DomainToASCII(const FunctionCallbackInfo<Value>& args) {
2042  Environment* env = Environment::GetCurrent(args);
2043  CHECK_GE(args.Length(), 1);
2044  CHECK(args[0]->IsString());
2045  Utf8Value value(env->isolate(), args[0]);
2046 
2047  url_host host{{""}, HOST_TYPE_DOMAIN};
2048  // Assuming the host is used for a special scheme.
2049  ParseHost(&host, *value, value.length(), true);
2050  if (host.type == HOST_TYPE_FAILED) {
2051  args.GetReturnValue().Set(FIXED_ONE_BYTE_STRING(env->isolate(), ""));
2052  return;
2053  }
2054  std::string out;
2055  WriteHost(&host, &out);
2056  args.GetReturnValue().Set(
2057  String::NewFromUtf8(env->isolate(),
2058  out.c_str(),
2059  v8::NewStringType::kNormal).ToLocalChecked());
2060 }
2061 
2062 static void DomainToUnicode(const FunctionCallbackInfo<Value>& args) {
2063  Environment* env = Environment::GetCurrent(args);
2064  CHECK_GE(args.Length(), 1);
2065  CHECK(args[0]->IsString());
2066  Utf8Value value(env->isolate(), args[0]);
2067 
2068  url_host host{{""}, HOST_TYPE_DOMAIN};
2069  // Assuming the host is used for a special scheme.
2070  ParseHost(&host, *value, value.length(), true, true);
2071  if (host.type == HOST_TYPE_FAILED) {
2072  args.GetReturnValue().Set(FIXED_ONE_BYTE_STRING(env->isolate(), ""));
2073  return;
2074  }
2075  std::string out;
2076  WriteHost(&host, &out);
2077  args.GetReturnValue().Set(
2078  String::NewFromUtf8(env->isolate(),
2079  out.c_str(),
2080  v8::NewStringType::kNormal).ToLocalChecked());
2081 }
2082 
2083 std::string URL::ToFilePath() {
2084  if (context_.scheme != "file:") {
2085  return "";
2086  }
2087 
2088 #ifdef _WIN32
2089  const char* slash = "\\";
2090  auto is_slash = [] (char ch) {
2091  return ch == '/' || ch == '\\';
2092  };
2093 #else
2094  const char* slash = "/";
2095  auto is_slash = [] (char ch) {
2096  return ch == '/';
2097  };
2098  if ((context_.flags & URL_FLAGS_HAS_HOST) &&
2099  context_.host.length() > 0) {
2100  return "";
2101  }
2102 #endif
2103  std::string decoded_path;
2104  for (std::string& part : context_.path) {
2105  std::string decoded;
2106  PercentDecode(part.c_str(), part.length(), &decoded);
2107  for (char& ch : decoded) {
2108  if (is_slash(ch)) {
2109  return "";
2110  }
2111  }
2112  decoded_path += slash + decoded;
2113  }
2114 
2115 #ifdef _WIN32
2116  // TODO(TimothyGu): Use "\\?\" long paths on Windows.
2117 
2118  // If hostname is set, then we have a UNC path. Pass the hostname through
2119  // ToUnicode just in case it is an IDN using punycode encoding. We do not
2120  // need to worry about percent encoding because the URL parser will have
2121  // already taken care of that for us. Note that this only causes IDNs with an
2122  // appropriate `xn--` prefix to be decoded.
2123  if ((context_.flags & URL_FLAGS_HAS_HOST) &&
2124  context_.host.length() > 0) {
2125  std::string unicode_host;
2126  if (!ToUnicode(&context_.host, &unicode_host)) {
2127  return "";
2128  }
2129  return "\\\\" + unicode_host + decoded_path;
2130  }
2131  // Otherwise, it's a local path that requires a drive letter.
2132  if (decoded_path.length() < 3) {
2133  return "";
2134  }
2135  if (decoded_path[2] != ':' ||
2136  !IsASCIIAlpha(decoded_path[1])) {
2137  return "";
2138  }
2139  // Strip out the leading '\'.
2140  return decoded_path.substr(1);
2141 #else
2142  return decoded_path;
2143 #endif
2144 }
2145 
2146 // This function works by calling out to a JS function that creates and
2147 // returns the JS URL object. Be mindful of the JS<->Native boundary
2148 // crossing that is required.
2149 const Local<Value> URL::ToObject(Environment* env) const {
2150  Isolate* isolate = env->isolate();
2151  Local<Context> context = env->context();
2152  Context::Scope context_scope(context);
2153 
2154  const Local<Value> undef = Undefined(isolate);
2155  const Local<Value> null = Null(isolate);
2156 
2157  if (context_.flags & URL_FLAGS_FAILED)
2158  return Local<Value>();
2159 
2160  Local<Value> argv[9] = {
2161  undef,
2162  undef,
2163  undef,
2164  undef,
2165  null, // host defaults to null
2166  null, // port defaults to null
2167  undef,
2168  null, // query defaults to null
2169  null, // fragment defaults to null
2170  };
2171  SetArgs(env, argv, &context_);
2172 
2173  TryCatch try_catch(isolate);
2174 
2175  // The SetURLConstructor method must have been called already to
2176  // set the constructor function used below. SetURLConstructor is
2177  // called automatically when the internal/url.js module is loaded
2178  // during the internal/bootstrap_node.js processing.
2179  MaybeLocal<Value> ret =
2180  env->url_constructor_function()
2181  ->Call(env->context(), undef, 9, argv);
2182 
2183  if (ret.IsEmpty()) {
2185  FatalException(isolate, try_catch);
2186  }
2187 
2188  return ret.ToLocalChecked();
2189 }
2190 
2191 static void SetURLConstructor(const FunctionCallbackInfo<Value>& args) {
2192  Environment* env = Environment::GetCurrent(args);
2193  CHECK_EQ(args.Length(), 1);
2194  CHECK(args[0]->IsFunction());
2195  env->set_url_constructor_function(args[0].As<Function>());
2196 }
2197 
2198 static void Init(Local<Object> target,
2199  Local<Value> unused,
2200  Local<Context> context,
2201  void* priv) {
2202  Environment* env = Environment::GetCurrent(context);
2203  env->SetMethod(target, "parse", Parse);
2204  env->SetMethod(target, "encodeAuth", EncodeAuthSet);
2205  env->SetMethod(target, "toUSVString", ToUSVString);
2206  env->SetMethod(target, "domainToASCII", DomainToASCII);
2207  env->SetMethod(target, "domainToUnicode", DomainToUnicode);
2208  env->SetMethod(target, "setURLConstructor", SetURLConstructor);
2209 
2210 #define XX(name, _) NODE_DEFINE_CONSTANT(target, name);
2211  FLAGS(XX)
2212 #undef XX
2213 
2214 #define XX(name) NODE_DEFINE_CONSTANT(target, name);
2215  PARSESTATES(XX)
2216 #undef XX
2217 }
2218 } // namespace url
2219 } // namespace node
2220 
// Registers this file as the built-in `url` module, with node::url::Init as
// its initialization function.
NODE_MODULE_CONTEXT_AWARE_BUILTIN(url, node::url::Init)
unsigned char * buf
Definition: cares_wrap.cc:483
void ClearFatalExceptionHandlers(Environment *env)
Definition: node.cc:2688
NODE_MODULE_CONTEXT_AWARE_BUILTIN(inspector, node::inspector::Agent::InitInspector)
int len
Definition: cares_wrap.cc:485
void FatalException(Isolate *isolate, Local< Value > error, Local< Message > message)
Definition: node.cc:2623
Persistent< Context > context_
#define ERR_ARGS(XX)
Definition: node_url.cc:95
#define GET_AND_SET(env, obj, name, data, flag)
Definition: node_url.cc:40
hostent * host
Definition: cares_wrap.cc:482
#define XX(name)
Definition: node_url.cc:106
#define UTF8STRING(isolate, str)
Definition: node_url.cc:50
#define SPECIALS(XX)
Definition: node_url.cc:545
#define TWO_CHAR_STRING_TEST(bits, name, expr)
Definition: node_url.cc:119
dtrace a
Definition: v8ustack.d:531
dtrace p
Definition: v8ustack.d:615
CHAR_TEST(8, IsASCIIHexDigit,(IsASCIIDigit(ch)||(ch >='A' &&ch<='F')||(ch >='a' &&ch<='f'))) CHAR_TEST(8
#define ARGS(XX)
Definition: node_url.cc:84
MaybeLocal< Object > New(Isolate *isolate, Local< String > string, enum encoding enc)
Definition: node_buffer.cc:241
#define GET(env, obj, name)
Definition: node_url.cc:36
url_error_cb_args
Definition: node_url.cc:105
dtrace n
Definition: v8ustack.d:531
uint16_t char16_t
Definition: node_api_types.h:8
url_host_value value
Definition: node_url.cc:80