45 #if defined(NODE_HAVE_I18N_SUPPORT) 57 #include <unicode/utypes.h> 58 #include <unicode/putil.h> 59 #include <unicode/uchar.h> 60 #include <unicode/uclean.h> 61 #include <unicode/udata.h> 62 #include <unicode/uidna.h> 63 #include <unicode/ucnv.h> 64 #include <unicode/utf8.h> 65 #include <unicode/utf16.h> 66 #include <unicode/timezone.h> 67 #include <unicode/ulocdata.h> 68 #include <unicode/uvernum.h> 69 #include <unicode/uversion.h> 70 #include <unicode/ustring.h> 72 #ifdef NODE_HAVE_SMALL_ICU 75 #define SMALL_ICUDATA_ENTRY_POINT \ 76 SMALL_DEF2(U_ICU_VERSION_MAJOR_NUM, U_LIB_SUFFIX_C_NAME) 77 #define SMALL_DEF2(major, suff) SMALL_DEF(major, suff) 78 #ifndef U_LIB_SUFFIX_C_NAME 79 #define SMALL_DEF(major, suff) icusmdt##major##_dat 81 #define SMALL_DEF(major, suff) icusmdt##suff##major##_dat 84 extern "C" const char U_DATA_API SMALL_ICUDATA_ENTRY_POINT[];
90 using v8::FunctionCallbackInfo;
91 using v8::HandleScope;
96 using v8::ObjectTemplate;
103 template <
typename T>
104 MaybeLocal<Object> ToBufferEndian(Environment* env, MaybeStackBuffer<T>*
buf) {
109 static_assert(
sizeof(T) == 1 ||
sizeof(T) == 2,
110 "Currently only one- or two-byte buffers are supported");
111 if (
sizeof(T) > 1 && IsBigEndian()) {
112 SPREAD_BUFFER_ARG(ret.ToLocalChecked(), retbuf);
113 SwapBytes16(retbuf_data, retbuf_length);
120 explicit Converter(
const char* name,
const char* sub = NULL)
122 UErrorCode
status = U_ZERO_ERROR;
123 conv = ucnv_open(name, &status);
124 CHECK(U_SUCCESS(status));
126 ucnv_setSubstChars(conv, sub, strlen(sub), &status);
130 explicit Converter(UConverter* converter,
131 const char* sub = NULL) : conv(converter) {
132 CHECK_NE(conv,
nullptr);
133 UErrorCode status = U_ZERO_ERROR;
135 ucnv_setSubstChars(conv, sub, strlen(sub), &status);
146 class ConverterObject :
public BaseObject, Converter {
148 enum ConverterFlags {
149 CONVERTER_FLAGS_FLUSH = 0x1,
150 CONVERTER_FLAGS_FATAL = 0x2,
151 CONVERTER_FLAGS_IGNORE_BOM = 0x4
154 ~ConverterObject()
override {}
156 static void Has(
const FunctionCallbackInfo<Value>& args) {
157 Environment* env = Environment::GetCurrent(args);
158 HandleScope scope(env->isolate());
160 CHECK_GE(args.Length(), 1);
161 Utf8Value label(env->isolate(), args[0]);
163 UErrorCode status = U_ZERO_ERROR;
164 UConverter* conv = ucnv_open(*label, &status);
165 args.GetReturnValue().Set(!!U_SUCCESS(status));
169 static void Create(
const FunctionCallbackInfo<Value>& args) {
170 Environment* env = Environment::GetCurrent(args);
171 HandleScope scope(env->isolate());
173 CHECK_GE(args.Length(), 2);
174 Utf8Value label(env->isolate(), args[0]);
175 int flags = args[1]->Uint32Value(env->context()).ToChecked();
177 (flags & CONVERTER_FLAGS_FATAL) == CONVERTER_FLAGS_FATAL;
179 (flags & CONVERTER_FLAGS_IGNORE_BOM) == CONVERTER_FLAGS_IGNORE_BOM;
181 UErrorCode status = U_ZERO_ERROR;
182 UConverter* conv = ucnv_open(*label, &status);
183 if (U_FAILURE(status))
187 status = U_ZERO_ERROR;
188 ucnv_setToUCallBack(conv, UCNV_TO_U_CALLBACK_STOP,
189 nullptr,
nullptr,
nullptr, &status);
193 t->SetInternalFieldCount(1);
194 Local<Object> obj = t->NewInstance(env->context()).ToLocalChecked();
195 new ConverterObject(env, obj, conv, ignoreBOM);
196 args.GetReturnValue().Set(obj);
199 static void Decode(
const FunctionCallbackInfo<Value>& args) {
200 Environment* env = Environment::GetCurrent(args);
202 CHECK_GE(args.Length(), 3);
204 Converter utf8(
"utf8");
205 ConverterObject* converter;
206 ASSIGN_OR_RETURN_UNWRAP(&converter, args[0].As<Object>());
207 SPREAD_BUFFER_ARG(args[1], input_obj);
208 int flags = args[2]->Uint32Value(env->context()).ToChecked();
210 UErrorCode status = U_ZERO_ERROR;
211 MaybeStackBuffer<UChar> result;
212 MaybeLocal<Object> ret;
213 size_t limit = ucnv_getMinCharSize(converter->conv) *
216 result.AllocateSufficientStorage(limit);
218 UBool flush = (flags & CONVERTER_FLAGS_FLUSH) == CONVERTER_FLAGS_FLUSH;
220 const char*
source = input_obj_data;
221 size_t source_length = input_obj_length;
223 if (converter->unicode_ && !converter->ignoreBOM_ && !converter->bomSeen_) {
224 int32_t bomOffset = 0;
225 ucnv_detectUnicodeSignature(source, source_length, &bomOffset, &status);
227 source_length -= bomOffset;
228 converter->bomSeen_ =
true;
231 UChar* target = *result;
232 ucnv_toUnicode(converter->conv,
233 &target, target + (limit *
sizeof(UChar)),
234 &source, source + source_length,
235 NULL, flush, &status);
237 if (U_SUCCESS(status)) {
239 result.SetLength(target - &result[0]);
240 ret = ToBufferEndian(env, &result);
241 args.GetReturnValue().Set(ret.ToLocalChecked());
245 args.GetReturnValue().Set(status);
250 converter->bomSeen_ =
false;
251 ucnv_reset(converter->conv);
256 ConverterObject(Environment* env,
257 v8::Local<v8::Object>
wrap,
258 UConverter* converter,
260 const char* sub = NULL) :
261 BaseObject(env, wrap),
262 Converter(converter, sub),
263 ignoreBOM_(ignoreBOM) {
264 MakeWeak<ConverterObject>(
this);
266 switch (ucnv_getType(converter)) {
268 case UCNV_UTF16_BigEndian:
269 case UCNV_UTF16_LittleEndian:
278 bool unicode_ =
false;
279 bool ignoreBOM_ =
false;
280 bool bomSeen_ =
false;
285 void CopySourceBuffer(MaybeStackBuffer<UChar>* dest,
288 const size_t length_in_chars) {
289 dest->AllocateSufficientStorage(length_in_chars);
290 char* dst =
reinterpret_cast<char*
>(**dest);
291 memcpy(dst, data, length);
293 SwapBytes16(dst, length);
297 typedef MaybeLocal<Object> (*TranscodeFunc)(Environment* env,
298 const char* fromEncoding,
299 const char* toEncoding,
301 const size_t source_length,
304 MaybeLocal<Object> Transcode(Environment* env,
305 const char* fromEncoding,
306 const char* toEncoding,
308 const size_t source_length,
309 UErrorCode* status) {
310 *status = U_ZERO_ERROR;
311 MaybeLocal<Object> ret;
312 MaybeStackBuffer<char> result;
313 Converter to(toEncoding,
"?");
314 Converter from(fromEncoding);
315 const uint32_t limit = source_length * ucnv_getMaxCharSize(to.conv);
316 result.AllocateSufficientStorage(limit);
317 char* target = *result;
318 ucnv_convertEx(to.conv, from.conv, &target, target + limit,
319 &source, source + source_length,
nullptr,
nullptr,
320 nullptr,
nullptr,
true,
true, status);
321 if (U_SUCCESS(*status)) {
322 result.SetLength(target - &result[0]);
323 ret = ToBufferEndian(env, &result);
328 MaybeLocal<Object> TranscodeToUcs2(Environment* env,
329 const char* fromEncoding,
330 const char* toEncoding,
332 const size_t source_length,
333 UErrorCode* status) {
334 *status = U_ZERO_ERROR;
335 MaybeLocal<Object> ret;
336 MaybeStackBuffer<UChar> destbuf(source_length);
337 Converter from(fromEncoding);
338 const size_t length_in_chars = source_length *
sizeof(UChar);
339 ucnv_toUChars(from.conv, *destbuf, length_in_chars,
340 source, source_length, status);
341 if (U_SUCCESS(*status))
342 ret = ToBufferEndian(env, &destbuf);
346 MaybeLocal<Object> TranscodeFromUcs2(Environment* env,
347 const char* fromEncoding,
348 const char* toEncoding,
350 const size_t source_length,
351 UErrorCode* status) {
352 *status = U_ZERO_ERROR;
353 MaybeStackBuffer<UChar> sourcebuf;
354 MaybeLocal<Object> ret;
355 Converter to(toEncoding,
"?");
356 const size_t length_in_chars = source_length /
sizeof(UChar);
357 CopySourceBuffer(&sourcebuf, source, source_length, length_in_chars);
358 MaybeStackBuffer<char> destbuf(length_in_chars);
359 const uint32_t
len = ucnv_fromUChars(to.conv, *destbuf, length_in_chars,
360 *sourcebuf, length_in_chars, status);
361 if (U_SUCCESS(*status)) {
362 destbuf.SetLength(len);
363 ret = ToBufferEndian(env, &destbuf);
368 MaybeLocal<Object> TranscodeUcs2FromUtf8(Environment* env,
369 const char* fromEncoding,
370 const char* toEncoding,
372 const size_t source_length,
373 UErrorCode* status) {
374 *status = U_ZERO_ERROR;
375 MaybeStackBuffer<UChar> destbuf;
376 int32_t result_length;
377 u_strFromUTF8(*destbuf, destbuf.capacity(), &result_length,
379 MaybeLocal<Object> ret;
380 if (U_SUCCESS(*status)) {
381 destbuf.SetLength(result_length);
382 ret = ToBufferEndian(env, &destbuf);
383 }
else if (*status == U_BUFFER_OVERFLOW_ERROR) {
384 *status = U_ZERO_ERROR;
385 destbuf.AllocateSufficientStorage(result_length);
386 u_strFromUTF8(*destbuf, result_length, &result_length,
387 source, source_length, status);
388 if (U_SUCCESS(*status)) {
389 destbuf.SetLength(result_length);
390 ret = ToBufferEndian(env, &destbuf);
396 MaybeLocal<Object> TranscodeUtf8FromUcs2(Environment* env,
397 const char* fromEncoding,
398 const char* toEncoding,
400 const size_t source_length,
401 UErrorCode* status) {
402 *status = U_ZERO_ERROR;
403 MaybeLocal<Object> ret;
404 const size_t length_in_chars = source_length /
sizeof(UChar);
405 int32_t result_length;
406 MaybeStackBuffer<UChar> sourcebuf;
407 MaybeStackBuffer<char> destbuf;
408 CopySourceBuffer(&sourcebuf, source, source_length, length_in_chars);
409 u_strToUTF8(*destbuf, destbuf.capacity(), &result_length,
410 *sourcebuf, length_in_chars,
status);
411 if (U_SUCCESS(*status)) {
412 destbuf.SetLength(result_length);
413 ret = ToBufferEndian(env, &destbuf);
414 }
else if (*status == U_BUFFER_OVERFLOW_ERROR) {
415 *status = U_ZERO_ERROR;
416 destbuf.AllocateSufficientStorage(result_length);
417 u_strToUTF8(*destbuf, result_length, &result_length, *sourcebuf,
418 length_in_chars, status);
419 if (U_SUCCESS(*status)) {
420 destbuf.SetLength(result_length);
421 ret = ToBufferEndian(env, &destbuf);
429 case ASCII:
return "us-ascii";
430 case LATIN1:
return "iso8859-1";
431 case UCS2:
return "utf16le";
432 case UTF8:
return "utf-8";
433 default:
return NULL;
437 bool SupportedEncoding(
const enum encoding encoding) {
442 case UTF8:
return true;
443 default:
return false;
447 void Transcode(
const FunctionCallbackInfo<Value>&args) {
448 Environment* env = Environment::GetCurrent(args);
449 Isolate* isolate = env->isolate();
450 UErrorCode status = U_ZERO_ERROR;
451 MaybeLocal<Object> result;
453 THROW_AND_RETURN_UNLESS_BUFFER(env, args[0]);
454 SPREAD_BUFFER_ARG(args[0], ts_obj);
458 if (SupportedEncoding(fromEncoding) && SupportedEncoding(toEncoding)) {
459 TranscodeFunc tfn = &Transcode;
460 switch (fromEncoding) {
463 if (toEncoding ==
UCS2)
464 tfn = &TranscodeToUcs2;
467 if (toEncoding ==
UCS2)
468 tfn = &TranscodeUcs2FromUtf8;
471 switch (toEncoding) {
476 tfn = &TranscodeUtf8FromUcs2;
479 tfn = &TranscodeFromUcs2;
487 result = tfn(env, EncodingName(fromEncoding), EncodingName(toEncoding),
488 ts_obj_data, ts_obj_length, &status);
490 status = U_ILLEGAL_ARGUMENT_ERROR;
493 if (result.IsEmpty())
494 return args.GetReturnValue().Set(status);
496 return args.GetReturnValue().Set(result.ToLocalChecked());
499 void ICUErrorName(
const FunctionCallbackInfo<Value>& args) {
500 Environment* env = Environment::GetCurrent(args);
501 UErrorCode status =
static_cast<UErrorCode
>(args[0]->Int32Value());
502 args.GetReturnValue().Set(
503 String::NewFromUtf8(env->isolate(),
505 v8::NewStringType::kNormal).ToLocalChecked());
508 #define TYPE_ICU "icu" 509 #define TYPE_UNICODE "unicode" 510 #define TYPE_CLDR "cldr" 521 const char* GetVersion(
const char* type,
522 char buf[U_MAX_VERSION_STRING_LENGTH],
523 UErrorCode* status) {
524 if (!strcmp(type, TYPE_ICU)) {
525 return U_ICU_VERSION;
526 }
else if (!strcmp(type, TYPE_UNICODE)) {
527 return U_UNICODE_VERSION;
528 }
else if (!strcmp(type, TYPE_TZ)) {
529 return TimeZone::getTZDataVersion(*status);
530 }
else if (!strcmp(type, TYPE_CLDR)) {
531 UVersionInfo versionArray;
532 ulocdata_getCLDRVersion(versionArray, status);
533 if (U_SUCCESS(*status)) {
534 u_versionToString(versionArray, buf);
542 void GetVersion(
const FunctionCallbackInfo<Value>& args) {
543 Environment* env = Environment::GetCurrent(args);
544 if ( args.Length() == 0 ) {
546 args.GetReturnValue().Set(
547 String::NewFromUtf8(env->isolate(),
553 CHECK_GE(args.Length(), 1);
554 CHECK(args[0]->IsString());
555 Utf8Value val(env->isolate(), args[0]);
556 UErrorCode status = U_ZERO_ERROR;
557 char buf[U_MAX_VERSION_STRING_LENGTH] =
"";
558 const char* versionString = GetVersion(*val, buf, &status);
560 if (U_SUCCESS(status) && versionString) {
562 args.GetReturnValue().Set(
563 String::NewFromUtf8(env->isolate(),
571 bool InitializeICUDirectory(
const std::string& path) {
572 UErrorCode status = U_ZERO_ERROR;
574 #ifdef NODE_HAVE_SMALL_ICU 576 udata_setCommonData(&SMALL_ICUDATA_ENTRY_POINT, &status);
577 #else // !NODE_HAVE_SMALL_ICU 579 #endif // !NODE_HAVE_SMALL_ICU 581 u_setDataDirectory(path.c_str());
584 return status == U_ZERO_ERROR;
587 int32_t ToUnicode(MaybeStackBuffer<char>* buf,
590 UErrorCode status = U_ZERO_ERROR;
591 uint32_t options = UIDNA_NONTRANSITIONAL_TO_UNICODE;
592 UIDNA* uidna = uidna_openUTS46(options, &status);
593 if (U_FAILURE(status))
595 UIDNAInfo info = UIDNA_INFO_INITIALIZER;
597 int32_t len = uidna_nameToUnicodeUTF8(uidna,
599 **buf, buf->capacity(),
606 if (status == U_BUFFER_OVERFLOW_ERROR) {
607 status = U_ZERO_ERROR;
608 buf->AllocateSufficientStorage(len);
609 len = uidna_nameToUnicodeUTF8(uidna,
611 **buf, buf->capacity(),
619 if (U_FAILURE(status)) {
630 int32_t ToASCII(MaybeStackBuffer<char>* buf,
633 enum idna_mode mode) {
634 UErrorCode status = U_ZERO_ERROR;
637 UIDNA_CHECK_CONTEXTJ |
638 UIDNA_NONTRANSITIONAL_TO_ASCII;
639 if (mode == IDNA_STRICT) {
640 options |= UIDNA_USE_STD3_RULES;
645 UIDNA* uidna = uidna_openUTS46(options, &status);
646 if (U_FAILURE(status))
648 UIDNAInfo info = UIDNA_INFO_INITIALIZER;
650 int32_t len = uidna_nameToASCII_UTF8(uidna,
652 **buf, buf->capacity(),
656 if (status == U_BUFFER_OVERFLOW_ERROR) {
657 status = U_ZERO_ERROR;
658 buf->AllocateSufficientStorage(len);
659 len = uidna_nameToASCII_UTF8(uidna,
661 **buf, buf->capacity(),
683 info.errors &= ~UIDNA_ERROR_HYPHEN_3_4;
684 info.errors &= ~UIDNA_ERROR_LEADING_HYPHEN;
685 info.errors &= ~UIDNA_ERROR_TRAILING_HYPHEN;
687 if (mode != IDNA_STRICT) {
689 info.errors &= ~UIDNA_ERROR_EMPTY_LABEL;
690 info.errors &= ~UIDNA_ERROR_LABEL_TOO_LONG;
691 info.errors &= ~UIDNA_ERROR_DOMAIN_NAME_TOO_LONG;
694 if (U_FAILURE(status) || (mode != IDNA_LENIENT && info.errors != 0)) {
705 static void ToUnicode(
const FunctionCallbackInfo<Value>& args) {
706 Environment* env = Environment::GetCurrent(args);
707 CHECK_GE(args.Length(), 1);
708 CHECK(args[0]->IsString());
709 Utf8Value val(env->isolate(), args[0]);
711 MaybeStackBuffer<char>
buf;
712 int32_t len = ToUnicode(&buf, *val, val.length());
715 return env->ThrowError(
"Cannot convert name to Unicode");
718 args.GetReturnValue().Set(
719 String::NewFromUtf8(env->isolate(),
721 v8::NewStringType::kNormal,
722 len).ToLocalChecked());
725 static void ToASCII(
const FunctionCallbackInfo<Value>& args) {
726 Environment* env = Environment::GetCurrent(args);
727 CHECK_GE(args.Length(), 1);
728 CHECK(args[0]->IsString());
729 Utf8Value val(env->isolate(), args[0]);
731 bool lenient = args[1]->BooleanValue(env->context()).FromJust();
732 enum idna_mode mode = lenient ? IDNA_LENIENT : IDNA_DEFAULT;
734 MaybeStackBuffer<char>
buf;
735 int32_t len = ToASCII(&buf, *val, val.length(), mode);
738 return env->ThrowError(
"Cannot convert name to ASCII");
741 args.GetReturnValue().Set(
742 String::NewFromUtf8(env->isolate(),
744 v8::NewStringType::kNormal,
745 len).ToLocalChecked());
769 static int GetColumnWidth(UChar32 codepoint,
770 bool ambiguous_as_full_width =
false) {
771 const auto zero_width_mask = U_GC_CC_MASK |
775 if (codepoint != 0x00AD &&
776 ((U_MASK(u_charType(codepoint)) & zero_width_mask) ||
777 u_hasBinaryProperty(codepoint, UCHAR_EMOJI_MODIFIER))) {
784 const int eaw = u_getIntPropertyValue(codepoint, UCHAR_EAST_ASIAN_WIDTH);
791 if (ambiguous_as_full_width) {
796 if (u_hasBinaryProperty(codepoint, UCHAR_EMOJI_PRESENTATION)) {
808 static void GetStringWidth(
const FunctionCallbackInfo<Value>& args) {
809 Environment* env = Environment::GetCurrent(args);
810 if (args.Length() < 1)
813 bool ambiguous_as_full_width = args[1]->BooleanValue();
814 bool expand_emoji_sequence = args[2]->BooleanValue();
816 if (args[0]->IsNumber()) {
817 args.GetReturnValue().Set(
818 GetColumnWidth(args[0]->Uint32Value(),
819 ambiguous_as_full_width));
823 TwoByteValue value(env->isolate(), args[0]);
825 UChar* str =
reinterpret_cast<UChar*
>(*value);
826 static_assert(
sizeof(*str) ==
sizeof(**value),
827 "sizeof(*str) == sizeof(**value)");
833 while (n < value.length()) {
835 U16_NEXT(str, n, value.length(), c);
846 if (!expand_emoji_sequence &&
847 n > 0 && p == 0x200d &&
848 (u_hasBinaryProperty(c, UCHAR_EMOJI_PRESENTATION) ||
849 u_hasBinaryProperty(c, UCHAR_EMOJI_MODIFIER))) {
852 width += GetColumnWidth(c, ambiguous_as_full_width);
854 args.GetReturnValue().Set(width);
857 void Init(Local<Object> target,
859 Local<Context> context,
861 Environment* env = Environment::GetCurrent(context);
862 env->SetMethod(target,
"toUnicode", ToUnicode);
863 env->SetMethod(target,
"toASCII", ToASCII);
864 env->SetMethod(target,
"getStringWidth", GetStringWidth);
865 env->SetMethod(target,
"getVersion", GetVersion);
868 env->SetMethod(target,
"icuErrName", ICUErrorName);
869 env->SetMethod(target,
"transcode", Transcode);
872 env->SetMethod(target,
"getConverter", ConverterObject::Create);
873 env->SetMethod(target,
"decode", ConverterObject::Decode);
874 env->SetMethod(target,
"hasConverter", ConverterObject::Has);
882 #endif // NODE_HAVE_I18N_SUPPORT
NODE_MODULE_CONTEXT_AWARE_BUILTIN(inspector, node::inspector::Agent::InitInspector)
union node::cares_wrap::@8::CaresAsyncData::@0 data
enum encoding ParseEncoding(const char *encoding, enum encoding default_encoding)
MaybeLocal< Object > New(Isolate *isolate, Local< String > string, enum encoding enc)
void Init(int *argc, const char **argv, int *exec_argc, const char ***exec_argv)