From 5080491ae45b07f2cbb4d555c3a993d82c858c36 Mon Sep 17 00:00:00 2001 From: James M Snell Date: Sun, 22 Mar 2020 15:33:34 -0700 Subject: [PATCH] src: refactoring and cleanup of node_i18n Signed-off-by: James M Snell PR-URL: https://github.com/nodejs/node/pull/32438 Reviewed-By: Anna Henningsen Reviewed-By: Joyee Cheung --- src/node_i18n.cc | 364 +++++++++++++++++++++++------------------------ src/node_i18n.h | 78 +++++++++- 2 files changed, 252 insertions(+), 190 deletions(-) diff --git a/src/node_i18n.cc b/src/node_i18n.cc index 3bf452b6978e6e..169374aa5de441 100644 --- a/src/node_i18n.cc +++ b/src/node_i18n.cc @@ -87,7 +87,6 @@ namespace node { using v8::Context; using v8::FunctionCallbackInfo; using v8::FunctionTemplate; -using v8::HandleScope; using v8::Int32; using v8::Isolate; using v8::Local; @@ -118,189 +117,6 @@ MaybeLocal ToBufferEndian(Environment* env, MaybeStackBuffer* buf) { return ret; } -struct Converter { - explicit Converter(const char* name, const char* sub = nullptr) - : conv(nullptr) { - UErrorCode status = U_ZERO_ERROR; - conv = ucnv_open(name, &status); - CHECK(U_SUCCESS(status)); - if (sub != nullptr) { - ucnv_setSubstChars(conv, sub, strlen(sub), &status); - } - } - - explicit Converter(UConverter* converter, - const char* sub = nullptr) : conv(converter) { - CHECK_NOT_NULL(conv); - UErrorCode status = U_ZERO_ERROR; - if (sub != nullptr) { - ucnv_setSubstChars(conv, sub, strlen(sub), &status); - } - } - - ~Converter() { - ucnv_close(conv); - } - - UConverter* conv; -}; - -class ConverterObject : public BaseObject, Converter { - public: - enum ConverterFlags { - CONVERTER_FLAGS_FLUSH = 0x1, - CONVERTER_FLAGS_FATAL = 0x2, - CONVERTER_FLAGS_IGNORE_BOM = 0x4 - }; - - ~ConverterObject() override = default; - - static void Has(const FunctionCallbackInfo& args) { - Environment* env = Environment::GetCurrent(args); - HandleScope scope(env->isolate()); - - CHECK_GE(args.Length(), 1); - Utf8Value label(env->isolate(), args[0]); - - UErrorCode status 
= U_ZERO_ERROR; - UConverter* conv = ucnv_open(*label, &status); - args.GetReturnValue().Set(!!U_SUCCESS(status)); - ucnv_close(conv); - } - - static void Create(const FunctionCallbackInfo& args) { - Environment* env = Environment::GetCurrent(args); - HandleScope scope(env->isolate()); - - Local t = env->i18n_converter_template(); - Local obj; - if (!t->NewInstance(env->context()).ToLocal(&obj)) return; - - CHECK_GE(args.Length(), 2); - Utf8Value label(env->isolate(), args[0]); - int flags = args[1]->Uint32Value(env->context()).ToChecked(); - bool fatal = - (flags & CONVERTER_FLAGS_FATAL) == CONVERTER_FLAGS_FATAL; - bool ignoreBOM = - (flags & CONVERTER_FLAGS_IGNORE_BOM) == CONVERTER_FLAGS_IGNORE_BOM; - - UErrorCode status = U_ZERO_ERROR; - UConverter* conv = ucnv_open(*label, &status); - if (U_FAILURE(status)) - return; - - if (fatal) { - status = U_ZERO_ERROR; - ucnv_setToUCallBack(conv, UCNV_TO_U_CALLBACK_STOP, - nullptr, nullptr, nullptr, &status); - } - - new ConverterObject(env, obj, conv, ignoreBOM); - args.GetReturnValue().Set(obj); - } - - static void Decode(const FunctionCallbackInfo& args) { - Environment* env = Environment::GetCurrent(args); - - CHECK_GE(args.Length(), 3); // Converter, Buffer, Flags - - ConverterObject* converter; - ASSIGN_OR_RETURN_UNWRAP(&converter, args[0].As()); - ArrayBufferViewContents input(args[1]); - int flags = args[2]->Uint32Value(env->context()).ToChecked(); - - UErrorCode status = U_ZERO_ERROR; - MaybeStackBuffer result; - MaybeLocal ret; - size_t limit = ucnv_getMinCharSize(converter->conv) * input.length(); - if (limit > 0) - result.AllocateSufficientStorage(limit); - - UBool flush = (flags & CONVERTER_FLAGS_FLUSH) == CONVERTER_FLAGS_FLUSH; - auto cleanup = OnScopeLeave([&]() { - if (flush) { - // Reset the converter state. 
- converter->bomSeen_ = false; - ucnv_reset(converter->conv); - } - }); - - const char* source = input.data(); - size_t source_length = input.length(); - - UChar* target = *result; - ucnv_toUnicode(converter->conv, - &target, target + (limit * sizeof(UChar)), - &source, source + source_length, - nullptr, flush, &status); - - if (U_SUCCESS(status)) { - bool omit_initial_bom = false; - if (limit > 0) { - result.SetLength(target - &result[0]); - if (result.length() > 0 && - converter->unicode_ && - !converter->ignoreBOM_ && - !converter->bomSeen_) { - // If the very first result in the stream is a BOM, and we are not - // explicitly told to ignore it, then we mark it for discarding. - if (result[0] == 0xFEFF) { - omit_initial_bom = true; - } - converter->bomSeen_ = true; - } - } - ret = ToBufferEndian(env, &result); - if (omit_initial_bom && !ret.IsEmpty()) { - // Peform `ret = ret.slice(2)`. - CHECK(ret.ToLocalChecked()->IsUint8Array()); - Local orig_ret = ret.ToLocalChecked().As(); - ret = Buffer::New(env, - orig_ret->Buffer(), - orig_ret->ByteOffset() + 2, - orig_ret->ByteLength() - 2) - .FromMaybe(Local()); - } - if (!ret.IsEmpty()) - args.GetReturnValue().Set(ret.ToLocalChecked()); - return; - } - - args.GetReturnValue().Set(status); - } - - SET_NO_MEMORY_INFO() - SET_MEMORY_INFO_NAME(ConverterObject) - SET_SELF_SIZE(ConverterObject) - - protected: - ConverterObject(Environment* env, - Local wrap, - UConverter* converter, - bool ignoreBOM, - const char* sub = nullptr) : - BaseObject(env, wrap), - Converter(converter, sub), - ignoreBOM_(ignoreBOM) { - MakeWeak(); - - switch (ucnv_getType(converter)) { - case UCNV_UTF8: - case UCNV_UTF16_BigEndian: - case UCNV_UTF16_LittleEndian: - unicode_ = true; - break; - default: - unicode_ = false; - } - } - - private: - bool unicode_ = false; // True if this is a Unicode converter - bool ignoreBOM_ = false; // True if the BOM should be ignored on Unicode - bool bomSeen_ = false; // True if the BOM has been seen -}; - // 
One-Shot Converters void CopySourceBuffer(MaybeStackBuffer* dest, @@ -333,10 +149,10 @@ MaybeLocal Transcode(Environment* env, MaybeStackBuffer result; Converter to(toEncoding, "?"); Converter from(fromEncoding); - const uint32_t limit = source_length * ucnv_getMaxCharSize(to.conv); + const uint32_t limit = source_length * to.max_char_size(); result.AllocateSufficientStorage(limit); char* target = *result; - ucnv_convertEx(to.conv, from.conv, &target, target + limit, + ucnv_convertEx(to.conv(), from.conv(), &target, target + limit, &source, source + source_length, nullptr, nullptr, nullptr, nullptr, true, true, status); if (U_SUCCESS(*status)) { @@ -357,7 +173,7 @@ MaybeLocal TranscodeToUcs2(Environment* env, MaybeStackBuffer destbuf(source_length); Converter from(fromEncoding); const size_t length_in_chars = source_length * sizeof(UChar); - ucnv_toUChars(from.conv, *destbuf, length_in_chars, + ucnv_toUChars(from.conv(), *destbuf, length_in_chars, source, source_length, status); if (U_SUCCESS(*status)) ret = ToBufferEndian(env, &destbuf); @@ -377,7 +193,7 @@ MaybeLocal TranscodeFromUcs2(Environment* env, const size_t length_in_chars = source_length / sizeof(UChar); CopySourceBuffer(&sourcebuf, source, source_length, length_in_chars); MaybeStackBuffer destbuf(length_in_chars); - const uint32_t len = ucnv_fromUChars(to.conv, *destbuf, length_in_chars, + const uint32_t len = ucnv_fromUChars(to.conv(), *destbuf, length_in_chars, *sourcebuf, length_in_chars, status); if (U_SUCCESS(*status)) { destbuf.SetLength(len); @@ -528,6 +344,178 @@ void ICUErrorName(const FunctionCallbackInfo& args) { } // anonymous namespace +Converter::Converter(const char* name, const char* sub) { + UErrorCode status = U_ZERO_ERROR; + UConverter* conv = ucnv_open(name, &status); + CHECK(U_SUCCESS(status)); + conv_.reset(conv); + set_subst_chars(sub); +} + +Converter::Converter(UConverter* converter, const char* sub) + : conv_(converter) { + set_subst_chars(sub); +} + +void 
Converter::set_subst_chars(const char* sub) { + CHECK(conv_); + UErrorCode status = U_ZERO_ERROR; + if (sub != nullptr) { + ucnv_setSubstChars(conv_.get(), sub, strlen(sub), &status); + CHECK(U_SUCCESS(status)); + } +} + +void Converter::reset() { + ucnv_reset(conv_.get()); +} + +size_t Converter::min_char_size() const { + CHECK(conv_); + return ucnv_getMinCharSize(conv_.get()); +} + +size_t Converter::max_char_size() const { + CHECK(conv_); + return ucnv_getMaxCharSize(conv_.get()); +} + +void ConverterObject::Has(const FunctionCallbackInfo& args) { + Environment* env = Environment::GetCurrent(args); + + CHECK_GE(args.Length(), 1); + Utf8Value label(env->isolate(), args[0]); + + UErrorCode status = U_ZERO_ERROR; + ConverterPointer conv(ucnv_open(*label, &status)); + args.GetReturnValue().Set(!!U_SUCCESS(status)); +} + +void ConverterObject::Create(const FunctionCallbackInfo& args) { + Environment* env = Environment::GetCurrent(args); + + Local t = env->i18n_converter_template(); + Local obj; + if (!t->NewInstance(env->context()).ToLocal(&obj)) return; + + CHECK_GE(args.Length(), 2); + Utf8Value label(env->isolate(), args[0]); + int flags = args[1]->Uint32Value(env->context()).ToChecked(); + bool fatal = + (flags & CONVERTER_FLAGS_FATAL) == CONVERTER_FLAGS_FATAL; + + UErrorCode status = U_ZERO_ERROR; + UConverter* conv = ucnv_open(*label, &status); + if (U_FAILURE(status)) + return; + + if (fatal) { + status = U_ZERO_ERROR; + ucnv_setToUCallBack(conv, UCNV_TO_U_CALLBACK_STOP, + nullptr, nullptr, nullptr, &status); + } + + new ConverterObject(env, obj, conv, flags); + args.GetReturnValue().Set(obj); +} + +void ConverterObject::Decode(const FunctionCallbackInfo& args) { + Environment* env = Environment::GetCurrent(args); + + CHECK_GE(args.Length(), 3); // Converter, Buffer, Flags + + ConverterObject* converter; + ASSIGN_OR_RETURN_UNWRAP(&converter, args[0].As()); + ArrayBufferViewContents input(args[1]); + int flags = args[2]->Uint32Value(env->context()).ToChecked(); 
+ + UErrorCode status = U_ZERO_ERROR; + MaybeStackBuffer result; + MaybeLocal ret; + size_t limit = converter->min_char_size() * input.length(); + if (limit > 0) + result.AllocateSufficientStorage(limit); + + UBool flush = (flags & CONVERTER_FLAGS_FLUSH) == CONVERTER_FLAGS_FLUSH; + auto cleanup = OnScopeLeave([&]() { + if (flush) { + // Reset the converter state. + converter->set_bom_seen(false); + converter->reset(); + } + }); + + const char* source = input.data(); + size_t source_length = input.length(); + + UChar* target = *result; + ucnv_toUnicode(converter->conv(), + &target, + target + (limit * sizeof(UChar)), + &source, + source + source_length, + nullptr, + flush, + &status); + + if (U_SUCCESS(status)) { + bool omit_initial_bom = false; + if (limit > 0) { + result.SetLength(target - &result[0]); + if (result.length() > 0 && + converter->unicode() && + !converter->ignore_bom() && + !converter->bom_seen()) { + // If the very first result in the stream is a BOM, and we are not + // explicitly told to ignore it, then we mark it for discarding. + if (result[0] == 0xFEFF) + omit_initial_bom = true; + converter->set_bom_seen(true); + } + } + ret = ToBufferEndian(env, &result); + if (omit_initial_bom && !ret.IsEmpty()) { + // Perform `ret = ret.slice(2)`. 
+ CHECK(ret.ToLocalChecked()->IsUint8Array()); + Local orig_ret = ret.ToLocalChecked().As(); + ret = Buffer::New(env, + orig_ret->Buffer(), + orig_ret->ByteOffset() + 2, + orig_ret->ByteLength() - 2) + .FromMaybe(Local()); + } + if (!ret.IsEmpty()) + args.GetReturnValue().Set(ret.ToLocalChecked()); + return; + } + + args.GetReturnValue().Set(status); +} + +ConverterObject::ConverterObject( + Environment* env, + Local wrap, + UConverter* converter, + int flags, + const char* sub) + : BaseObject(env, wrap), + Converter(converter, sub), + flags_(flags) { + MakeWeak(); + + switch (ucnv_getType(converter)) { + case UCNV_UTF8: + case UCNV_UTF16_BigEndian: + case UCNV_UTF16_LittleEndian: + flags_ |= CONVERTER_FLAGS_UNICODE; + break; + default: { + // Fall through + } + } +} + + bool InitializeICUDirectory(const std::string& path) { UErrorCode status = U_ZERO_ERROR; if (path.empty()) { diff --git a/src/node_i18n.h b/src/node_i18n.h index a0a398ac818736..5c1501ea1908e0 100644 --- a/src/node_i18n.h +++ b/src/node_i18n.h @@ -24,11 +24,16 @@ #if defined(NODE_WANT_INTERNALS) && NODE_WANT_INTERNALS +#if defined(NODE_HAVE_I18N_SUPPORT) + +#include "base_object.h" +#include "env.h" #include "util.h" +#include "v8.h" -#include +#include -#if defined(NODE_HAVE_I18N_SUPPORT) +#include namespace node { @@ -60,6 +65,75 @@ int32_t ToUnicode(MaybeStackBuffer* buf, const char* input, size_t length); +struct ConverterDeleter { + void operator()(UConverter* pointer) const { ucnv_close(pointer); } +}; +using ConverterPointer = std::unique_ptr; + +class Converter { + public: + explicit Converter(const char* name, const char* sub = nullptr); + explicit Converter(UConverter* converter, const char* sub = nullptr); + + UConverter* conv() const { return conv_.get(); } + + size_t max_char_size() const; + size_t min_char_size() const; + void reset(); + void set_subst_chars(const char* sub = nullptr); + + private: + ConverterPointer conv_; +}; + +class ConverterObject : public BaseObject, Converter { 
+ public: + enum ConverterFlags { + CONVERTER_FLAGS_FLUSH = 0x1, + CONVERTER_FLAGS_FATAL = 0x2, + CONVERTER_FLAGS_IGNORE_BOM = 0x4, + CONVERTER_FLAGS_UNICODE = 0x8, + CONVERTER_FLAGS_BOM_SEEN = 0x10, + }; + + static void Create(const v8::FunctionCallbackInfo& args); + static void Decode(const v8::FunctionCallbackInfo& args); + static void Has(const v8::FunctionCallbackInfo& args); + + SET_NO_MEMORY_INFO() + SET_MEMORY_INFO_NAME(ConverterObject) + SET_SELF_SIZE(ConverterObject) + + protected: + ConverterObject(Environment* env, + v8::Local wrap, + UConverter* converter, + int flags, + const char* sub = nullptr); + + void set_bom_seen(bool seen) { + if (seen) + flags_ |= CONVERTER_FLAGS_BOM_SEEN; + else + flags_ &= ~CONVERTER_FLAGS_BOM_SEEN; + } + + bool bom_seen() const { + return (flags_ & CONVERTER_FLAGS_BOM_SEEN) == CONVERTER_FLAGS_BOM_SEEN; + } + + bool unicode() const { + return (flags_ & CONVERTER_FLAGS_UNICODE) == CONVERTER_FLAGS_UNICODE; + } + + bool ignore_bom() const { + return (flags_ & CONVERTER_FLAGS_IGNORE_BOM) == CONVERTER_FLAGS_IGNORE_BOM; + } + + private: + int flags_ = 0; +}; + } // namespace i18n } // namespace node