From c58cc9f2ae455061966df0c61d25a982868e6c7d Mon Sep 17 00:00:00 2001 From: Harald Nielsen Date: Fri, 29 Sep 2023 09:46:03 +0200 Subject: [PATCH 1/7] Proper fix utf8 command line arguments --- build_defs/cpp_opts.bzl | 2 +- .../compiler/command_line_interface.cc | 15 ++++++++-- .../compiler/command_line_interface.h | 2 +- src/google/protobuf/compiler/main.cc | 30 ++++++++++++++++++- 4 files changed, 44 insertions(+), 5 deletions(-) diff --git a/build_defs/cpp_opts.bzl b/build_defs/cpp_opts.bzl index 03106df9510c..1bdd6e55e767 100644 --- a/build_defs/cpp_opts.bzl +++ b/build_defs/cpp_opts.bzl @@ -35,7 +35,7 @@ LINK_OPTS = select({ "//build_defs:config_msvc": [ # Suppress linker warnings about files with no symbols defined. "-ignore:4221", - "/utf-8", + "Shell32.lib", ], "@platforms//os:macos": [ "-lpthread", diff --git a/src/google/protobuf/compiler/command_line_interface.cc b/src/google/protobuf/compiler/command_line_interface.cc index e879f149bda3..d5e77ae632f3 100644 --- a/src/google/protobuf/compiler/command_line_interface.cc +++ b/src/google/protobuf/compiler/command_line_interface.cc @@ -1764,10 +1764,21 @@ bool CommandLineInterface::MakeInputsBeProtoPathRelative( bool CommandLineInterface::ExpandArgumentFile( - const std::string& file, std::vector<std::string>* arguments) { + const char* file, std::vector<std::string>* arguments) { + +#ifdef _MSC_VER + // Convert the file name to wide chars. + int size = MultiByteToWideChar(CP_UTF8, 0, file, strlen(file), NULL, 0); + std::wstring fileStr; + fileStr.resize(size); + MultiByteToWideChar(CP_UTF8, 0, file, strlen(file), &fileStr[0], fileStr.size()); +#else + std::string fileStr(file); +#endif + // The argument file is searched in the working directory only. We don't // use the proto import path here. 
- std::ifstream file_stream(file.c_str()); + std::ifstream file_stream(fileStr.c_str()); if (!file_stream.is_open()) { return false; } diff --git a/src/google/protobuf/compiler/command_line_interface.h b/src/google/protobuf/compiler/command_line_interface.h index 4bfee6acd54e..0828497d6a71 100644 --- a/src/google/protobuf/compiler/command_line_interface.h +++ b/src/google/protobuf/compiler/command_line_interface.h @@ -240,7 +240,7 @@ class PROTOC_EXPORT CommandLineInterface { // Read an argument file and append the file's content to the list of // arguments. Return false if the file cannot be read. - bool ExpandArgumentFile(const std::string& file, + bool ExpandArgumentFile(const char* file, std::vector<std::string>* arguments); // Parses a command-line argument into a name/value pair. Returns diff --git a/src/google/protobuf/compiler/main.cc b/src/google/protobuf/compiler/main.cc index 0ade6ace805c..cc5c128a1667 100644 --- a/src/google/protobuf/compiler/main.cc +++ b/src/google/protobuf/compiler/main.cc @@ -21,6 +21,10 @@ // Must be included last. 
#include "google/protobuf/port_def.inc" +#ifdef _MSC_VER +#include <windows.h> +#endif + namespace google { namespace protobuf { namespace compiler { @@ -101,6 +105,30 @@ int ProtobufMain(int argc, char* argv[]) { } // namespace protobuf } // namespace google -int main(int argc, char* argv[]) { +#ifdef _MSC_VER +std::string ToMultiByteUTF8String(const wchar_t* input) +{ + int size = WideCharToMultiByte(CP_UTF8, 0, input, wcslen(input), 0, 0, NULL, NULL); + std::string result(size, 0); + if (size) WideCharToMultiByte(CP_UTF8, 0, input, wcslen(input), &result[0], size, NULL, NULL); + return result; +} + +int main(int argc, char* argv[]) +{ + wchar_t** wargv = CommandLineToArgvW(GetCommandLineW(), &argc); + std::string* argv_mbcs_str = new std::string [argc]; + char** argv_mbcs = new char* [argc]; + for (int i = 0; i < argc; i++) + { + argv_mbcs_str[i] = ToMultiByteUTF8String(wargv[i]); + argv_mbcs[i] = const_cast<char*>(argv_mbcs_str[i].c_str()); + } + return google::protobuf::compiler::ProtobufMain(argc, argv_mbcs); +} +#else +int main(int argc, char* argv[]) +{ return google::protobuf::compiler::ProtobufMain(argc, argv); } +#endif From ddb17fa0938ed2c0f2ca710c6c5fbf297552d6ce Mon Sep 17 00:00:00 2001 From: Harald Nielsen Date: Fri, 29 Sep 2023 09:51:46 +0200 Subject: [PATCH 2/7] add comment --- src/google/protobuf/compiler/command_line_interface.cc | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/google/protobuf/compiler/command_line_interface.cc b/src/google/protobuf/compiler/command_line_interface.cc index d5e77ae632f3..d6f4e2e98367 100644 --- a/src/google/protobuf/compiler/command_line_interface.cc +++ b/src/google/protobuf/compiler/command_line_interface.cc @@ -1766,6 +1766,8 @@ bool CommandLineInterface::MakeInputsBeProtoPathRelative( bool CommandLineInterface::ExpandArgumentFile( const char* file, std::vector<std::string>* arguments) { +// On Windows, to make ifstream handle UTF-8 file names properly, we need to convert the UTF-8 file name to a wide string (std::wstring). 
+// If we don't, the file can't be opened. #ifdef _MSC_VER // Convert the file name to wide chars. int size = MultiByteToWideChar(CP_UTF8, 0, file, strlen(file), NULL, 0); From cfbdd357cbed555f8a09d186dbc1f26943002c1c Mon Sep 17 00:00:00 2001 From: Harald Nielsen Date: Wed, 13 Dec 2023 09:57:56 +0100 Subject: [PATCH 3/7] make to match google style for vars --- src/google/protobuf/compiler/command_line_interface.cc | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/google/protobuf/compiler/command_line_interface.cc b/src/google/protobuf/compiler/command_line_interface.cc index d6f4e2e98367..0b48c9c5bba1 100644 --- a/src/google/protobuf/compiler/command_line_interface.cc +++ b/src/google/protobuf/compiler/command_line_interface.cc @@ -1771,16 +1771,16 @@ bool CommandLineInterface::ExpandArgumentFile( #ifdef _MSC_VER // Convert the file name to wide chars. int size = MultiByteToWideChar(CP_UTF8, 0, file, strlen(file), NULL, 0); - std::wstring fileStr; - fileStr.resize(size); - MultiByteToWideChar(CP_UTF8, 0, file, strlen(file), &fileStr[0], fileStr.size()); + std::wstring file_str; + file_str.resize(size); + MultiByteToWideChar(CP_UTF8, 0, file, strlen(file), &file_str[0], file_str.size()); #else - std::string fileStr(file); + std::string file_str(file); #endif // The argument file is searched in the working directory only. We don't // use the proto import path here. 
- std::ifstream file_stream(fileStr.c_str()); + std::ifstream file_stream(file_str.c_str()); if (!file_stream.is_open()) { return false; } From 881ae8e322747955fe2d2351bb7f2cd5b3561758 Mon Sep 17 00:00:00 2001 From: Harald Date: Tue, 2 Jan 2024 13:11:51 +0100 Subject: [PATCH 4/7] simplify to leak in for instead --- src/google/protobuf/compiler/main.cc | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/google/protobuf/compiler/main.cc b/src/google/protobuf/compiler/main.cc index cc5c128a1667..4eac6800eb0e 100644 --- a/src/google/protobuf/compiler/main.cc +++ b/src/google/protobuf/compiler/main.cc @@ -117,12 +117,11 @@ std::string ToMultiByteUTF8String(const wchar_t* input) int main(int argc, char* argv[]) { wchar_t** wargv = CommandLineToArgvW(GetCommandLineW(), &argc); - std::string* argv_mbcs_str = new std::string [argc]; char** argv_mbcs = new char* [argc]; for (int i = 0; i < argc; i++) { - argv_mbcs_str[i] = ToMultiByteUTF8String(wargv[i]); - argv_mbcs[i] = const_cast<char*>(argv_mbcs_str[i].c_str()); + std::string* multibyte_string = new auto(ToMultiByteUTF8String(wargv[i])); + argv_mbcs[i] = const_cast<char*>(multibyte_string->c_str()); } return google::protobuf::compiler::ProtobufMain(argc, argv_mbcs); } From 31b3d257d25af4a6c74e8ffe953a9fea5d37bb22 Mon Sep 17 00:00:00 2001 From: Harald Date: Tue, 2 Jan 2024 22:15:07 +0100 Subject: [PATCH 5/7] change to use none const data() overload --- src/google/protobuf/compiler/main.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/google/protobuf/compiler/main.cc b/src/google/protobuf/compiler/main.cc index 4eac6800eb0e..4b3474dc9968 100644 --- a/src/google/protobuf/compiler/main.cc +++ b/src/google/protobuf/compiler/main.cc @@ -121,7 +121,7 @@ int main(int argc, char* argv[]) for (int i = 0; i < argc; i++) { std::string* multibyte_string = new auto(ToMultiByteUTF8String(wargv[i])); - argv_mbcs[i] = const_cast<char*>(multibyte_string->c_str()); + argv_mbcs[i] = multibyte_string->data(); } 
return google::protobuf::compiler::ProtobufMain(argc, argv_mbcs); } From 28dbcf5b7e30243a296f795a5a7c16f51a99f6b8 Mon Sep 17 00:00:00 2001 From: Harald Date: Sat, 6 Jan 2024 17:00:10 +0100 Subject: [PATCH 6/7] Update to google naming convention --- src/google/protobuf/compiler/main.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/google/protobuf/compiler/main.cc b/src/google/protobuf/compiler/main.cc index 4b3474dc9968..a76824604b55 100644 --- a/src/google/protobuf/compiler/main.cc +++ b/src/google/protobuf/compiler/main.cc @@ -106,7 +106,7 @@ int ProtobufMain(int argc, char* argv[]) { } // namespace google #ifdef _MSC_VER -std::string ToMultiByteUTF8String(const wchar_t* input) +std::string ToMultiByteUtf8String(const wchar_t* input) { int size = WideCharToMultiByte(CP_UTF8, 0, input, wcslen(input), 0, 0, NULL, NULL); std::string result(size, 0); @@ -120,7 +120,7 @@ int main(int argc, char* argv[]) char** argv_mbcs = new char* [argc]; for (int i = 0; i < argc; i++) { - std::string* multibyte_string = new auto(ToMultiByteUTF8String(wargv[i])); + std::string* multibyte_string = new auto(ToMultiByteUtf8String(wargv[i])); argv_mbcs[i] = multibyte_string->data(); } return google::protobuf::compiler::ProtobufMain(argc, argv_mbcs); From 2129e3f642eb74c46620f7c7e2cdf5a6c9fa8181 Mon Sep 17 00:00:00 2001 From: Harald Nielsen Date: Thu, 11 Jan 2024 15:37:51 +0100 Subject: [PATCH 7/7] C++14 are not supported yet --- src/google/protobuf/compiler/main.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/google/protobuf/compiler/main.cc b/src/google/protobuf/compiler/main.cc index a76824604b55..34c78b11c6be 100644 --- a/src/google/protobuf/compiler/main.cc +++ b/src/google/protobuf/compiler/main.cc @@ -121,7 +121,7 @@ int main(int argc, char* argv[]) for (int i = 0; i < argc; i++) { std::string* multibyte_string = new auto(ToMultiByteUtf8String(wargv[i])); - argv_mbcs[i] = multibyte_string->data(); + argv_mbcs[i] = 
const_cast<char*>(multibyte_string->c_str()); } return google::protobuf::compiler::ProtobufMain(argc, argv_mbcs); }