Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Windows: Fixed environment variables not read as unicode. #2417

Merged
merged 12 commits into from
Apr 3, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion libmamba/include/mamba/core/environment.hpp
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// Copyright (c) 2019, QuantStack and Mamba Contributors
// Copyright (c) 2019-2023, QuantStack and Mamba Contributors
//
// Distributed under the terms of the BSD 3-Clause License.
//
Expand Down
14 changes: 10 additions & 4 deletions libmamba/include/mamba/core/util_os.hpp
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// Copyright (c) 2019, QuantStack and Mamba Contributors
// Copyright (c) 2019-2023, QuantStack and Mamba Contributors
//
// Distributed under the terms of the BSD 3-Clause License.
//
Expand All @@ -14,6 +14,11 @@

namespace mamba
{
#ifdef _WIN32
// Intention is to avoid including `Windows.h`, while still using the basic Windows API types.
using DWORD = unsigned long;
#endif
Klaim marked this conversation as resolved.
Show resolved Hide resolved

bool is_admin();
fs::u8path get_self_exe_path();

Expand All @@ -40,9 +45,10 @@ namespace mamba
void reset_console();

#ifdef _WIN32
std::string to_utf8(const wchar_t* w, size_t s);
std::string to_utf8(const wchar_t* w);
std::string to_utf8(const std::wstring& s);
std::string to_utf8(const wchar_t* windows_unicode_text, size_t size);
std::string to_utf8(const wchar_t* windows_unicode_text);
std::string to_utf8(const std::wstring& windows_unicode_text);
std::wstring to_windows_unicode(const std::string_view utf8_text);
#endif

/* Test whether a given `std::ostream` object refers to a terminal. */
Expand Down
1 change: 1 addition & 0 deletions libmamba/include/mamba/core/util_random.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
#define MAMBA_CORE_UTIL_RANDOM_HPP

#include <algorithm>
#include <array>
#include <cstring>
#include <limits>
#include <random>
Expand Down
99 changes: 66 additions & 33 deletions libmamba/src/core/environment.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,10 @@
#include "mamba/core/util_string.hpp"

#ifdef _WIN32
#include <mutex>

#include "mamba/core/output.hpp"
#include "mamba/core/util_os.hpp"
#endif

namespace mamba
Expand All @@ -18,54 +21,88 @@ namespace mamba
std::optional<std::string> get(const std::string& key)
{
#ifdef _WIN32
const size_t initial_size = 1024;
std::unique_ptr<char[]> temp_small = std::make_unique<char[]>(initial_size);
std::size_t size = GetEnvironmentVariableA(key.c_str(), temp_small.get(), initial_size);
if (size == 0) // Error or empty/missing
// See:
// https://learn.microsoft.com/en-us/cpp/c-runtime-library/reference/getenv-s-wgetenv-s?view=msvc-170
static std::mutex call_mutex;
std::scoped_lock ready_to_execute{ call_mutex }; // Calls to getenv_s kinds of
// functions are not thread-safe, this
// is to prevent related issues.

const auto on_failed = [&](errno_t error_code)
{
LOG_ERROR << fmt::format(
"Failed to acquire environment variable '{}' : errcode = {}",
key,
error_code
);
Comment on lines +33 to +37
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nit: One can also do (with the right header)

Suggested change
LOG_ERROR << fmt::format(
"Failed to acquire environment variable '{}' : errcode = {}",
key,
error_code
);
fmt::format_to(LOG_ERROR,
"Failed to acquire environment variable '{}' : errcode = {}",
key,
error_code
);

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

BTW I know this is possible but I dont know if that changes something performance-wise for fmt? Do you know?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Actually I tried that syntax and it doenst seem to compile, probably lacks some compatibility layer to add. I'll keep it as is for now.

};

const std::wstring unicode_key = to_windows_unicode(key);
size_t required_size = 0;
if (auto error_code = _wgetenv_s(&required_size, nullptr, 0, unicode_key.c_str());
error_code == 0)
{
// Note that on Windows environment variables can never be empty,
// only missing. See https://stackoverflow.com/a/39095782
auto last_err = GetLastError();
if (last_err != ERROR_ENVVAR_NOT_FOUND)
if (required_size == 0) // The value doesn't exist.
{
LOG_ERROR << "Could not get environment variable: " << last_err;
return {};
}

std::wstring value(required_size, L'?'); // Note: The required size implies a `\0`
// but basic_string doesn't.
if (error_code = _wgetenv_s(
&required_size,
value.data(),
value.size(),
unicode_key.c_str()
);
error_code == 0)
{
value.pop_back(); // Remove the `\0` that was written in, otherwise any future
// concatenation will fail.
return mamba::to_utf8(value);
}
else
{
on_failed(error_code);
}
return {};
}
else if (size > initial_size) // Buffer too small
{
std::unique_ptr<char[]> temp_large = std::make_unique<char[]>(size);
GetEnvironmentVariableA(key.c_str(), temp_large.get(), size);
std::string res(temp_large.get());
return res;
}
else // Success
else
{
std::string res(temp_small.get());
return res;
on_failed(error_code);
}
#else
const char* value = std::getenv(key.c_str());
if (value)
{
return value;
}
else
{
return {};
}
#endif
return {};
}

bool set(const std::string& key, const std::string& value)
{
#ifdef _WIN32
auto res = SetEnvironmentVariableA(key.c_str(), value.c_str());
if (!res)
// See:
// https://learn.microsoft.com/en-us/cpp/c-runtime-library/reference/getenv-s-wgetenv-s?view=msvc-170
static std::mutex call_mutex;
std::scoped_lock ready_to_execute{ call_mutex }; // Calls to getenv_s kinds of
// functions are not thread-safe, this
// is to prevent related issues.

const std::wstring unicode_key = to_windows_unicode(key);
const std::wstring unicode_value = to_windows_unicode(value);
auto res = _wputenv_s(unicode_key.c_str(), unicode_value.c_str());
if (res != 0)
{
LOG_ERROR << "Could not set environment variable: " << GetLastError();
LOG_ERROR << fmt::format(
"Could not set environment variable '{}' to '{}' : {}",
key,
value,
GetLastError()
);
}
return res;
return res == 0;
#else
return setenv(key.c_str(), value.c_str(), 1) == 0;
#endif
Expand All @@ -74,11 +111,7 @@ namespace mamba
void unset(const std::string& key)
{
#ifdef _WIN32
auto res = SetEnvironmentVariableA(key.c_str(), NULL);
if (!res)
{
LOG_ERROR << "Could not unset environment variable: " << GetLastError();
}
set(key, "");
#else
unsetenv(key.c_str());
#endif
Expand Down
42 changes: 42 additions & 0 deletions libmamba/src/core/util_os.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,10 @@
#include "mamba/core/util_os.hpp"
#include "mamba/core/util_string.hpp"

#ifdef _WIN32
static_assert(std::is_same_v<mamba::DWORD, ::DWORD>);
#endif

namespace mamba
{
// Heavily inspired by https://github.com/gpakosz/whereami/
Expand Down Expand Up @@ -543,6 +547,44 @@ namespace mamba
{
return to_utf8(s.data(), s.size());
}

std::wstring to_windows_unicode(const std::string_view utf8_text)
{
std::wstring output;
if (!utf8_text.empty())
{
assert(utf8_text.size() <= INT_MAX);
const int size = MultiByteToWideChar(
CP_UTF8,
0,
utf8_text.data(),
utf8_text.size(),
nullptr,
0
);
if (size <= 0)
{
unsigned long last_error = ::GetLastError();
LOG_ERROR << "Failed to convert UTF-8 string to Windows Unicode (UTF-16)"
<< std::system_category().message(static_cast<int>(last_error));
throw std::runtime_error("Failed to convert UTF-8 string to UTF-16");
}

output.resize(size);
int res_size = MultiByteToWideChar(
CP_UTF8,
0,
utf8_text.data(),
utf8_text.size(),
output.data(),
output.size()
);
assert(res_size == size);
}

return output;
}

#endif

/* From https://github.com/ikalnytskyi/termcolor
Expand Down
2 changes: 2 additions & 0 deletions libmamba/tests/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,8 @@ set(LIBMAMBA_TEST_SRCS
src/core/test_virtual_packages.cpp
src/core/test_util.cpp
src/core/test_util_string.cpp
src/core/test_util_os.cpp
src/core/test_system_env.cpp
src/core/test_env_lockfile.cpp
src/core/test_execution.cpp
src/core/test_invoke.cpp
Expand Down
68 changes: 68 additions & 0 deletions libmamba/tests/src/core/test_system_env.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
// Copyright (c) 2023, QuantStack and Mamba Contributors
//
// Distributed under the terms of the BSD 3-Clause License.
//
// The full license is in the file LICENSE, distributed with this software.

#include <string>

#include <gtest/gtest.h>

#include "mamba/core/environment.hpp"
#include "mamba/core/util_random.hpp"


namespace
{
void test_set_get_unset_env_variables(const std::string& key, const std::string& value)
{
const bool did_set = mamba::env::set(key, value);
ASSERT_TRUE(did_set);

const auto result = mamba::env::get(key);
ASSERT_TRUE(result.has_value());
EXPECT_EQ(*result, value);

mamba::env::unset(key);
const auto result_empty = mamba::env::get(key);
ASSERT_FALSE(result_empty.has_value());
}
}

TEST(system_env, set_get_unset_env_variables)
{
const auto key = mamba::generate_random_alphanumeric_string(128);
const auto value = mamba::generate_random_alphanumeric_string(128);

test_set_get_unset_env_variables(key, value);
}

TEST(system_env, set_get_unset_variables_unicode)
{
const std::string key = u8"Joël私のにほん";
const std::string value = u8"Hello, I am Joël. 私のにほんごわへたです";

test_set_get_unset_env_variables(key, value);
}

#ifdef _WIN32

TEST(system_env, get_predefined_env_variable)
{
// We check that normal pre-defined variables are accessible and do not fail access even if some
// of these values have unicode.
const std::vector<std::string> predefined_keys{ "PATH", "OS", "PATHEXT",
"ProgramData", "SystemRoot", "windir",
"APPDATA", "COMPUTERNAME", "TEMP",
"UserName", "USERPROFILE" };

for (const auto& key : predefined_keys)
{
const auto maybe_value = mamba::env::get(key);
ASSERT_TRUE(maybe_value.has_value()) << "key = " + key;
const auto value = *maybe_value;
EXPECT_FALSE(value.empty()) << "key = " + key;
}
}

#endif
34 changes: 34 additions & 0 deletions libmamba/tests/src/core/test_util_os.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
// Copyright (c) 2023, QuantStack and Mamba Contributors
//
// Distributed under the terms of the BSD 3-Clause License.
//
// The full license is in the file LICENSE, distributed with this software.

#include <string>

#include <gtest/gtest.h>

#include "mamba/core/util_os.hpp"


#ifdef _WIN32

namespace
{
const std::wstring text_utf16 = L"Hello, I am Joël. 私のにほんごわへたです";
const std::string text_utf8 = u8"Hello, I am Joël. 私のにほんごわへたです";
}

TEST(to_utf8, basic_unicode_conversion)
{
auto result = mamba::to_utf8(text_utf16);
EXPECT_EQ(text_utf8, result);
}

TEST(to_windows_unicode, basic_unicode_conversion)
{
auto result = mamba::to_windows_unicode(text_utf8);
EXPECT_EQ(text_utf16, result);
}

#endif