diff --git a/.github/workflows/cpp.yml b/.github/workflows/cpp.yml index a82e1eb76660b..c3993ef6a1118 100644 --- a/.github/workflows/cpp.yml +++ b/.github/workflows/cpp.yml @@ -474,9 +474,7 @@ jobs: shell: bash run: | ci/scripts/install_gcs_testbench.sh default - echo "PYTHON_BIN_DIR=$(cygpath --windows $(dirname $(which python3.exe)))" >> $GITHUB_ENV - name: Test shell: msys2 {0} run: | - PATH="$(cygpath --unix ${PYTHON_BIN_DIR}):${PATH}" ci/scripts/cpp_test.sh "$(pwd)" "$(pwd)/build" diff --git a/ci/docker/conda-cpp.dockerfile b/ci/docker/conda-cpp.dockerfile index dff1f2224809a..eb035d887a158 100644 --- a/ci/docker/conda-cpp.dockerfile +++ b/ci/docker/conda-cpp.dockerfile @@ -42,17 +42,19 @@ RUN mamba install -q -y \ valgrind && \ mamba clean --all +# We want to install the GCS testbench using the Conda base environment's Python, +# because the test environment's Python may later change. +ENV PIPX_PYTHON=/opt/conda/bin/python3 +COPY ci/scripts/install_gcs_testbench.sh /arrow/ci/scripts +RUN /arrow/ci/scripts/install_gcs_testbench.sh default + # Ensure npm, node and azurite are on path. npm and node are required to install azurite, which will then need to -# be on the path for the tests to run. +# be on the path for the tests to run. ENV PATH=/opt/conda/envs/arrow/bin:$PATH COPY ci/scripts/install_azurite.sh /arrow/ci/scripts/ RUN /arrow/ci/scripts/install_azurite.sh -# We want to install the GCS testbench using the same Python binary that the Conda code will use. -COPY ci/scripts/install_gcs_testbench.sh /arrow/ci/scripts -RUN /arrow/ci/scripts/install_gcs_testbench.sh default - COPY ci/scripts/install_sccache.sh /arrow/ci/scripts/ RUN /arrow/ci/scripts/install_sccache.sh unknown-linux-musl /usr/local/bin diff --git a/ci/docker/conda-python.dockerfile b/ci/docker/conda-python.dockerfile index 027fd589cecca..7e8dbe76f6248 100644 --- a/ci/docker/conda-python.dockerfile +++ b/ci/docker/conda-python.dockerfile @@ -32,11 +32,6 @@ RUN mamba install -q -y \ nomkl && \ mamba clean --all -# XXX The GCS testbench was already installed in conda-cpp.dockerfile, -# but we changed the installed Python version above, so we need to reinstall it. -COPY ci/scripts/install_gcs_testbench.sh /arrow/ci/scripts -RUN /arrow/ci/scripts/install_gcs_testbench.sh default - ENV ARROW_ACERO=ON \ ARROW_BUILD_STATIC=OFF \ ARROW_BUILD_TESTS=OFF \ diff --git a/ci/scripts/install_gcs_testbench.bat b/ci/scripts/install_gcs_testbench.bat index b03d0c2ad6608..a327fbbedd945 100644 --- a/ci/scripts/install_gcs_testbench.bat +++ b/ci/scripts/install_gcs_testbench.bat @@ -19,7 +19,9 @@ set GCS_TESTBENCH_VERSION="v0.36.0" +python -m pip install pipx || exit /B 1 + @REM Install GCS testbench %GCS_TESTBENCH_VERSION% -python -m pip install ^ +pipx install ^ "https://github.com/googleapis/storage-testbench/archive/%GCS_TESTBENCH_VERSION%.tar.gz" ^ || exit /B 1 diff --git a/ci/scripts/install_gcs_testbench.sh b/ci/scripts/install_gcs_testbench.sh index 5471b3cc238ca..8af2a45b74baa 100755 --- a/ci/scripts/install_gcs_testbench.sh +++ b/ci/scripts/install_gcs_testbench.sh @@ -34,19 +34,24 @@ case "$(uname -m)" in ;; esac -# On newer pythons install into the system will fail, so override that -export PIP_BREAK_SYSTEM_PACKAGES=1 - version=$1 if [[ "${version}" -eq "default" ]]; then version="v0.39.0" - # Latests versions of Testbench require newer setuptools - python3 -m pip install --upgrade setuptools fi +export PIP_BREAK_SYSTEM_PACKAGES=1 +python3 -m pip install pipx + # This script is run with PYTHON undefined in some places, # but those only use older pythons. if [[ -z "${PYTHON_VERSION}" ]] || [[ "${PYTHON_VERSION}" != "3.13" ]]; then - python3 -m pip install \ - "https://github.com/googleapis/storage-testbench/archive/${version}.tar.gz" + pipx_flags=--verbose + if [[ $(id -un) == "root" ]]; then + # Install globally as /root/.local/bin is typically not in $PATH + pipx_flags="${pipx_flags} --global" + fi + if [[ ! -z "${PIPX_PYTHON}" ]]; then + pipx_flags="${pipx_flags} --python ${PIPX_PYTHON}" + fi + pipx install ${pipx_flags} "https://github.com/googleapis/storage-testbench/archive/${version}.tar.gz" fi diff --git a/cpp/src/arrow/filesystem/gcsfs_test.cc b/cpp/src/arrow/filesystem/gcsfs_test.cc index a6022a8d21681..2098cf4d7f319 100644 --- a/cpp/src/arrow/filesystem/gcsfs_test.cc +++ b/cpp/src/arrow/filesystem/gcsfs_test.cc @@ -95,44 +95,41 @@ class GcsTestbench : public ::testing::Environment { if (const auto* env = std::getenv("PYTHON")) { names = {env}; } - auto error = std::string( - "Could not start GCS emulator." - " Used the following list of python interpreter names:"); - for (const auto& interpreter : names) { - auto exe_path = bp::search_path(interpreter); - error += " " + interpreter; - if (exe_path.empty()) { - error += " (exe not found)"; - continue; - } + auto error = std::string("Could not start GCS emulator 'storage-testbench'"); - bp::ipstream output; - server_process_ = bp::child(exe_path, "-m", "testbench", "--port", port_, group_, - bp::std_err > output); + auto testbench_is_running = [](bp::child& process, bp::ipstream& output) { // Wait for message: "* Restarting with" - auto testbench_is_running = [&output, this](bp::child& process) { - std::string line; - std::chrono::time_point end = - std::chrono::steady_clock::now() + std::chrono::seconds(10); - while (server_process_.valid() && server_process_.running() && - std::chrono::steady_clock::now() < end) { - if (output.peek() && std::getline(output, line)) { - std::cerr << line << std::endl; - if (line.find("* Restarting with") != std::string::npos) return true; - } else { - std::this_thread::sleep_for(std::chrono::milliseconds(20)); - } + std::string line; + std::chrono::time_point end = + std::chrono::steady_clock::now() + std::chrono::seconds(10); + while (process.valid() && process.running() && + std::chrono::steady_clock::now() < end) { + if (output.peek() && std::getline(output, line)) { + std::cerr << line << std::endl; + if (line.find("* Restarting with") != std::string::npos) return true; + } else { + std::this_thread::sleep_for(std::chrono::milliseconds(20)); } - return false; - }; + } + return false; + }; - if (testbench_is_running(server_process_)) break; - error += " (failed to start)"; - server_process_.terminate(); - server_process_.wait(); + auto exe_path = bp::search_path("storage-testbench"); + if (!exe_path.empty()) { + bp::ipstream output; + server_process_ = + bp::child(exe_path, "--port", port_, group_, bp::std_err > output); + if (!testbench_is_running(server_process_, output)) { + error += " (failed to start)"; + server_process_.terminate(); + server_process_.wait(); + } + } else { + error += " (exe not found)"; + } + if (!server_process_.valid()) { + error_ = std::move(error); } - if (server_process_.valid() && server_process_.valid()) return; - error_ = std::move(error); } bool running() { return server_process_.running(); } @@ -140,7 +137,10 @@ class GcsTestbench : public ::testing::Environment { ~GcsTestbench() override { // Brutal shutdown, kill the full process group because the GCS testbench may launch // additional children. - group_.terminate(); + try { + group_.terminate(); + } catch (bp::process_error&) { + } if (server_process_.valid()) { server_process_.wait(); } diff --git a/python/pyarrow/tests/conftest.py b/python/pyarrow/tests/conftest.py index e1919497b5116..7a222cec8a7c4 100644 --- a/python/pyarrow/tests/conftest.py +++ b/python/pyarrow/tests/conftest.py @@ -233,17 +233,16 @@ def minio_server_health_check(address): def gcs_server(): port = find_free_port() env = os.environ.copy() - args = [sys.executable, '-m', 'testbench', '--port', str(port)] + exe = 'storage-testbench' + args = [exe, '--port', str(port)] proc = None try: - # check first if testbench module is available - import testbench # noqa:F401 # start server proc = subprocess.Popen(args, env=env) # Make sure the server is alive. if proc.poll() is not None: pytest.skip(f"Command {args} did not start server successfully!") - except (ModuleNotFoundError, OSError) as e: + except OSError as e: pytest.skip(f"Command {args} failed to execute: {e}") else: yield { diff --git a/r/tests/testthat/test-gcs.R b/r/tests/testthat/test-gcs.R index d671c12138c60..1065886b6e3d6 100644 --- a/r/tests/testthat/test-gcs.R +++ b/r/tests/testthat/test-gcs.R @@ -116,12 +116,12 @@ test_that("GcsFileSystem$create() can read json_credentials", { }) skip_on_cran() -skip_if_not(system('python -c "import testbench"') == 0, message = "googleapis-storage-testbench is not installed.") +skip_if_not(system('storage-testbench -h') == 0, message = "googleapis-storage-testbench is not installed.") library(dplyr) testbench_port <- Sys.getenv("TESTBENCH_PORT", "9001") -pid_minio <- sys::exec_background("python", c("-m", "testbench", "--port", testbench_port), +pid_minio <- sys::exec_background("storage-testbench", c("--port", testbench_port), std_out = FALSE, std_err = FALSE # TODO: is there a good place to send output? )