Skip to content

Commit

Permalink
WIP: Enable dependencies of Google BigQuery
Browse files Browse the repository at this point in the history
  • Loading branch information
tigrux committed Apr 29, 2024
1 parent 1426f33 commit d4290b5
Show file tree
Hide file tree
Showing 5 changed files with 171 additions and 13 deletions.
9 changes: 6 additions & 3 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -470,9 +470,12 @@ if(${VELOX_BUILD_MINIMAL_WITH_DWIO}
OR ${VELOX_ENABLE_HIVE_CONNECTOR}
OR ${VELOX_ENABLE_SUBSTRAIT})
# Locate or build protobuf.
set_source(Protobuf)
resolve_dependency(Protobuf 3.21 EXACT)
include_directories(${Protobuf_INCLUDE_DIRS})
# set_source(Protobuf)
# resolve_dependency(Protobuf 3.21 EXACT)
# include_directories(${Protobuf_INCLUDE_DIRS})
find_package(absl REQUIRED)
find_package(utf8_range REQUIRED)
find_package(Protobuf REQUIRED)
endif()

# GCC needs to link a library to enable std::filesystem.
Expand Down
53 changes: 43 additions & 10 deletions scripts/setup-adapters.sh
Original file line number Diff line number Diff line change
Expand Up @@ -58,29 +58,62 @@ function install_gcs-sdk-cpp {
# https://github.com/googleapis/google-cloud-cpp/blob/main/doc/packaging.md#required-libraries

# abseil-cpp
github_checkout abseil/abseil-cpp 20240116.1 --depth 1
sed -i 's/^#define ABSL_OPTION_USE_\(.*\) 2/#define ABSL_OPTION_USE_\1 0/' "absl/base/options.h"
cmake_install -DBUILD_SHARED_LIBS=OFF \
-DABSL_BUILD_TESTING=OFF
github_checkout abseil/abseil-cpp 20240116.2 --depth 1
cmake_install \
-DCMAKE_BUILD_TYPE=Release \
-DABSL_BUILD_TESTING=OFF \
-DABSL_PROPAGATE_CXX_STD=ON \
-DBUILD_SHARED_LIBS=OFF \

# Protocol buffers
github_checkout protocolbuffers/protobuf v26.1 --depth 1
cmake_install \
-DCMAKE_BUILD_TYPE=Release \
-DBUILD_SHARED_LIBS=OFF \
-Dprotobuf_BUILD_TESTS=OFF \
-Dprotobuf_ABSL_PROVIDER=package

# gRPC
github_checkout grpc/grpc v1.62.1 --depth 1
cmake_install \
-DBUILD_SHARED_LIBS=OFF \
-DCMAKE_BUILD_TYPE=Release \
-DBUILD_SHARED_LIBS=OFF \
-DgRPC_INSTALL=ON \
-DgRPC_BUILD_TESTS=OFF \
-DgRPC_ABSL_PROVIDER=package \
-DgRPC_CARES_PROVIDER=package \
-DgRPC_PROTOBUF_PROVIDER=package \
-DgRPC_RE2_PROVIDER=package \
-DgRPC_SSL_PROVIDER=package \
-DgRPC_ZLIB_PROVIDER=package

# crc32
github_checkout google/crc32c 1.1.2 --depth 1
cmake_install -DBUILD_SHARED_LIBS=OFF \
cmake_install \
-DBUILD_SHARED_LIBS=OFF \
-DCMAKE_BUILD_TYPE=Release \
-DBUILD_SHARED_LIBS=OFF \
-DCRC32C_BUILD_TESTS=OFF \
-DCRC32C_BUILD_BENCHMARKS=OFF \
-DCRC32C_USE_GLOG=OFF

# nlohmann json
github_checkout nlohmann/json v3.11.3 --depth 1
cmake_install -DBUILD_SHARED_LIBS=OFF \
cmake_install \
-DCMAKE_BUILD_TYPE=Release \
-DBUILD_SHARED_LIBS=OFF \
-DBUILD_TESTING=OFF \
-DJSON_BuildTests=OFF

# google-cloud-cpp
github_checkout googleapis/google-cloud-cpp v2.22.0 --depth 1
cmake_install -DBUILD_SHARED_LIBS=OFF \
-DCMAKE_INSTALL_MESSAGE=NEVER \
github_checkout googleapis/google-cloud-cpp v2.23.0 --depth 1
cmake_install \
-DBUILD_SHARED_LIBS=OFF \
-DCMAKE_BUILD_TYPE=Release \
-DBUILD_TESTING=OFF \
-DGOOGLE_CLOUD_CPP_ENABLE_EXAMPLES=OFF \
-DGOOGLE_CLOUD_CPP_ENABLE=storage
-DGOOGLE_CLOUD_CPP_ENABLE=storage,bigquery
}

function install_azure-storage-sdk-cpp {
Expand Down
1 change: 1 addition & 0 deletions velox/connectors/hive/storage_adapters/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -16,3 +16,4 @@ add_subdirectory(s3fs)
add_subdirectory(hdfs)
add_subdirectory(gcs)
add_subdirectory(abfs)
add_subdirectory(bigquery)
48 changes: 48 additions & 0 deletions velox/connectors/hive/storage_adapters/bigquery/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
# Build configuration for the BigQuery quickstart sample (quickstart.cc).
#
# REQUIRED makes configuration fail right here when the
# google_cloud_cpp_bigquery package config cannot be located.
find_package(google_cloud_cpp_bigquery REQUIRED)


# Once the google_cloud_cpp_bigquery package is found, define the sample
# executable and the libraries it links against.
add_executable(bq-quickstart quickstart.cc)
target_link_libraries(bq-quickstart
# BigQuery client library used directly by quickstart.cc.
google-cloud-cpp::bigquery

# Protocol Buffers runtime.
protobuf::libprotobuf

# Abseil components, listed one by one.
# NOTE(review): presumably spelled out because the dependencies are built as
# static libraries; verify whether google-cloud-cpp::bigquery and
# protobuf::libprotobuf already propagate these transitively.
# NOTE(review): this file has no find_package(absl), find_package(Protobuf) or
# find_package(utf8_range); the absl::, protobuf:: and utf8_range:: targets are
# assumed to be imported by a parent CMakeLists.txt - confirm.
absl::absl_check
absl::absl_log
absl::algorithm
absl::base
absl::bind_front
absl::bits
absl::btree
absl::cleanup
absl::cord
absl::core_headers
absl::debugging
absl::die_if_null
absl::dynamic_annotations
absl::flags
absl::flat_hash_map
absl::flat_hash_set
absl::function_ref
absl::hash
absl::if_constexpr
absl::layout
absl::log_initialize
absl::log_severity
absl::memory
absl::node_hash_map
absl::node_hash_set
absl::optional
absl::span
absl::status
absl::statusor
absl::strings
absl::synchronization
absl::time
absl::type_traits
absl::utility
absl::variant

# UTF-8 validation library.
# NOTE(review): presumably required by the protobuf runtime - confirm.
utf8_range::utf8_validity
)
73 changes: 73 additions & 0 deletions velox/connectors/hive/storage_adapters/bigquery/quickstart.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
// Copyright 2021 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

//! [START bigquerystorage_quickstart] [all]
#include "google/cloud/bigquery/storage/v1/bigquery_read_client.h"
#include <cstdint>
#include <iostream>
#include <string>
#include <utility>

namespace {
void ProcessRowsInAvroFormat(
::google::cloud::bigquery::storage::v1::AvroSchema const&,
::google::cloud::bigquery::storage::v1::AvroRows const&) {
// Code to deserialize avro rows should be added here.
}
} // namespace

// Reads a BigQuery table through the BigQuery Storage Read API and prints the
// number of rows received.
//
// Usage: <program> <project-id> <table-name>
//   argv[1]  GCP project id; "projects/" is prepended to form the parent
//            resource of the read session.
//   argv[2]  Full table resource name (see the format note below).
//
// Returns 0 on success. Returns 1 on a usage error or when any API call
// fails; failures are raised as google::cloud::Status and reported by the
// function-level handler at the bottom.
int main(int argc, char* argv[]) try {
  if (argc != 3) {
    std::cerr << "Usage: " << argv[0] << " <project-id> <table-name>\n";
    return 1;
  }

  // project_name should be in the format "projects/<your-gcp-project>"
  std::string const project_name = "projects/" + std::string(argv[1]);
  // table_name should be in the format:
  // "projects/<project-table-resides-in>/datasets/<dataset-table_resides-in>/tables/<table
  // name>" The project values in project_name and table_name do not have to be
  // identical.
  std::string const table_name = argv[2];

  // Create a namespace alias to make the code easier to read.
  namespace bigquery_storage = ::google::cloud::bigquery_storage_v1;
  constexpr int kMaxReadStreams = 1;
  // Create the ReadSession.
  auto client = bigquery_storage::BigQueryReadClient(
      bigquery_storage::MakeBigQueryReadConnection());
  ::google::cloud::bigquery::storage::v1::ReadSession read_session;
  read_session.set_data_format(
      google::cloud::bigquery::storage::v1::DataFormat::AVRO);
  read_session.set_table(table_name);
  auto session =
      client.CreateReadSession(project_name, read_session, kMaxReadStreams);
  if (!session) throw std::move(session).status();

  std::int64_t num_rows = 0;
  // Indexing streams(0) on an empty repeated field is out of range, so only
  // read when the session actually carries a stream. A session without
  // streams (the service may return none, e.g. when there is nothing to read)
  // is reported as zero rows.
  if (session->streams_size() > 0) {
    // Read rows from the ReadSession, starting at the first row.
    constexpr int kRowOffset = 0;
    auto read_rows = client.ReadRows(session->streams(0).name(), kRowOffset);

    for (auto const& row : read_rows) {
      // A failed response means the stream broke; surface it instead of
      // silently printing a partial count and exiting with success.
      if (!row.ok()) throw row.status();
      num_rows += row->row_count();
      ProcessRowsInAvroFormat(session->avro_schema(), row->avro_rows());
    }
  }

  std::cout << num_rows << " rows read from table: " << table_name << "\n";
  return 0;
} catch (google::cloud::Status const& status) {
  std::cerr << "google::cloud::Status thrown: " << status << "\n";
  return 1;
}
//! [END bigquerystorage_quickstart] [all]

0 comments on commit d4290b5

Please sign in to comment.