diff --git a/.clang-format b/.clang-format index c93dc7d..badd82a 100644 --- a/.clang-format +++ b/.clang-format @@ -7,8 +7,7 @@ Standard: c++20 ColumnLimit: 120 UseCRLF: false -#UseTab: AlignWithSpaces -UseTab: Never +UseTab: AlignWithSpaces TabWidth: 4 IndentWidth: 4 ConstructorInitializerIndentWidth: 4 @@ -28,7 +27,7 @@ AlignConsecutiveDeclarations: None AlignConsecutiveMacros: AcrossEmptyLinesAndComments AlignEscapedNewlines: Left AlignOperands: Align -AlignTrailingComments: true +AlignTrailingComments: false AllowAllArgumentsOnNextLine: true AllowAllParametersOfDeclarationOnNextLine: false diff --git a/CMakeLists.txt b/CMakeLists.txt index 31c8eea..f9db126 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,125 +1,109 @@ -cmake_minimum_required(VERSION 3.15 FATAL_ERROR) - -set(IRODS_CLIENT_VERSION "0.1.0") -set(IRODS_PACKAGE_REVISION "0") +cmake_minimum_required(VERSION 3.18.0 FATAL_ERROR) # # Build Configuration # -find_package(IRODS 4.3.1 REQUIRED) +find_package(IRODS REQUIRED) include(RequireOutOfSourceBuild) -include(IrodsCXXCompiler) +set(IRODS_S3_API_BINARY_NAME "irods_s3_api") + +set(IRODS_CLIENT_VERSION "0.2.0") +set(IRODS_S3_API_VERSION "${IRODS_CLIENT_VERSION}") + +set(IRODS_PACKAGE_REVISION "1") + +include(IrodsExternals) + +include(IrodsCXXCompiler) set(CMAKE_CXX_STANDARD ${IRODS_CXX_STANDARD}) set(CMAKE_CXX_EXTENSIONS OFF) # export-dynamic so stacktrace entries from executables have function names. -set(CMAKE_EXE_LINKER_FLAGS_INIT "-rdynamic -Wl,--export-dynamic -Wl,--enable-new-dtags -Wl,--as-needed -Wl,-z,defs") -# we have intentionally(?) 
undefined symbols in the plugins, so no -Wl,-z,defs -set(CMAKE_MODULE_LINKER_FLAGS_INIT "-Wl,--enable-new-dtags -Wl,--as-needed") -set(CMAKE_EXE_LINKER_FLAGS_RELEASE_INIT "-Wl,--gc-sections -Wl,-z,combreloc") -set(CMAKE_MODULE_LINKER_FLAGS_RELEASE_INIT "-Wl,--gc-sections -Wl,-z,combreloc") +set(CMAKE_EXE_LINKER_FLAGS_INIT "-Wl,--export-dynamic -Wl,--enable-new-dtags -Wl,--as-needed") include(IrodsRunpathDefaults) -project(irods_client_s3_api VERSION "${IRODS_CLIENT_VERSION}" LANGUAGES CXX C) +project(${IRODS_S3_API_BINARY_NAME} VERSION "${IRODS_S3_API_VERSION}" LANGUAGES CXX) + +find_package(Threads REQUIRED) -include(${IRODS_TARGETS_PATH}) +include("${IRODS_TARGETS_PATH}") include(GNUInstallDirs) include(UseLibCXX) +if (NOT CMAKE_CONFIGURATION_TYPES AND NOT CMAKE_BUILD_TYPE) + set(CMAKE_BUILD_TYPE Release CACHE STRING "Choose the type of build {Debug, Release}." FORCE) + message(STATUS "Setting unspecified CMAKE_BUILD_TYPE to '${CMAKE_BUILD_TYPE}'. This is the correct setting for normal builds.") +endif() + +if (CMAKE_CXX_COMPILER_ID MATCHES ".*Clang") + set(IRODS_BUILD_WITH_WERROR_DEFAULT ON) +else() + set(IRODS_BUILD_WITH_WERROR_DEFAULT OFF) +endif() +set(IRODS_BUILD_WITH_WERROR ${IRODS_BUILD_WITH_WERROR_DEFAULT} CACHE BOOL "Choose whether to compile with -Werror.") + +if (IRODS_BUILD_WITH_WERROR) + add_compile_options(-Werror) +endif() + if (CMAKE_CXX_COMPILER_ID STREQUAL "GNU") add_compile_options($<$:-fpermissive>) endif() -add_subdirectory(third-party/hmac_sha256) +add_compile_options(-Wall -Wextra -Wpedantic) + +option(IRODS_ENABLE_ADDRESS_SANITIZER "Enables detection of memory leaks and other features provided by Address Sanitizer." OFF) +if (IRODS_ENABLE_ADDRESS_SANITIZER) + # Make sure the correct llvm-symbolizer binary is available to Address Sanitizer. This binary + # allows debug symbols to be reported appropriately. 
There are two ways to do this: + # + # export PATH=/opt/irods-externals/clang13.0.0-0/bin:$PATH + # + # - or - + # + # export ASAN_SYMBOLIZER_PATH=/opt/irods-externals/clang13.0.0-0/bin/llvm-symbolizer + # + # detect_container_overflow is disabled to guard against false positives which occur when + # parts of the binary are compiled with ASAN and other parts are not. + add_compile_definitions(IRODS_ADDRESS_SANITIZER_DEFAULT_OPTIONS="log_path=/tmp/irods_client_s3_api_asan_output:detect_container_overflow=0") + add_compile_options( + -fsanitize=address + -fno-omit-frame-pointer + -fno-optimize-sibling-calls + -O1) + add_link_options( + -fsanitize=address + -fno-omit-frame-pointer + -fno-optimize-sibling-calls + -O1) +else() + set(CMAKE_EXE_LINKER_FLAGS_INIT "${CMAKE_EXE_LINKER_FLAGS_INIT} -Wl,-z,defs") +endif() -include_directories( - ${IRODS_EXTERNALS_FULLPATH_BOOST}/include - ${IRODS_EXTERNALS_FULLPATH_FMT}/include) +set(IRODS_S3_API_PROJECT_SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}") +set(IRODS_S3_API_PROJECT_BINARY_DIR "${CMAKE_CURRENT_BINARY_DIR}") find_package(CURL REQUIRED) find_package(nlohmann_json "3.6.1" REQUIRED) find_package(OpenSSL REQUIRED COMPONENTS Crypto SSL) -set(CMAKE_THREAD_PREFER_PTHREAD TRUE) -set(THREADS_PREFER_PTHREAD_FLAG TRUE) +include(ObjectTargetHelpers) -find_package(Threads REQUIRED) +add_subdirectory(core) +add_subdirectory(endpoints) -set(IRODS_S3_API_BINARY_NAME irods_s3_api) add_executable(${IRODS_S3_API_BINARY_NAME}) - -set(IRODS_S3_API_BUCKET_RESOLVER_PLUGIN_NAME static_bucket_resolver) -add_library(${IRODS_S3_API_BUCKET_RESOLVER_PLUGIN_NAME} MODULE) - -set(IRODS_S3_API_AUTH_RESOLVER_NAME static_authentication_resolver) -add_library(${IRODS_S3_API_AUTH_RESOLVER_NAME} MODULE) - -set(IRODS_S3_API_PERSISTENCE_PLUGIN sqlite_persistence_plugin) -add_library(${IRODS_S3_API_PERSISTENCE_PLUGIN} MODULE) - -target_link_libraries( - ${IRODS_S3_API_BINARY_NAME} - PRIVATE - ${IRODS_EXTERNALS_FULLPATH_BOOST}/lib/libboost_system.so - 
${IRODS_EXTERNALS_FULLPATH_FMT}/lib/libfmt.so - irods_client - hmac_sha256 - ${CMAKE_DL_LIBS} - CURL::libcurl - nlohmann_json::nlohmann_json - Threads::Threads) - -target_include_directories(${IRODS_S3_API_BUCKET_RESOLVER_PLUGIN_NAME} PRIVATE src) -target_include_directories(${IRODS_S3_API_AUTH_RESOLVER_NAME} PRIVATE src) -target_include_directories(${IRODS_S3_API_PERSISTENCE_PLUGIN} PRIVATE src) - -target_link_libraries( - ${IRODS_S3_API_BUCKET_RESOLVER_PLUGIN_NAME} - PRIVATE - irods_client - ${CMAKE_DL_LIBS} - CURL::libcurl - nlohmann_json::nlohmann_json - ${IRODS_EXTERNALS_FULLPATH_FMT}/lib/libfmt.so) - -target_link_libraries( - ${IRODS_S3_API_AUTH_RESOLVER_NAME} - PRIVATE - irods_client - ${CMAKE_DL_LIBS} - CURL::libcurl - nlohmann_json::nlohmann_json - ${IRODS_EXTERNALS_FULLPATH_FMT}/lib/libfmt.so) - -target_link_libraries( - ${IRODS_S3_API_PERSISTENCE_PLUGIN} - PRIVATE - irods_client - ${CMAKE_DL_LIBS} - CURL::libcurl - nlohmann_json::nlohmann_json - ${IRODS_EXTERNALS_FULLPATH_FMT}/lib/libfmt.so) - -target_include_directories( +target_link_objects( ${IRODS_S3_API_BINARY_NAME} PRIVATE - third-party/hmac_sha256 - ${IRODS_EXTERNALS_FULLPATH_BOOST}/include) - -target_include_directories(${IRODS_S3_API_AUTH_RESOLVER_NAME} PRIVATE ${IRODS_EXTERNALS_FULLPATH_BOOST}/include) -target_include_directories(${IRODS_S3_API_BUCKET_RESOLVER_PLUGIN_NAME} PRIVATE ${IRODS_EXTERNALS_FULLPATH_BOOST}/include) -target_include_directories(${IRODS_S3_API_PERSISTENCE_PLUGIN} PRIVATE ${IRODS_EXTERNALS_FULLPATH_BOOST}/include) - -add_subdirectory(src) + irods_s3_api_core + irods_s3_api_endpoint_authentication + irods_s3_api_endpoint_put_object +) -install( - TARGETS - ${IRODS_S3_API_BINARY_NAME} - ${IRODS_S3_API_AUTH_RESOLVER_NAME} - ${IRODS_S3_API_BUCKET_RESOLVER_PLUGIN_NAME} - DESTINATION "${CMAKE_INSTALL_LIBDIR}/irods/clients/s3_api") +install(TARGETS ${IRODS_S3_API_BINARY_NAME} DESTINATION "${CMAKE_INSTALL_BINDIR}") # # Packaging Configuration @@ -130,6 +114,11 @@ 
include(IrodsCPackPlatform) set(IRODS_PACKAGE_NAME irods-experimental-client-s3-api) +#list(APPEND CPACK_RPM_EXCLUDE_FROM_AUTO_FILELIST_ADDITION "${CPACK_PACKAGING_INSTALL_PREFIX}${IRODS_HOME_DIRECTORY}") +#list(APPEND CPACK_RPM_EXCLUDE_FROM_AUTO_FILELIST_ADDITION "${CPACK_PACKAGING_INSTALL_PREFIX}${IRODS_HOME_DIRECTORY}/scripts") +#list(APPEND CPACK_RPM_EXCLUDE_FROM_AUTO_FILELIST_ADDITION "${CPACK_PACKAGING_INSTALL_PREFIX}${IRODS_HOME_DIRECTORY}/scripts/irods") +#list(APPEND CPACK_RPM_EXCLUDE_FROM_AUTO_FILELIST_ADDITION "${CPACK_PACKAGING_INSTALL_PREFIX}${IRODS_HOME_DIRECTORY}/scripts/irods/test") + set(CPACK_PACKAGE_VERSION ${IRODS_CLIENT_VERSION}) set(CPACK_INCLUDE_TOPLEVEL_DIRECTORY OFF) set(CPACK_COMPONENT_INCLUDE_TOPLEVEL_DIRECTORY OFF) @@ -138,28 +127,22 @@ set(CPACK_COMPONENTS_GROUPING IGNORE) set(CPACK_DEB_COMPONENT_INSTALL OFF) set(CPACK_DEBIAN_PACKAGE_SHLIBDEPS OFF) set(CPACK_DEBIAN_PACKAGE_CONTROL_STRICT_PERMISSION ON) -set(CPACK_DEBIAN_PACKAGE_RELEASE "${IRODS_PACKAGE_REVISION}") -if (IRODS_LINUX_DISTRIBUTION_VERSION_CODENAME) - set(CPACK_DEBIAN_PACKAGE_RELEASE "${CPACK_DEBIAN_PACKAGE_RELEASE}~${IRODS_LINUX_DISTRIBUTION_VERSION_CODENAME}") -endif() +#set(CPACK_DEBIAN_PACKAGE_CONTROL_EXTRA "${CMAKE_SOURCE_DIR}/packaging/postinst;") +#set(CPACK_DEBIAN_PACKAGE_BREAKS ${IRODS_S3_API_BINARY_NAME}) +#set(CPACK_DEBIAN_PACKAGE_REPLACES ${IRODS_S3_API_BINARY_NAME}) set(CPACK_DEBIAN_FILE_NAME DEB-DEFAULT) set(CPACK_RPM_COMPONENT_INSTALL OFF) set(CPACK_RPM_PACKAGE_LICENSE "BSD-3-Clause") set(CPACK_RPM_PACKAGE_AUTOREQ 0) set(CPACK_RPM_PACKAGE_AUTOPROV 0) -#set(CPACK_RPM_PACKAGE_OBSOLETES ${IRODS_PACKAGE_NAME}) +#set(CPACK_RPM_PACKAGE_OBSOLETES ${IRODS_S3_API_BINARY_NAME}) #set(CPACK_RPM_POST_INSTALL_SCRIPT_FILE "${CMAKE_SOURCE_DIR}/packaging/postinst") -set(CPACK_RPM_PACKAGE_RELEASE "${IRODS_PACKAGE_REVISION}") set(CPACK_RPM_FILE_NAME RPM-DEFAULT) -list(APPEND CPACK_RPM_EXCLUDE_FROM_AUTO_FILELIST_ADDITION 
"${CPACK_PACKAGING_INSTALL_PREFIX}${CMAKE_INSTALL_LIBEXECDIR}") -list(APPEND CPACK_RPM_EXCLUDE_FROM_AUTO_FILELIST_ADDITION "${CPACK_PACKAGING_INSTALL_PREFIX}${CMAKE_INSTALL_LIBEXECDIR}/irods") -list(APPEND CPACK_RPM_EXCLUDE_FROM_AUTO_FILELIST_ADDITION "${CPACK_PACKAGING_INSTALL_PREFIX}${CMAKE_INSTALL_LIBEXECDIR}/irods/clients") - set(CPACK_ARCHIVE_COMPONENT_INSTALL OFF) -set(CPACK_DEBIAN_PACKAGE_NAME "${IRODS_PACKAGE_NAME}") +set(CPACK_DEBIAN_PACKAGE_NAME ${IRODS_PACKAGE_NAME}) set(CPACK_DEBIAN_PACKAGE_DEPENDS "irods-runtime (= ${IRODS_VERSION})") get_filename_component(CURL_LIBRARY_REALPATH ${CURL_LIBRARY} REALPATH) @@ -184,7 +167,7 @@ elseif (CURL_LIBRARY_REALNAME STREQUAL "libcurl") endif () endif () -set(CPACK_RPM_PACKAGE_NAME "${IRODS_PACKAGE_NAME}") +set(CPACK_RPM_PACKAGE_NAME ${IRODS_PACKAGE_NAME}) set(CPACK_RPM_PACKAGE_REQUIRES "irods-runtime = ${IRODS_VERSION}, libcurl") if (NOT CPACK_GENERATOR) diff --git a/core/CMakeLists.txt b/core/CMakeLists.txt new file mode 100644 index 0000000..cbc2e0f --- /dev/null +++ b/core/CMakeLists.txt @@ -0,0 +1,57 @@ +include(GetGitRevisionDescription) +get_git_head_revision(IRODS_S3_API_GIT_REFSPEC IRODS_S3_API_GIT_SHA1) + +configure_file( + "${CMAKE_CURRENT_SOURCE_DIR}/include/irods/private/s3_api/version.hpp.in" + "${CMAKE_CURRENT_BINARY_DIR}/include/irods/private/s3_api/version.hpp" + @ONLY +) + +add_library( + irods_s3_api_core + OBJECT + "${CMAKE_CURRENT_SOURCE_DIR}/src/common.cpp" + "${CMAKE_CURRENT_SOURCE_DIR}/src/crlf_parser.cpp" + "${CMAKE_CURRENT_SOURCE_DIR}/src/globals.cpp" + "${CMAKE_CURRENT_SOURCE_DIR}/src/main.cpp" + "${CMAKE_CURRENT_SOURCE_DIR}/src/process_stash.cpp" + "${CMAKE_CURRENT_SOURCE_DIR}/src/session.cpp" + "${CMAKE_CURRENT_SOURCE_DIR}/src/transport.cpp" +) + +target_link_libraries( + irods_s3_api_core + PRIVATE + irods_client + "${IRODS_EXTERNALS_FULLPATH_BOOST}/lib/libboost_filesystem.so" + "${IRODS_EXTERNALS_FULLPATH_BOOST}/lib/libboost_program_options.so" + 
"${IRODS_EXTERNALS_FULLPATH_BOOST}/lib/libboost_url.so" + "${IRODS_EXTERNALS_FULLPATH_FMT}/lib/libfmt.so" + CURL::libcurl +) + +target_compile_definitions( + irods_s3_api_core + PRIVATE + ${IRODS_COMPILE_DEFINITIONS} + ${IRODS_COMPILE_DEFINITIONS_PRIVATE} + # The following macro requires that the application never attempt to + # change log levels concurrently across threads. + SPDLOG_NO_ATOMIC_LEVELS + # Defines a macro which is used to construct the URL paths to individual + # endpoint handlers (e.g. /irods-s3-api/0.2.0/collections). + IRODS_S3_API_BASE_URL="/irods-s3-api/${IRODS_S3_API_VERSION}" +) + +target_include_directories( + irods_s3_api_core + PRIVATE + "${CMAKE_CURRENT_SOURCE_DIR}/include" + "${CMAKE_CURRENT_BINARY_DIR}/include" + "${IRODS_S3_API_PROJECT_SOURCE_DIR}/endpoints/shared/include" + "${IRODS_EXTERNALS_FULLPATH_BOOST}/include" + "${IRODS_EXTERNALS_FULLPATH_FMT}/include" + "${IRODS_EXTERNALS_FULLPATH_SPDLOG}/include" +) + +set_target_properties(irods_s3_api_core PROPERTIES EXCLUDE_FROM_ALL TRUE) diff --git a/core/include/irods/private/s3_api/common.hpp b/core/include/irods/private/s3_api/common.hpp new file mode 100644 index 0000000..a5f4c33 --- /dev/null +++ b/core/include/irods/private/s3_api/common.hpp @@ -0,0 +1,206 @@ +#ifndef IRODS_S3_API_ENDPOINT_COMMON_HPP +#define IRODS_S3_API_ENDPOINT_COMMON_HPP + +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +struct RcComm; + +namespace irods::http +{ + class session; + + // clang-format off + using field_type = boost::beast::http::field; + using request_type = boost::beast::http::request; + using response_type = boost::beast::http::response; + using status_type = boost::beast::http::status; + using verb_type = boost::beast::http::verb; + + using session_pointer_type = std::shared_ptr; + using request_handler_type = 
void(*)(session_pointer_type, request_type&); + + using request_handler_map_type = std::unordered_map; + + using query_arguments_type = std::unordered_map; + + using handler_type = void (*)(session_pointer_type, request_type&, query_arguments_type&); + // clang-format on + + enum class authorization_scheme + { + basic = 0, + openid_connect + }; // enum class authorization_scheme + + struct authenticated_client_info + { + authorization_scheme auth_scheme; + std::string username; + std::chrono::steady_clock::time_point expires_at; + // TODO Store an expiration timestamp here. Post discush: let it expire and send reauth code to client. + // Perhaps a purge timestamp as well. This is an optimization situation. + }; // struct authenticated_client_info + + struct url + { + std::string path; + query_arguments_type query; + }; // struct url + + struct client_identity_resolution_result + { + std::optional response; + authenticated_client_info client_info{}; + }; // struct client_identity_resolution_result + + class connection_facade // NOLINT(cppcoreguidelines-special-member-functions) + { + public: + connection_facade() = default; + + explicit connection_facade(irods::connection_pool::connection_proxy&& _conn) + : conn_{std::move(_conn)} + { + } // constructor + + explicit connection_facade(irods::experimental::client_connection&& _conn) + : conn_{std::move(_conn)} + { + } // constructor + + connection_facade(const connection_facade&) = delete; + auto operator=(const connection_facade&) -> connection_facade& = delete; + + connection_facade(connection_facade&&) = default; + auto operator=(connection_facade&&) -> connection_facade& = default; + + explicit operator RcComm*() noexcept + { + if (auto* p = std::get_if(&conn_); p) { + return static_cast(*p); + } + + return static_cast(*std::get_if(&conn_)); + } // operator RcComm* + + operator RcComm&() // NOLINT(google-explicit-constructor) + { + if (auto* p = std::get_if(&conn_); p) { + return *p; + } + + if (auto* p = 
std::get_if(&conn_); p) { + return *p; + } + + THROW(SYS_INTERNAL_ERR, "Cannot return reference to connection object. connection_facade is empty."); + } // operator RcComm& + + template + auto get_ref() -> T& + { + if (auto* p = std::get_if(&conn_); p) { + return *p; + } + + THROW(SYS_INTERNAL_ERR, "Cannot return reference to connection object. connection_facade is empty."); + } // get_ref + + private: + std::variant + conn_; + }; // class connection_facade + + auto fail(response_type& _response, status_type _status, const std::string_view _error_msg) -> response_type; + + auto fail(response_type& _response, status_type _status) -> response_type; + + auto fail(status_type _status, const std::string_view _error_msg) -> response_type; + + auto fail(status_type _status) -> response_type; + + auto decode(const std::string_view _v) -> std::string; + + auto encode(std::string_view _to_encode) -> std::string; + + // TODO Create a better name. + auto to_argument_list(const std::string_view _urlencoded_string) -> std::unordered_map; + + auto get_url_path(const std::string& _url) -> std::optional; + + auto parse_url(const std::string& _url) -> url; + + auto parse_url(const request_type& _req) -> url; + + auto resolve_client_identity(const request_type& _req) -> client_identity_resolution_result; + + auto execute_operation( + session_pointer_type _sess_ptr, + request_type& _req, + const std::unordered_map& _op_table_get, + const std::unordered_map& _op_table_post) -> void; + + auto get_port_from_url(boost::urls::url_view _url) -> std::optional; +} // namespace irods::http + +namespace irods +{ + template + auto generate_uuid(const Map& _map) -> std::string + { + std::string uuid; + uuid.reserve(36); // NOLINT(cppcoreguidelines-avoid-magic-numbers, readability-magic-numbers) + uuid = to_string(boost::uuids::random_generator{}()); + + while (_map.find(uuid) != std::end(_map)) { + uuid = to_string(boost::uuids::random_generator{}()); + } + + return uuid; + } // generate_uuid + + 
auto to_permission_string(const irods::experimental::filesystem::perms _p) -> const char*; + + auto to_permission_enum(const std::string_view _s) -> std::optional; + + auto to_object_type_string(const irods::experimental::filesystem::object_type _t) -> const char*; + + auto to_object_type_enum(const std::string_view _s) -> std::optional; + + auto get_connection(const std::string& _username) -> irods::http::connection_facade; + + auto fail(boost::beast::error_code ec, char const* what) -> void; + + auto enable_ticket(RcComm& _comm, const std::string& _ticket) -> int; + + template + // NOLINTNEXTLINE(cppcoreguidelines-avoid-c-arrays, modernize-avoid-c-arrays) + constexpr auto strncpy_null_terminated(char (&_dst)[N], const char* _src) -> char* + { + return std::strncpy(_dst, _src, N - 1); + } // strncpy_null_terminated +} // namespace irods + +#endif // IRODS_S3_API_ENDPOINT_COMMON_HPP diff --git a/core/include/irods/private/s3_api/crlf_parser.hpp b/core/include/irods/private/s3_api/crlf_parser.hpp new file mode 100644 index 0000000..ada7736 --- /dev/null +++ b/core/include/irods/private/s3_api/crlf_parser.hpp @@ -0,0 +1,56 @@ +#ifndef IRODS_S3_API_CRLF_PARSER_HPP +#define IRODS_S3_API_CRLF_PARSER_HPP + +#include "irods/private/s3_api/common.hpp" + +#include +#include + +namespace irods::http +{ + class crlf_parser + { + public: + explicit crlf_parser(const std::string_view _s) + : data_{_s} + { + } + + auto next_crlf(std::int64_t _count = -1) -> bool + { + if (spos_ == 0 && epos_ == 0) { + epos_ = data_.find("\r\n"); + } + else if (epos_ != std::string_view::npos) { + // TODO +2 can lead to wrapping if epos_ is unsigned. + spos_ = std::clamp(epos_ + 2, epos_, std::string_view::npos); // Skip CRLF. 
+ + if (_count > -1) { + epos_ = spos_ + _count; + } + else { + epos_ = data_.find("\r\n", spos_); + } + } + + return std::string_view::npos != epos_; + } + + auto data() -> std::string_view + { + return data_.substr(spos_, epos_ - spos_); + } + + private: + const std::string_view data_; + std::string_view::size_type spos_ = 0; + std::string_view::size_type epos_ = 0; + }; // class crlf_parser + + auto get_multipart_form_data_boundary(const std::string_view _data) -> std::optional; + + auto parse_multipart_form_data(const std::string_view _boundary, const std::string_view _data) + -> query_arguments_type; +} // namespace irods::http + +#endif // IRODS_S3_API_CRLF_PARSER_HPP diff --git a/core/include/irods/private/s3_api/globals.hpp b/core/include/irods/private/s3_api/globals.hpp new file mode 100644 index 0000000..18dee60 --- /dev/null +++ b/core/include/irods/private/s3_api/globals.hpp @@ -0,0 +1,28 @@ +#ifndef IRODS_S3_API_GLOBALS_HPP +#define IRODS_S3_API_GLOBALS_HPP + +#include + +#include +#include +#include + +#include + +namespace irods::http::globals +{ + auto set_configuration(const nlohmann::json& _config) -> void; + auto configuration() -> const nlohmann::json&; + + auto set_request_handler_io_context(boost::asio::io_context& _ioc) -> void; + auto request_handler_io_context() -> boost::asio::io_context&; + + auto set_background_thread_pool(boost::asio::thread_pool& _tp) -> void; + auto background_thread_pool() -> boost::asio::thread_pool&; + auto background_task(std::function _task) -> void; + + auto set_connection_pool(irods::connection_pool& _cp) -> void; + auto connection_pool() -> irods::connection_pool&; +} // namespace irods::http::globals + +#endif // IRODS_S3_API_GLOBALS_HPP diff --git a/core/include/irods/private/s3_api/log.hpp b/core/include/irods/private/s3_api/log.hpp new file mode 100644 index 0000000..df16e26 --- /dev/null +++ b/core/include/irods/private/s3_api/log.hpp @@ -0,0 +1,79 @@ +#ifndef IRODS_S3_API_LOG_HPP +#define 
IRODS_S3_API_LOG_HPP + +#include +#include + +#include +#include + +namespace irods::http::logging +{ + inline auto trace(const std::string_view _msg) -> void + { + spdlog::trace(_msg); + } // trace + + inline auto info(const std::string_view _msg) -> void + { + spdlog::info(_msg); + } // info + + inline auto debug(const std::string_view _msg) -> void + { + spdlog::debug(_msg); + } // debug + + inline auto warn(const std::string_view _msg) -> void + { + spdlog::warn(_msg); + } // warn + + inline auto error(const std::string_view _msg) -> void + { + spdlog::error(_msg); + } // error + + inline auto critical(const std::string_view _msg) -> void + { + spdlog::critical(_msg); + } // critical + + template + constexpr auto trace(fmt::format_string _format, Args&&... _args) -> void + { + spdlog::trace(_format, std::forward(_args)...); + } // trace + + template + constexpr auto info(fmt::format_string _format, Args&&... _args) -> void + { + spdlog::info(_format, std::forward(_args)...); + } // info + + template + constexpr auto debug(fmt::format_string _format, Args&&... _args) -> void + { + spdlog::debug(_format, std::forward(_args)...); + } // debug + + template + constexpr auto warn(fmt::format_string _format, Args&&... _args) -> void + { + spdlog::warn(_format, std::forward(_args)...); + } // warn + + template + constexpr auto error(fmt::format_string _format, Args&&... _args) -> void + { + spdlog::error(_format, std::forward(_args)...); + } // error + + template + constexpr auto critical(fmt::format_string _format, Args&&... 
_args) -> void + { + spdlog::critical(_format, std::forward(_args)...); + } // critical +} // namespace irods::http::logging + +#endif // IRODS_S3_API_LOG_HPP diff --git a/core/include/irods/private/s3_api/process_stash.hpp b/core/include/irods/private/s3_api/process_stash.hpp new file mode 100644 index 0000000..8e160ae --- /dev/null +++ b/core/include/irods/private/s3_api/process_stash.hpp @@ -0,0 +1,89 @@ +#ifndef IRODS_S3_API_PROCESS_STASH_HPP +#define IRODS_S3_API_PROCESS_STASH_HPP + +/// \file + +// Boost.Any is used instead of the implementation provided by the standard library +// because it produces the correct results when used across shared library boundaries. +#include + +#include +#include +#include +#include + +/// Defines the set of free functions used to manage the process stash. +/// +/// In iRODS, a process stash is a key-value store which allows a process to store arbitrary +/// data in local memory. Insertions of data generate unique handles which allow retrieval of +/// the data. No two processes share the same data (unless one is a child process). +/// +/// \since 4.2.12 +namespace irods::http::process_stash +{ + /// Inserts an object into the process stash. + /// + /// This function is thread-safe. + /// + /// \param[in] _value The object to insert. + /// + /// \returns A string representing the handle to the inserted object. + /// + /// \since 4.2.12 + auto insert(boost::any _value) -> std::string; + + /// Searches the process stash for the value associated with a specific key. + /// + /// This function is thread-safe. + /// + /// \param[in] _key The string which maps to the value of interest. + /// + /// \returns A std::optional containing the value of interest. + /// \retval A non-empty std::optional object if the key exists. + /// \retval An empty std::optional object otherwise. + /// + /// \since 4.2.12 + auto find(const std::string& _key) -> std::optional; + + /// Removes a value from the process stash. 
+ /// + /// This function is thread-safe. + /// + /// \param[in] _key The string which maps to the value of interest. + /// + /// \returns A boolean indicating if an element is removed. + /// + /// \since 4.2.12 + auto erase(const std::string& _key) -> bool; + + /// Removes all entries satisfying the predicate. + /// + /// This function is thread-safe. + /// + /// \param[in] _pred \parblock The predicate to test each entry against. + /// + /// \p _pred must take a std::string and boost::any by reference and return a boolean + /// indicating whether the entry should be removed. The arguments passed must be + /// treated as read-only. + /// + /// The std::string parameter is the handle mapped to the object stored in the boost::any. + /// + /// The boost::any parameter is the wrapped object identified by the handle. + /// + /// If \p _pred returns \p true, the entry is removed. + /// \endparblock + /// + /// \returns The number of elements removed. + /// + /// \since 4.3.1 + auto erase_if(const std::function& _pred) -> std::size_t; + + /// Returns all handles in the process stash. + /// + /// This function is thread-safe. 
+ /// + /// \since 4.2.12 + auto handles() -> std::vector; +} // namespace irods::http::process_stash + +#endif // IRODS_S3_API_PROCESS_STASH_HPP diff --git a/core/include/irods/private/s3_api/session.hpp b/core/include/irods/private/s3_api/session.hpp new file mode 100644 index 0000000..e602a31 --- /dev/null +++ b/core/include/irods/private/s3_api/session.hpp @@ -0,0 +1,71 @@ +#ifndef IRODS_S3_API_SESSION_HPP +#define IRODS_S3_API_SESSION_HPP + +#include "irods/private/s3_api/common.hpp" + +#include +#include + +#include +#include + +namespace irods::http +{ + class session : public std::enable_shared_from_this + { + public: + session( + boost::asio::ip::tcp::socket&& socket, + const request_handler_map_type& _request_handler_map, + int _max_body_size, + int _timeout_in_seconds); + + auto ip() const -> std::string; + + auto run() -> void; + + auto do_read() -> void; + + auto on_read(boost::beast::error_code ec, std::size_t bytes_transferred) -> void; + + auto on_write(bool close, boost::beast::error_code ec, std::size_t bytes_transferred) -> void; + + auto do_close() -> void; + + auto stream() -> boost::beast::tcp_stream& + { + return stream_; + } // stream + + template + auto send(boost::beast::http::message&& msg) -> void + { + namespace http = boost::beast::http; + + // The lifetime of the message has to extend + // for the duration of the async operation so + // we use a shared_ptr to manage it. + auto sp = std::make_shared>(std::move(msg)); + + // Store a type-erased version of the shared + // pointer in the class to keep it alive. + res_ = sp; + + // Write the response. + http::async_write( + stream_, *sp, boost::beast::bind_front_handler(&session::on_write, shared_from_this(), sp->need_eof())); + } // send + + private: + boost::beast::tcp_stream stream_; + boost::beast::flat_buffer buffer_; + std::optional> parser_; + std::shared_ptr res_; // TODO Probably doesn't need to be a shared_ptr anymore. 
The session owns it and is + // available for the lifetime of the request. + const request_handler_map_type* req_handlers_; + const int max_body_size_; + const int timeout_in_secs_; + }; // class session +} // namespace irods::http + +#endif // IRODS_S3_API_SESSION_HPP diff --git a/core/include/irods/private/s3_api/transport.hpp b/core/include/irods/private/s3_api/transport.hpp new file mode 100644 index 0000000..6089384 --- /dev/null +++ b/core/include/irods/private/s3_api/transport.hpp @@ -0,0 +1,80 @@ +#ifndef IRODS_S3_API_TRANSPORT_HPP +#define IRODS_S3_API_TRANSPORT_HPP + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +namespace irods::http +{ + class transport + { + public: + explicit transport(boost::asio::io_context& _ctx); + virtual ~transport() = default; + + auto connect(std::string_view _host, std::string_view _port) -> void; + auto is_connected() const noexcept -> bool; + auto communicate(boost::beast::http::request& _request) + -> boost::beast::http::response; + + protected: + virtual auto resolve(std::string_view _host, std::string_view _port) + -> boost::asio::ip::tcp::resolver::results_type; + + private: + virtual auto do_connect(boost::asio::ip::tcp::resolver::results_type& _resolved_host) -> void = 0; + virtual auto do_write(boost::beast::http::request& _request) -> void = 0; + virtual auto do_read() -> boost::beast::http::response = 0; + + boost::asio::io_context& io_ctx_; + bool did_connect_; + }; // class transport + + class tls_transport : public transport + { + public: + tls_transport(boost::asio::io_context& _ctx, boost::asio::ssl::context& _secure_ctx); + virtual ~tls_transport(); + + private: + auto resolve(std::string_view _host, std::string_view _port) + -> boost::asio::ip::tcp::resolver::results_type override; + auto do_connect(boost::asio::ip::tcp::resolver::results_type& _resolved_host) -> void override; + auto do_write(boost::beast::http::request& _request) -> void 
override; + auto do_read() -> boost::beast::http::response override; + auto disconnect() -> void; + auto set_sni_hostname(std::string_view _host) -> void; + + boost::beast::ssl_stream stream_; + }; // class tls_transport + + class plain_transport : public transport + { + public: + explicit plain_transport(boost::asio::io_context& _ctx); + virtual ~plain_transport(); + + private: + auto do_connect(boost::asio::ip::tcp::resolver::results_type& _resolved_host) -> void override; + auto do_write(boost::beast::http::request& _request) -> void override; + auto do_read() -> boost::beast::http::response override; + auto disconnect() -> void; + + boost::beast::tcp_stream stream_; + }; // class plain_transport + + auto transport_factory(const boost::urls::scheme& _scheme, boost::asio::io_context& _ctx) + -> std::unique_ptr; +} // namespace irods::http + +#endif // IRODS_S3_API_TRANSPORT_HPP diff --git a/core/include/irods/private/s3_api/version.hpp.in b/core/include/irods/private/s3_api/version.hpp.in new file mode 100644 index 0000000..3ab5596 --- /dev/null +++ b/core/include/irods/private/s3_api/version.hpp.in @@ -0,0 +1,14 @@ +#ifndef IRODS_S3_API_VERSION_HPP +#define IRODS_S3_API_VERSION_HPP + +namespace irods::s3::version +{ + // clang-format off + inline constexpr const char* binary_name = "@IRODS_S3_API_BINARY_NAME@"; + inline constexpr const char* server_name = "@IRODS_S3_API_BINARY_NAME@/@IRODS_S3_API_VERSION@ (@IRODS_S3_API_GIT_SHA1@)"; + inline constexpr const char* api_version = "@IRODS_S3_API_VERSION@"; + inline constexpr const char* sha = "@IRODS_S3_API_GIT_SHA1@"; + // clang-format on +} // namespace irods::s3::version + +#endif // IRODS_S3_API_VERSION_HPP diff --git a/core/src/common.cpp b/core/src/common.cpp new file mode 100644 index 0000000..4c43590 --- /dev/null +++ b/core/src/common.cpp @@ -0,0 +1,494 @@ +#include "irods/private/s3_api/common.hpp" + +#include "irods/private/s3_api/crlf_parser.hpp" +#include "irods/private/s3_api/globals.hpp" +#include 
"irods/private/s3_api/log.hpp" +#include "irods/private/s3_api/process_stash.hpp" +#include "irods/private/s3_api/session.hpp" +#include "irods/private/s3_api/version.hpp" + +#include +#include +#include +#include +#include // For addKeyVal(). +#include +#include // For KW_CLOSE_OPEN_REPLICAS. +#include +#include + +#include +#include + +#include +#include +#include +#include + +namespace irods::http +{ + auto fail(response_type& _response, status_type _status, const std::string_view _error_msg) -> response_type + { + _response.result(_status); + _response.set(field_type::server, s3::version::server_name); + _response.set(field_type::content_type, "application/json"); + _response.body() = _error_msg; + _response.prepare_payload(); + return _response; + } // fail + + auto fail(response_type& _response, status_type _status) -> response_type + { + return fail(_response, _status, ""); + } // fail + + auto fail(status_type _status, const std::string_view _error_msg) -> response_type + { + response_type r{_status, 11}; + return fail(r, _status, _error_msg); + } // fail + + auto fail(status_type _status) -> response_type + { + response_type r{_status, 11}; + return fail(r, _status, ""); + } // fail + + auto decode(const std::string_view _v) -> std::string + { + std::string result; + int decoded_length = -1; + + if (auto* decoded = curl_easy_unescape(nullptr, _v.data(), static_cast(_v.size()), &decoded_length); + decoded) { + std::unique_ptr s{decoded, curl_free}; + result.assign(decoded, decoded_length); + } + else { + result.assign(_v); + } + + return result; + } // decode + + auto encode(std::string_view _to_encode) -> std::string + { + char* tmp_encoded_data{curl_easy_escape(nullptr, _to_encode.data(), _to_encode.size())}; + if (tmp_encoded_data == nullptr) { + return {std::cbegin(_to_encode), std::cend(_to_encode)}; + } + + std::string encoded_data{tmp_encoded_data}; + + curl_free(tmp_encoded_data); + return encoded_data; + } // encode + + // TODO Create a better 
name. + auto to_argument_list(const std::string_view _urlencoded_string) -> std::unordered_map + { + if (_urlencoded_string.empty()) { + return {}; + } + + std::unordered_map kvps; + + std::vector tokens; + boost::split(tokens, _urlencoded_string, boost::is_any_of("&")); + + std::vector kvp; + + for (auto&& t : tokens) { + boost::split(kvp, t, boost::is_any_of("=")); + + if (kvp.size() == 2) { + auto value = decode(kvp[1]); + boost::replace_all(value, "+", " "); + kvps.insert_or_assign(std::move(kvp[0]), value); + } + else if (kvp.size() == 1) { + kvps.insert_or_assign(std::move(kvp[0]), ""); + } + + kvp.clear(); + } + + return kvps; + } // to_argument_list + + auto get_url_path(const std::string& _url) -> std::optional + { + std::unique_ptr curl{curl_url(), curl_url_cleanup}; + + if (!curl) { + logging::error("{}: Could not initialize libcurl.", __func__); + return std::nullopt; + } + + if (const auto ec = curl_url_set(curl.get(), CURLUPART_URL, _url.c_str(), 0); ec) { + logging::error("{}: curl_url_set error: {}", __func__, ec); + return std::nullopt; + } + + using curl_string = std::unique_ptr; + + // Extract the path. + // This is what we use to route requests to the various endpoints. + char* path{}; + const auto ec = curl_url_get(curl.get(), CURLUPART_PATH, &path, 0); + + if (ec == 0) { + curl_string cpath{path, curl_free}; + return path; + } + + logging::error("{}: curl_url_get(CURLUPART_PATH) error: {}", __func__, ec); + return std::nullopt; + } // get_url_path + + auto parse_url(const std::string& _url) -> url + { + std::unique_ptr curl{curl_url(), curl_url_cleanup}; + + if (!curl) { + logging::error("{}: Could not initialize CURLU handle.", __func__); + THROW(SYS_LIBRARY_ERROR, "curl_url error."); + } + + // Include a bogus prefix. We only care about the path and query parts of the URL. 
+ if (const auto ec = curl_url_set(curl.get(), CURLUPART_URL, _url.c_str(), 0); ec) { + logging::error("{}: curl_url_set error: {}", __func__, ec); + THROW(SYS_LIBRARY_ERROR, "curl_url_set(CURLUPART_URL) error."); + } + + url url; + + using curl_string = std::unique_ptr; + + // Extract the path. + // This is what we use to route requests to the various endpoints. + char* path{}; + if (const auto ec = curl_url_get(curl.get(), CURLUPART_PATH, &path, 0); ec == 0) { + curl_string cpath{path, curl_free}; + if (path) { + url.path = path; + } + } + else { + logging::error("{}: curl_url_get(CURLUPART_PATH) error: {}", __func__, ec); + THROW(SYS_LIBRARY_ERROR, "curl_url_get(CURLUPART_PATH) error."); + } + + // Extract the query. + // ChatGPT states that the values in the key value pairs must escape embedded equal signs. + // This allows the HTTP server to parse the query string correctly. Therefore, we don't have + // to protect against that case. The client must send the correct URL escaped input. + char* query{}; + if (const auto ec = curl_url_get(curl.get(), CURLUPART_QUERY, &query, 0); ec == 0) { + curl_string cs{query, curl_free}; + if (query) { + url.query = to_argument_list(query); + } + } + else { + logging::error("{}: curl_url_get(CURLUPART_QUERY) error: {}", __func__, ec); + THROW(SYS_LIBRARY_ERROR, "curl_url_get(CURLUPART_QUERY) error."); + } + + return url; + } // parse_url + + auto parse_url(const request_type& _req) -> url + { + return parse_url(fmt::format("http://ignored{}", _req.target())); + } // parse_url + + auto resolve_client_identity(const request_type& _req) -> client_identity_resolution_result + { + // + // Extract the Bearer token from the Authorization header. 
+ // + + const auto& hdrs = _req.base(); + const auto iter = hdrs.find("Authorization"); + if (iter == std::end(hdrs)) { + logging::error("{}: Missing [Authorization] header.", __func__); + return {.response = fail(status_type::bad_request)}; + } + + logging::debug("{}: Authorization value: [{}]", __func__, iter->value()); + + auto pos = iter->value().find("Bearer "); + if (std::string_view::npos == pos) { + logging::debug("{}: Malformed authorization header.", __func__); + return {.response = fail(status_type::bad_request)}; + } + + std::string bearer_token{iter->value().substr(pos + 7)}; + boost::trim(bearer_token); + logging::debug("{}: Bearer token: [{}]", __func__, bearer_token); + + // Verify the bearer token is known to the server. If not, return an error. + auto mapped_value{irods::http::process_stash::find(bearer_token)}; + if (!mapped_value.has_value()) { + logging::error("{}: Could not find bearer token matching [{}].", __func__, bearer_token); + return {.response = fail(status_type::unauthorized)}; + } + + auto* client_info{boost::any_cast(&*mapped_value)}; + if (client_info == nullptr) { + logging::error("{}: Could not find bearer token matching [{}].", __func__, bearer_token); + return {.response = fail(status_type::unauthorized)}; + } + + if (std::chrono::steady_clock::now() >= client_info->expires_at) { + logging::error("{}: Session for bearer token [{}] has expired.", __func__, bearer_token); + return {.response = fail(status_type::unauthorized)}; + } + + logging::trace("{}: Client is authenticated.", __func__); + return {.client_info = std::move(*client_info)}; + } // resolve_client_identity + + auto execute_operation( + session_pointer_type _sess_ptr, + request_type& _req, + const std::unordered_map& _op_table_get, + const std::unordered_map& _op_table_post) -> void + { + if (_req.method() == verb_type::get) { + if (_op_table_get.empty()) { + logging::error("{}: HTTP method not supported.", __func__); + return 
_sess_ptr->send(irods::http::fail(status_type::method_not_allowed)); + } + + auto url = irods::http::parse_url(_req); + + const auto op_iter = url.query.find("op"); + if (op_iter == std::end(url.query)) { + logging::error("{}: Missing [op] parameter.", __func__); + return _sess_ptr->send(irods::http::fail(status_type::bad_request)); + } + + if (const auto iter = _op_table_get.find(op_iter->second); iter != std::end(_op_table_get)) { + return (iter->second)(_sess_ptr, _req, url.query); + } + + logging::error("{}: Operation [{}] not supported.", __func__, op_iter->second); + return _sess_ptr->send(fail(status_type::bad_request)); + } + + if (_req.method() == verb_type::post) { + if (_op_table_post.empty()) { + logging::error("{}: HTTP method not supported.", __func__); + return _sess_ptr->send(irods::http::fail(status_type::method_not_allowed)); + } + + query_arguments_type args; + + if (auto content_type = _req.base()["content-type"]; + boost::istarts_with(content_type, "multipart/form-data")) { + const auto boundary = irods::http::get_multipart_form_data_boundary(content_type); + + if (!boundary) { + logging::error("{}: Could not extract [boundary] from [Content-Type] header. 
", __func__); + return _sess_ptr->send(irods::http::fail(status_type::bad_request)); + } + + args = irods::http::parse_multipart_form_data(*boundary, _req.body()); + } + else if (boost::istarts_with(content_type, "application/x-www-form-urlencoded")) { + args = irods::http::to_argument_list(_req.body()); + } + else { + logging::error("{}: Content type [{}] not supported.", __func__, content_type); + return _sess_ptr->send(irods::http::fail(status_type::bad_request)); + } + + const auto op_iter = args.find("op"); + if (op_iter == std::end(args)) { + logging::error("{}: Missing [op] parameter.", __func__); + return _sess_ptr->send(irods::http::fail(status_type::bad_request)); + } + + if (const auto iter = _op_table_post.find(op_iter->second); iter != std::end(_op_table_post)) { + return (iter->second)(_sess_ptr, _req, args); + } + + logging::error("{}: Operation [{}] not supported.", __func__, op_iter->second); + return _sess_ptr->send(fail(status_type::bad_request)); + } + + logging::error("{}: HTTP method not supported.", __func__); + return _sess_ptr->send(irods::http::fail(status_type::method_not_allowed)); + } // operation_dispatch + + auto get_port_from_url(boost::urls::url_view _url) -> std::optional + { + if (_url.has_port()) { + return _url.port(); + } + + switch (_url.scheme_id()) { + case boost::urls::scheme::https: + logging::debug("{}: Detected HTTPS scheme, using port 443.", __func__); + return "443"; + case boost::urls::scheme::http: + logging::debug("{}: Detected HTTP scheme, using port 80.", __func__); + return "80"; + default: + logging::error("{}: Cannot deduce port from url [{}].", __func__, _url.data()); + return std::nullopt; + } + } // get_port_from_url +} // namespace irods::http + +namespace irods +{ + auto to_permission_string(const irods::experimental::filesystem::perms _p) -> const char* + { + using irods::experimental::filesystem::perms; + + // clang-format off + switch (_p) { + case perms::null: return "null"; + case 
perms::read_metadata: return "read_metadata"; + case perms::read_object: + case perms::read: return "read_object"; + case perms::create_metadata: return "create_metadata"; + case perms::modify_metadata: return "modify_metadata"; + case perms::delete_metadata: return "delete_metadata"; + case perms::create_object: return "create_object"; + case perms::modify_object: + case perms::write: return "modify_object"; + case perms::delete_object: return "delete_object"; + case perms::own: return "own"; + } + // clang-format on + + THROW(SYS_INVALID_INPUT_PARAM, fmt::format("Cannot convert permission enumeration to string.")); + } // to_permission_string + + auto to_permission_enum(const std::string_view _s) -> std::optional + { + using irods::experimental::filesystem::perms; + + // clang-format off + if (_s == "null") { return perms::null; } + if (_s == "read_metadata") { return perms::read_metadata; } + if (_s == "read_object") { return perms::read; } + if (_s == "read") { return perms::read; } + if (_s == "create_metadata") { return perms::create_metadata; } + if (_s == "modify_metadata") { return perms::modify_metadata; } + if (_s == "delete_metadata") { return perms::delete_metadata; } + if (_s == "create_object") { return perms::create_object; } + if (_s == "modify_object") { return perms::write; } + if (_s == "write") { return perms::write; } + if (_s == "delete_object") { return perms::delete_object; } + if (_s == "own") { return perms::own; } + // clang-format on + + return std::nullopt; + } // to_permission_enum + + auto to_object_type_string(const irods::experimental::filesystem::object_type _t) -> const char* + { + using irods::experimental::filesystem::object_type; + + // clang-format off + switch (_t) { + case object_type::collection: return "collection"; + case object_type::data_object: return "data_object"; + case object_type::none: return "none"; + case object_type::not_found: return "not_found"; + case object_type::special_collection: return 
"special_collection"; + case object_type::unknown: return "unknown"; + default: return "?"; + } + // clang-format on + } // to_object_type_string + + auto to_object_type_enum(const std::string_view _s) -> std::optional + { + using irods::experimental::filesystem::object_type; + + // clang-format off + if (_s == "collection") { return object_type::collection; } + if (_s == "data_object") { return object_type::data_object; } + if (_s == "none") { return object_type::none; } + if (_s == "not_found") { return object_type::not_found; } + if (_s == "special_collection") { return object_type::special_collection; } + if (_s == "unknown") { return object_type::unknown; } + // clang-format on + + return std::nullopt; + } // to_object_type_enum + + auto get_connection(const std::string& _username) -> irods::http::connection_facade + { + using json_pointer = nlohmann::json::json_pointer; + + static const auto& config = irods::http::globals::configuration(); + static const auto& irods_client_config = config.at("irods_client"); + static const auto& zone = irods_client_config.at("zone").get_ref(); + + if (config.at(json_pointer{"/irods_client/enable_4_2_compatibility"}).get()) { + static const auto& rodsadmin_username = + irods_client_config.at(json_pointer{"/proxy_admin_account/username"}).get_ref(); + static auto rodsadmin_password = + irods_client_config.at(json_pointer{"/proxy_admin_account/password"}).get_ref(); + + irods::experimental::client_connection conn{ + irods::experimental::defer_authentication, + irods_client_config.at("host").get_ref(), + irods_client_config.at("port").get(), + {rodsadmin_username, zone}, + {_username, zone}}; + + auto* conn_ptr = static_cast(conn); + + if (const auto ec = clientLoginWithPassword(conn_ptr, rodsadmin_password.data()); ec < 0) { + http::logging::error("{}: clientLoginWithPassword error: {}", __func__, ec); + THROW(SYS_INTERNAL_ERR, "clientLoginWithPassword error."); + } + + return irods::http::connection_facade{std::move(conn)}; + } 
+ + auto conn = irods::http::globals::connection_pool().get_connection(); + + http::logging::trace("{}: Changing identity associated with connection to [{}].", __func__, _username); + + SwitchUserInput input{}; + + irods::at_scope_exit clear_options{[&input] { clearKeyVal(&input.options); }}; + + irods::strncpy_null_terminated(input.username, _username.c_str()); + irods::strncpy_null_terminated(input.zone, zone.c_str()); + addKeyVal(&input.options, KW_CLOSE_OPEN_REPLICAS, ""); + + if (const auto ec = rc_switch_user(static_cast(conn), &input); ec < 0) { + http::logging::error("{}: rc_switch_user error: {}", __func__, ec); + THROW(ec, "rc_switch_user error."); + } + + http::logging::trace("{}: Successfully changed identity associated with connection to [{}].", __func__, _username); + + return irods::http::connection_facade{std::move(conn)}; + } // get_connection + + auto fail(boost::beast::error_code ec, char const* what) -> void + { + http::logging::error("{}: {}: {}", __func__, what, ec.message()); + } // fail + + auto enable_ticket(RcComm& _comm, const std::string& _ticket) -> int + { + TicketAdminInput input{}; + input.arg1 = const_cast("session"); // NOLINT(cppcoreguidelines-pro-type-const-cast) + input.arg2 = const_cast(_ticket.c_str()); // NOLINT(cppcoreguidelines-pro-type-const-cast) + input.arg3 = const_cast(""); // NOLINT(cppcoreguidelines-pro-type-const-cast) + + return rcTicketAdmin(&_comm, &input); + } // enable_ticket +} // namespace irods diff --git a/core/src/crlf_parser.cpp b/core/src/crlf_parser.cpp new file mode 100644 index 0000000..b17e77d --- /dev/null +++ b/core/src/crlf_parser.cpp @@ -0,0 +1,134 @@ +#include "irods/private/s3_api/crlf_parser.hpp" + +#include "irods/private/s3_api/log.hpp" + +#include +#include + +#include + +namespace irods::http +{ + auto get_multipart_form_data_boundary(const std::string_view _data) -> std::optional + { + // The ext_list type does not treat "multipart/form-data" as a single string. 
+ // I'm not sure if this is by design, but we have to skip the multipart/ prefix so that + // ext_list can extract the boundary argument. + // + // TODO Consider opening an issue for Boost.Beast in Github. + + const auto pos = _data.find("form-data"); + + if (pos == std::string_view::npos) { + return std::nullopt; + } + + boost::beast::http::ext_list list{_data.substr(pos)}; + const auto iter = list.find("form-data"); + + if (iter == std::end(list)) { + logging::error("{}: Missing [boundary] parameter in [multipart/form-data] value.", __func__); + return std::nullopt; + } + + for (auto&& param : iter->second) { + if (param.first == "boundary") { + logging::debug("PARAM = (name={}, value={})", param.first, param.second); + return param.second; + } + } + + return std::nullopt; + } // get_multipart_form_data_boundary + + auto parse_multipart_form_data(const std::string_view _boundary, const std::string_view _data) + -> query_arguments_type + { + if (_data.empty()) { + return {}; + } + + query_arguments_type args; + + const auto boundary_start = fmt::format("--{}", _boundary); + const auto boundary_end = fmt::format("--{}--", _boundary); + + crlf_parser p{_data}; + auto found_boundary_end = false; + std::string param_name; + + while (true) { + // Find boundary. + while (p.next_crlf()) { + //logging::trace("LINE => {}", p.data()); + + if (p.data().starts_with(boundary_end)) { + found_boundary_end = true; + break; + } + + if (p.data().starts_with(boundary_start)) { + break; + } + } + + if (found_boundary_end) { + logging::trace("END OF REQUEST"); + break; + } + + std::int64_t content_length = -1; + + // Read headers. + // Content-Disposition is a required header. It defines the name of the parameter. + while (p.next_crlf()) { + if (p.data().empty()) { + logging::trace("END OF HEADERS"); + break; + } + + if (boost::istarts_with(p.data(), "content-disposition:")) { + // See https://www.rfc-editor.org/rfc/rfc2045 for details about the + // structure of MIME types. 
+ boost::beast::http::ext_list list{p.data().substr(std::strlen("content-disposition:"))}; + const auto type_iter = list.find("form-data"); + + if (type_iter != std::end(list)) { + for (auto&& param : type_iter->second) { + if (param.first == "name") { + logging::trace("PARAM = (name={}, value={})", param.first, param.second); + param_name = param.second; + break; + } + } + } + } + else if (boost::istarts_with(p.data(), "content-length:")) { + const auto slen = std::string{p.data().substr(std::strlen("content-length:"))}; + try { + content_length = std::stoll(slen); + logging::debug("CONTENT LENGTH = [{}]", content_length); + } + catch (...) { + } + } + else if (boost::istarts_with(p.data(), "content-type:")) { + if (!boost::icontains(p.data().substr(std::strlen("content-type:")), "application/octet-stream")) { + logging::trace("INVALID CONTENT-TYPE"); + args.clear(); + return args; + } + } + } + + // Read content. + p.next_crlf(content_length); + logging::debug("{}: CAPTURED CONTENT SIZE = [{}]", __func__, p.data().size()); + args.insert_or_assign(param_name, std::string{p.data()}); + //logging::trace("CONTENT => [{}]", p.data()); + logging::trace("END OF CONTENT"); + } + + return args; + } // parse_multipart_form_data +} // namespace irods::http diff --git a/core/src/globals.cpp b/core/src/globals.cpp new file mode 100644 index 0000000..6880e5a --- /dev/null +++ b/core/src/globals.cpp @@ -0,0 +1,72 @@ +#include "irods/private/s3_api/globals.hpp" + +#include + +namespace +{ + // NOLINTNEXTLINE(cppcoreguidelines-avoid-non-const-global-variables) + const nlohmann::json* g_config{}; + + // NOLINTNEXTLINE(cppcoreguidelines-avoid-non-const-global-variables) + boost::asio::io_context* g_req_handler_ioc{}; + + // NOLINTNEXTLINE(cppcoreguidelines-avoid-non-const-global-variables) + boost::asio::thread_pool* g_bg_thread_pool{}; + + // NOLINTNEXTLINE(cppcoreguidelines-avoid-non-const-global-variables) + irods::connection_pool* g_conn_pool{}; +} // anonymous namespace + 
+namespace irods::http::globals +{ + auto set_configuration(const nlohmann::json& _config) -> void + { + g_config = &_config; + } // set_configuration + + auto configuration() -> const nlohmann::json& + { + return *g_config; + } // configuration + + auto set_request_handler_io_context(boost::asio::io_context& _ioc) -> void + { + g_req_handler_ioc = &_ioc; + } // set_request_handler_io_context + + auto request_handler_io_context() -> boost::asio::io_context& + { + return *g_req_handler_ioc; + } // request_handler_io_context + + auto set_background_thread_pool(boost::asio::thread_pool& _tp) -> void + { + g_bg_thread_pool = &_tp; + } // set_background_thread_pool + + auto background_thread_pool() -> boost::asio::thread_pool& + { + return *g_bg_thread_pool; + } // background_thread_pool + + auto background_task(std::function _task) -> void + { + boost::asio::post(background_thread_pool(), [t = std::move(_task)] { + try { + t(); + } + catch (...) { + } + }); + } // background_task + + auto set_connection_pool(irods::connection_pool& _cp) -> void + { + g_conn_pool = &_cp; + } // set_connection_pool + + auto connection_pool() -> irods::connection_pool& + { + return *g_conn_pool; + } // connection_pool +} // namespace irods::http::globals diff --git a/core/src/main.cpp b/core/src/main.cpp new file mode 100644 index 0000000..64d32d5 --- /dev/null +++ b/core/src/main.cpp @@ -0,0 +1,865 @@ +#include "irods/private/s3_api/common.hpp" +#include "irods/private/s3_api/globals.hpp" +#include "irods/private/s3_api/handlers.hpp" +#include "irods/private/s3_api/log.hpp" +#include "irods/private/s3_api/session.hpp" +#include "irods/private/s3_api/transport.hpp" +#include "irods/private/s3_api/process_stash.hpp" +#include "irods/private/s3_api/version.hpp" + +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#pragma clang diagnostic push +#pragma clang diagnostic 
ignored "-Wunused-parameter" +#include +#pragma clang diagnostic pop + +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +// __has_feature is a Clang specific feature. +// The preprocessor code below exists so that other compilers can be used (e.g. GCC). +#ifndef __has_feature +# define __has_feature(feature) 0 +#endif + +#if __has_feature(address_sanitizer) || defined(__SANITIZE_ADDRESS__) +// Defines default options for running the S3 API with Address Sanitizer enabled. +// This is a convenience function which allows the S3 API to start without needing the +// administrator to specify options via environment variables. +extern "C" const char* __asan_default_options() +{ + // See root CMakeLists.txt file for definition. + return IRODS_ADDRESS_SANITIZER_DEFAULT_OPTIONS; +} // __asan_default_options +#endif + +// clang-format off +namespace beast = boost::beast; // from +namespace net = boost::asio; // from +namespace po = boost::program_options; +namespace logging = irods::http::logging; + +using json = nlohmann::json; +using tcp = boost::asio::ip::tcp; // from + +// IRODS_S3_API_BASE_URL is a macro defined by the CMakeLists.txt. +const irods::http::request_handler_map_type req_handlers{ + {IRODS_S3_API_BASE_URL "/authenticate", irods::http::handler::authentication}, + {IRODS_S3_API_BASE_URL "/PutObject", irods::http::handler::put_object} +}; +// clang-format on + +// Accepts incoming connections and launches the sessions. 
+class listener : public std::enable_shared_from_this +{ + public: + listener(net::io_context& ioc, const tcp::endpoint& endpoint, const json& _config) + : ioc_{ioc} + , acceptor_{net::make_strand(ioc)} + , max_body_size_{_config.at(json::json_pointer{"/s3_server/requests/max_size_of_request_body_in_bytes"}) + .get()} + , timeout_in_secs_{_config.at(json::json_pointer{"/s3_server/requests/timeout_in_seconds"}).get()} + { + beast::error_code ec; + + // Open the acceptor + acceptor_.open(endpoint.protocol(), ec); + if (ec) { + irods::fail(ec, "open"); + return; + } + + // Allow address reuse + acceptor_.set_option(net::socket_base::reuse_address(true), ec); + if (ec) { + irods::fail(ec, "set_option"); + return; + } + + // Bind to the server address + acceptor_.bind(endpoint, ec); + if (ec) { + irods::fail(ec, "bind"); + return; + } + + // Start listening for connections + acceptor_.listen(net::socket_base::max_listen_connections, ec); + if (ec) { + irods::fail(ec, "listen"); + return; + } + } // listener (constructor) + + // Start accepting incoming connections. 
+ auto run() -> void + { + do_accept(); + } // run + + private: + auto do_accept() -> void + { + // The new connection gets its own strand + acceptor_.async_accept( + net::make_strand(ioc_), beast::bind_front_handler(&listener::on_accept, shared_from_this())); + } // do_accept + + auto on_accept(beast::error_code ec, tcp::socket socket) -> void + { + if (ec) { + irods::fail(ec, "accept"); + //return; // To avoid infinite loop + } + else { + // Create the session and run it + std::make_shared(std::move(socket), req_handlers, max_body_size_, timeout_in_secs_) + ->run(); + } + + // Accept another connection + do_accept(); + } // on_accept + + net::io_context& ioc_; + tcp::acceptor acceptor_; + const int max_body_size_; + const int timeout_in_secs_; +}; // class listener + +auto print_version_info() -> void +{ + namespace version = irods::s3::version; + const std::string_view sha = version::sha; + constexpr auto sha_size = 7; + fmt::print("{} v{}-{}\n", version::binary_name, version::api_version, sha.substr(0, sha_size)); +} // print_version_info + +constexpr auto default_jsonschema() -> std::string_view +{ + // clang-format on + return R"({{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "https://schemas.irods.org/irods-s3-api/0.2.0/schema.json", + "type": "object", + "properties": {{ + "s3_server": {{ + "type": "object", + "properties": {{ + "host": {{ + "type": "string", + "pattern": "^[0-9]{{1,3}}\\.[0-9]{{1,3}}\\.[0-9]{{1,3}}\\.[0-9]{{1,3}}$" + }}, + "port": {{ + "type": "integer" + }}, + "log_level": {{ + "enum": [ + "trace", + "debug", + "info", + "warn", + "error", + "critical" + ] + }}, + "plugins": {{ + "type": "object" + }}, + "region": {{ + "type": "string" + }}, + "authentication": {{ + "type": "object", + "properties": {{ + "eviction_check_interval_in_seconds": {{ + "type": "integer", + "minimum": 1 + }}, + "basic": {{ + "type": "object", + "properties": {{ + "timeout_in_seconds": {{ + "type": "integer", + "minimum": 1 + }} + }}, + 
"required": [ + "timeout_in_seconds" + ] + }} + }}, + "required": [ + "eviction_check_interval_in_seconds", + "basic" + ] + }}, + "requests": {{ + "type": "object", + "properties": {{ + "threads": {{ + "type": "integer", + "minimum": 1 + }}, + "max_size_of_request_body_in_bytes": {{ + "type": "integer", + "minimum": 0 + }}, + "timeout_in_seconds": {{ + "type": "integer", + "minimum": 1 + }} + }}, + "required": [ + "threads", + "max_size_of_request_body_in_bytes", + "timeout_in_seconds" + ] + }}, + "background_io": {{ + "type": "object", + "properties": {{ + "threads": {{ + "type": "integer", + "minimum": 1 + }} + }}, + "required": [ + "threads" + ] + }} + }}, + "required": [ + "host", + "port", + "authentication", + "requests", + "background_io" + ] + }}, + "irods_client": {{ + "type": "object", + "properties": {{ + "host": {{ + "type": "string" + }}, + "port": {{ + "type": "integer" + }}, + "zone": {{ + "type": "string" + }}, + "tls": {{ + "type": "object", + "properties": {{ + "client_server_policy": {{ + "enum": [ + "CS_NEG_REFUSE", + "CS_NEG_DONT_CARE", + "CS_NEG_REQUIRE" + ] + }}, + "ca_certificate_file": {{ + "type": "string" + }}, + "certificate_chain_file": {{ + "type": "string" + }}, + "dh_params_file": {{ + "type": "string" + }}, + "verify_server": {{ + "enum": [ + "none", + "cert", + "hostname" + ] + }} + }}, + "required": [ + "client_server_policy", + "ca_certificate_file", + "dh_params_file", + "verify_server" + ] + }}, + "enable_4_2_compatibility": {{ + "type": "boolean" + }}, + "proxy_admin_account": {{ + "type": "object", + "properties": {{ + "username": {{ + "type": "string" + }}, + "password": {{ + "type": "string" + }} + }}, + "required": [ + "username", + "password" + ] + }}, + "connection_pool": {{ + "type": "object", + "properties": {{ + "size": {{ + "type": "integer", + "minimum": 1 + }}, + "refresh_timeout_in_seconds": {{ + "type": "integer", + "minimum": 1 + }}, + "max_retrievals_before_refresh": {{ + "type": "integer", + "minimum": 1 + }}, 
+ "refresh_when_resource_changes_detected": {{ + "type": "boolean" + }} + }}, + "required": [ + "size" + ] + }}, + "resource": {{ + "type": "string" + }}, + "max_number_of_bytes_per_read_operation": {{ + "type": "integer", + "minimum": 1 + }}, + "buffer_size_in_bytes_for_write_operations": {{ + "type": "integer", + "minimum": 1 + }} + }}, + "required": [ + "host", + "port", + "zone", + "enable_4_2_compatibility", + "proxy_admin_account", + "connection_pool", + "resource", + "max_number_of_parallel_write_streams", + "max_number_of_bytes_per_read_operation", + "buffer_size_in_bytes_for_write_operations", + "max_number_of_rows_per_catalog_query" + ] + }} + }}, + "required": [ + "s3_server", + "irods_client" + ] +}} +)"; + // clang-format on +} // default_jsonschema + +auto print_configuration_template() -> void +{ + // clang-format off + fmt::print(R"({{ + "s3_server": {{ + "host": "0.0.0.0", + "port": 9000, + + "log_level": "info", + + "plugins": {{ + "static_bucket_resolver": {{ + "name": "static_bucket_resolver", + "mappings": {{ + "": "/path/to/collection" + }} + }}, + + "static_authentication_resolver": {{ + "name": "static_authentication_resolver", + "users": {{ + "": {{ + "username": "", + "secret_key": "" + }} + }} + }} + }}, + + "region": "us-east-1", + + "authentication": {{ + "eviction_check_interval_in_seconds": 60, + + "basic": {{ + "timeout_in_seconds": 3600 + }} + }}, + + "requests": {{ + "threads": 3, + "max_size_of_request_body_in_bytes": 8388608, + "timeout_in_seconds": 30 + }}, + + "background_io": {{ + "threads": 6 + }} + }}, + + "irods_client": {{ + "host": "", + "port": 1247, + "zone": "", + + "tls": {{ + "client_server_policy": "CS_NEG_REFUSE", + "ca_certificate_file": "", + "certificate_chain_file": "", + "dh_params_file": "", + "verify_server": "" + }}, + + "enable_4_2_compatibility": false, + + "proxy_admin_account": {{ + "username": "", + "password": "" + }}, + + "connection_pool": {{ + "size": 6, + "refresh_timeout_in_seconds": 600, + 
"max_retrievals_before_refresh": 16, + "refresh_when_resource_changes_detected": true + }}, + + "resource": "", + + "max_number_of_bytes_per_read_operation": 8192, + "buffer_size_in_bytes_for_write_operations": 8192 + }} +}} +)"); + // clang-format on +} // print_configuration_template + +auto print_usage() -> void +{ + fmt::print(R"_(irods_s3_api - Presents an iRODS zone as S3 compatible storage + +Usage: irods_s3_api [OPTION]... CONFIG_FILE_PATH + +CONFIG_FILE_PATH must point to a file containing a JSON structure containing +configuration options. + +--dump-config-template can be used to generate a default configuration file. +See this option's description for more information. + +--dump-default-jsonschema can be used to generate a default schema file. +See this option's description for more information. + +Options: + --dump-config-template + Print configuration template to stdout and exit. Some + options have values which act as placeholders. If used + to generate a configuration file, those options will + need to be updated. + --dump-default-jsonschema + Print the default JSON schema to stdout and exit. The + JSON schema output can be used to create a custom + schema. This is for cases where the default schema is + too restrictive or contains a bug. + --jsonschema-file SCHEMA_FILE_PATH + Validate server configuration against SCHEMA_FILE_PATH. + Validation is performed before startup. If validation + fails, the server will exit. + -h, --help Display this help message and exit. + -v, --version Display version information and exit. 
+
+)_");
+
+    print_version_info();
+} // print_usage
+
+// Validates the configuration file at _config_path against a JSON schema by running
+// python3 with the jsonschema module.
+//
+// If _schema_path is empty, the text returned by default_jsonschema() is written to
+// /tmp/default_irods_s3_api_jsonschema.json and that file is used as the schema.
+//
+// Returns true only when the python3 process exits with 0. Any exception is reported
+// on stderr and results in false.
+auto is_valid_configuration(const std::string& _schema_path, const std::string& _config_path) -> bool
+{
+    try {
+        fmt::print("Validating configuration file ...\n");
+
+        const auto validate_config = [&_config_path](const std::string_view _schema_path) -> int {
+            constexpr std::string_view python_code = "import json, jsonschema; "
+                                                     "config_file = open('{}'); "
+                                                     "config = json.load(config_file); "
+                                                     "config_file.close(); "
+                                                     "schema_file = open('{}'); "
+                                                     "schema = json.load(schema_file); "
+                                                     "schema_file.close(); "
+                                                     "jsonschema.validate(config, schema);";
+
+            return boost::process::system(
+                boost::process::search_path("python3"), "-c", fmt::format(python_code, _config_path, _schema_path));
+        };
+
+        std::string schema;
+        int ec = -1;
+
+        if (_schema_path.empty()) {
+            fmt::print("No JSON schema file provided. Using default.\n");
+
+            constexpr const char* default_schema_file_path = "/tmp/default_irods_s3_api_jsonschema.json";
+
+            if (std::ofstream out{default_schema_file_path}; out) {
+                out << fmt::format(default_jsonschema());
+            }
+            else {
+                fmt::print(stderr, "Could not create local schema file for validation.\n");
+                return false;
+            }
+
+            ec = validate_config(default_schema_file_path);
+        }
+        else {
+            fmt::print("Using user-provided schema file [{}].\n", _schema_path);
+            ec = validate_config(_schema_path);
+        }
+
+        if (ec == 0) {
+            fmt::print("Configuration passed validation!\n");
+            return true;
+        }
+
+        fmt::print(stderr, "Configuration failed validation.\n");
+    }
+    catch (const std::system_error& e) {
+        fmt::print(stderr, "Error: {}\n", e.what());
+    }
+    catch (const std::exception& e) {
+        fmt::print(stderr, "Error: {}\n", e.what());
+    }
+
+    return false;
+} // is_valid_configuration
+
+// Sets the global spdlog level from the "log_level" property of _config.
+//
+// The level is set to "info" when the property is missing or when its value is not
+// one of: trace, info, debug, warn, error, critical.
+auto set_log_level(const json& _config) -> void
+{
+    const auto iter = _config.find("log_level");
+
+    if (iter == std::end(_config)) {
+        spdlog::set_level(spdlog::level::info);
+
+        // Nothing else to do. Falling through would dereference the end iterator.
+        return;
+    }
+
+    const auto& lvl_string = iter->get_ref();
+    auto lvl_enum = spdlog::level::info;
+
+    // clang-format off
+    if      (lvl_string == "trace")    { lvl_enum = spdlog::level::trace; }
+    else if (lvl_string == "info")     { lvl_enum = spdlog::level::info; }
+    else if (lvl_string == "debug")    { lvl_enum = spdlog::level::debug; }
+    else if (lvl_string == "warn")     { lvl_enum = spdlog::level::warn; }
+    else if (lvl_string == "error")    { lvl_enum = spdlog::level::err; }
+    else if (lvl_string == "critical") { lvl_enum = spdlog::level::critical; }
+    else                               { logging::warn("Invalid log_level. Setting to [info]."); }
+    // clang-format on
+
+    spdlog::set_level(lvl_enum);
+} // set_log_level
+
+// Copies the TLS options found under "/irods_client/tls" in _config into environment
+// variables. Each variable is named after the upper-cased iRODS keyword passed to the
+// helper and overwrites any existing value. Options which resolve to an empty string
+// are skipped. "client_server_policy" defaults to CS_NEG_REFUSE and "verify_server"
+// defaults to cert.
+auto init_tls(const json& _config) -> void
+{
+    const auto set_env_var = [&](const auto& _json_ptr_path, const char* _env_var, const char* _default_value = "") {
+        using json_ptr = json::json_pointer;
+
+        if (const auto v = _config.value(json_ptr{_json_ptr_path}, _default_value); !v.empty()) {
+            const auto env_var_upper = boost::to_upper_copy(_env_var);
+            logging::trace("Setting environment variable [{}] to [{}].", env_var_upper, v);
+            setenv(env_var_upper.c_str(), v.c_str(), 1); // NOLINT(concurrency-mt-unsafe)
+        }
+    };
+
+    set_env_var("/irods_client/tls/client_server_policy", irods::KW_CFG_IRODS_CLIENT_SERVER_POLICY, "CS_NEG_REFUSE");
+    set_env_var("/irods_client/tls/ca_certificate_file", irods::KW_CFG_IRODS_SSL_CA_CERTIFICATE_FILE);
+    set_env_var("/irods_client/tls/certificate_chain_file", irods::KW_CFG_IRODS_SSL_CERTIFICATE_CHAIN_FILE);
+    set_env_var("/irods_client/tls/dh_params_file", irods::KW_CFG_IRODS_SSL_DH_PARAMS_FILE);
+    set_env_var("/irods_client/tls/verify_server", irods::KW_CFG_IRODS_SSL_VERIFY_SERVER, "cert");
+} // init_tls
+
+auto init_irods_connection_pool(const json& _config) -> std::unique_ptr
+{
+    const auto& client = _config.at("irods_client");
+    const auto& zone = client.at("zone").get_ref();
+    const auto& conn_pool = client.at("connection_pool");
+    const auto& rodsadmin = client.at("proxy_admin_account");
+    const auto&
username = rodsadmin.at("username").get_ref(); + + irods::connection_pool_options opts; + + if (const auto iter = conn_pool.find("refresh_time_in_seconds"); iter != std::end(conn_pool)) { + opts.number_of_seconds_before_connection_refresh = std::chrono::seconds{iter->get()}; + } + + if (const auto iter = conn_pool.find("max_retrievals_before_refresh"); iter != std::end(conn_pool)) { + opts.number_of_retrievals_before_connection_refresh = iter->get(); + } + + if (const auto iter = conn_pool.find("refresh_when_resource_changes_detected"); iter != std::end(conn_pool)) { + opts.refresh_connections_when_resource_changes_detected = iter->get(); + } + + return std::make_unique( + conn_pool.at("size").get(), + client.at("host").get_ref(), + client.at("port").get(), + irods::experimental::fully_qualified_username{username, zone}, + irods::experimental::fully_qualified_username{username, zone}, + [pw = rodsadmin.at("password").get()](RcComm& _comm) mutable { + if (const auto ec = clientLoginWithPassword(&_comm, pw.data()); ec != 0) { + throw std::invalid_argument{fmt::format("Could not authenticate rodsadmin user: [{}]", ec)}; + } + }, + opts); +} // init_irods_connection_pool + +class process_stash_eviction_manager +{ + net::steady_timer timer_; + std::chrono::seconds interval_; + + public: + process_stash_eviction_manager(net::io_context& _io, std::chrono::seconds _eviction_check_interval) + : timer_{_io} + , interval_{_eviction_check_interval} + { + evict(); + } // constructor + + private: + auto evict() -> void + { + timer_.expires_after(interval_); + timer_.async_wait([this](const auto& _ec) { + if (_ec) { + return; + } + + logging::trace("Evicting expired items ..."); + irods::http::process_stash::erase_if([](const auto& _k, const auto& _v) { + const auto* client_info = boost::any_cast(&_v); + const auto erase_value = client_info && std::chrono::steady_clock::now() >= client_info->expires_at; + + if (erase_value) { + logging::debug("Evicted bearer token [{}].", _k); + 
+                }
+
+                return erase_value;
+            });
+
+            evict();
+        });
+    } // evict
+}; // class process_stash_eviction_manager
+
+// Program entry point.
+//
+// Parses the command line, services the informational options (help, version,
+// dump-config-template, dump-default-jsonschema) and exits with 0 for each of them.
+// Otherwise a configuration file is required: it is parsed as JSON, stored as the
+// global configuration and used to start the server.
+//
+// Returns 0 on a clean exit and 1 when the configuration file argument is missing or
+// an exception is caught.
+auto main(int _argc, char* _argv[]) -> int
+{
+    po::options_description opts_desc{""};
+
+    // clang-format off
+    opts_desc.add_options()
+        ("config-file,f", po::value(), "")
+        ("jsonschema-file", po::value(), "")
+        ("dump-config-template", "")
+        ("dump-default-jsonschema", "")
+        ("help,h", "")
+        ("version,v", "");
+    // clang-format on
+
+    // Allows the configuration file to be passed without "-f" / "--config-file".
+    po::positional_options_description pod;
+    pod.add("config-file", 1);
+
+    set_ips_display_name("irods_s3_api");
+
+    try {
+        po::variables_map vm;
+        po::store(po::command_line_parser(_argc, _argv).options(opts_desc).positional(pod).run(), vm);
+        po::notify(vm);
+
+        if (vm.count("help") > 0) {
+            print_usage();
+            return 0;
+        }
+
+        if (vm.count("version") > 0) {
+            print_version_info();
+            return 0;
+        }
+
+        if (vm.count("dump-config-template") > 0) {
+            print_configuration_template();
+            return 0;
+        }
+
+        if (vm.count("dump-default-jsonschema") > 0) {
+            fmt::print(default_jsonschema());
+            return 0;
+        }
+
+        if (vm.count("config-file") == 0) {
+            // Terminate the message with a newline like every other error written to stderr.
+            fmt::print(stderr, "Error: Missing [CONFIG_FILE_PATH] parameter.\n");
+            return 1;
+        }
+
+        const auto config = json::parse(std::ifstream{vm["config-file"].as()});
+        irods::http::globals::set_configuration(config);
+
+#if 0
+        {
+            const auto schema_file = (vm.count("jsonschema-file") > 0) ?
vm["jsonschema-file"].as() : ""; + if (!is_valid_configuration(schema_file, vm["config-file"].as())) { + return 1; + } + } +#endif + + const auto& s3_server_config = config.at("s3_server"); + set_log_level(s3_server_config); + spdlog::set_pattern("[%Y-%m-%d %T.%e] [P:%P] [%^%l%$] [T:%t] %v"); + + logging::info("Initializing server."); + + // TODO For LONG running tasks, see the following: + // + // - https://stackoverflow.com/questions/17648725/long-running-blocking-operations-in-boost-asio-handlers + // - https://www.open-std.org/JTC1/SC22/WG21/docs/papers/2012/n3388.pdf + // + + logging::trace("Loading API plugins."); + load_client_api_plugins(); + + const auto address = net::ip::make_address(s3_server_config.at("host").get_ref()); + const auto port = s3_server_config.at("port").get(); + const auto request_thread_count = + std::max(s3_server_config.at(json::json_pointer{"/requests/threads"}).get(), 1); + + logging::trace("Initializing TLS."); + init_tls(config); + + std::unique_ptr conn_pool; + + if (!config.at(json::json_pointer{"/irods_client/enable_4_2_compatibility"}).get()) { + logging::trace("Initializing iRODS connection pool."); + conn_pool = init_irods_connection_pool(config); + irods::http::globals::set_connection_pool(*conn_pool); + } + + // The io_context is required for all I/O. + logging::trace("Initializing HTTP components."); + net::io_context ioc{request_thread_count}; + irods::http::globals::set_request_handler_io_context(ioc); + + // Create and launch a listening port. + logging::trace("Initializing listening socket (host=[{}], port=[{}]).", address.to_string(), port); + std::make_shared(ioc, tcp::endpoint{address, port}, config)->run(); + + // SIGINT and SIGTERM instruct the server to shut down. + // Ignore SIGPIPE. The iRODS networking code assumes SIGPIPE is ignored or caught. 
+ logging::trace("Initializing signal handlers."); + net::signal_set signals{ioc, SIGINT, SIGTERM, SIGPIPE}; + + const std::function process_signals = + [&ioc, &signals, &process_signals](const beast::error_code&, int _signal) { + if (SIGPIPE == _signal) { + signals.async_wait(process_signals); + return; + } + + // Stop the io_context. This will cause run() to return immediately, eventually destroying + // the io_context and all of the sockets in it. + logging::warn("Received signal [{}]. Shutting down.", _signal); + ioc.stop(); + }; + + signals.async_wait(process_signals); + + // Launch the requested number of dedicated background I/O threads. + // These threads are used for long running tasks (e.g. reading/writing bytes, database, etc.) + logging::trace("Initializing thread pool for long running I/O tasks."); + net::thread_pool io_threads( + std::max(s3_server_config.at(json::json_pointer{"/background_io/threads"}).get(), 1)); + irods::http::globals::set_background_thread_pool(io_threads); + + // Run the I/O service on the requested number of threads. + logging::trace("Initializing thread pool for HTTP requests."); + net::thread_pool request_handler_threads(request_thread_count); + for (auto i = request_thread_count - 1; i > 0; --i) { + net::post(request_handler_threads, [&ioc] { ioc.run(); }); + } + + // Launch eviction check for expired bearer tokens. 
+ const auto eviction_check_interval = + s3_server_config.at(json::json_pointer{"/authentication/eviction_check_interval_in_seconds"}).get(); + process_stash_eviction_manager eviction_mgr{ioc, std::chrono::seconds{eviction_check_interval}}; + + logging::info("Server is ready."); + ioc.run(); + + request_handler_threads.stop(); + io_threads.stop(); + + logging::trace("Waiting for HTTP requests thread pool to shut down."); + request_handler_threads.join(); + + logging::trace("Waiting for I/O thread pool to shut down."); + io_threads.join(); + + logging::info("Shutdown complete."); + + return 0; + } + catch (const irods::exception& e) { + fmt::print(stderr, "Error: {}\n", e.client_display_what()); + } + catch (const std::exception& e) { + fmt::print(stderr, "Error: {}\n", e.what()); + } + + return 1; +} // main diff --git a/core/src/process_stash.cpp b/core/src/process_stash.cpp new file mode 100644 index 0000000..42e29f2 --- /dev/null +++ b/core/src/process_stash.cpp @@ -0,0 +1,84 @@ +#include "irods/private/s3_api/process_stash.hpp" + +#include +#include +#include + +#include +#include +#include +#include + +namespace +{ + // A mapping containing handles to heterogenous objects. + std::unordered_map g_stash; // NOLINT(cppcoreguidelines-avoid-non-const-global-variables) + + // A mutex which protects the map from data corruption. 
+    std::shared_mutex g_mtx; // NOLINT(cppcoreguidelines-avoid-non-const-global-variables, cert-err58-cpp)
+
+    // Returns a random UUID string which is not currently used as a key in g_stash.
+    //
+    // This function reads g_stash without taking g_mtx. The caller must already hold
+    // the lock (insert() does).
+    auto generate_unique_key() -> std::string
+    {
+        // Reuse one generator for every attempt instead of constructing a new one per UUID.
+        boost::uuids::random_generator generator;
+
+        auto uuid = to_string(generator());
+
+        while (g_stash.contains(uuid)) {
+            uuid = to_string(generator());
+        }
+
+        return uuid;
+    } // generate_unique_key
+} // anonymous namespace
+
+namespace irods::http::process_stash
+{
+    // Stores _value under a newly generated unique key and returns that key.
+    auto insert(boost::any _value) -> std::string
+    {
+        std::lock_guard lock{g_mtx};
+        return g_stash.insert_or_assign(generate_unique_key(), std::move(_value)).first->first;
+    } // insert
+
+    // Returns a copy of the value mapped to _key, or std::nullopt if _key is unknown.
+    auto find(const std::string& _key) -> std::optional
+    {
+        {
+            std::shared_lock lock{g_mtx};
+            if (auto iter = g_stash.find(_key); iter != std::end(g_stash)) {
+                return iter->second;
+            }
+        }
+
+        return std::nullopt;
+    } // find
+
+    // Removes the entry mapped to _key. Returns true if an entry was removed.
+    auto erase(const std::string& _key) -> bool
+    {
+        std::lock_guard lock{g_mtx};
+        return g_stash.erase(_key) > 0;
+    } // erase
+
+    // Removes every entry for which _pred(key, value) returns true and returns the
+    // number of entries removed. _pred is invoked while the stash is exclusively locked.
+    auto erase_if(const std::function& _pred) -> std::size_t
+    {
+        std::lock_guard lock{g_mtx};
+        return std::erase_if(g_stash, [&_pred](const auto& _item) {
+            const auto& [k, v] = _item;
+            return _pred(k, v);
+        });
+    } // erase_if
+
+    // Returns a snapshot of all keys currently held by the stash.
+    auto handles() -> std::vector
+    {
+        std::vector handles;
+
+        {
+            std::shared_lock lock{g_mtx};
+            handles.reserve(g_stash.size());
+            for (const auto& [k, v] : g_stash) {
+                handles.push_back(k);
+            }
+        }
+
+        return handles;
+    } // handles
+} // namespace irods::http::process_stash
diff --git a/core/src/session.cpp b/core/src/session.cpp
new file mode 100644
index 0000000..17e86ac
--- /dev/null
+++ b/core/src/session.cpp
@@ -0,0 +1,163 @@
+#include "irods/private/s3_api/session.hpp"
+
+#include "irods/private/s3_api/globals.hpp"
+#include "irods/private/s3_api/log.hpp"
+
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include
+
+#include
+#include
+#include
+
+#ifdef
IRODS_WRITE_REQUEST_TO_TEMP_FILE +# include +#endif + +namespace irods::http +{ + session::session( + boost::asio::ip::tcp::socket&& socket, + const request_handler_map_type& _request_handler_map, + int _max_body_size, + int _timeout_in_seconds) + : stream_(std::move(socket)) + , req_handlers_{&_request_handler_map} + , max_body_size_{_max_body_size} + , timeout_in_secs_{_timeout_in_seconds} + { + } // session (constructor) + + auto session::ip() const -> std::string + { + return stream_.socket().remote_endpoint().address().to_string(); + } // ip + + // Start the asynchronous operation + auto session::run() -> void + { + // We need to be executing within a strand to perform async operations + // on the I/O objects in this session. Although not strictly necessary + // for single-threaded contexts, this example code is written to be + // thread-safe by default. + boost::asio::dispatch( + stream_.get_executor(), boost::beast::bind_front_handler(&session::do_read, shared_from_this())); + } // run + + auto session::do_read() -> void + { + // Construct a new parser for each message. + parser_.emplace(); + + // Apply the limit defined in the configuration file. + parser_->body_limit(max_body_size_); + + // Set the timeout. + stream_.expires_after(std::chrono::seconds(timeout_in_secs_)); + + // Read a request. + boost::beast::http::async_read( + stream_, buffer_, *parser_, boost::beast::bind_front_handler(&session::on_read, shared_from_this())); + } // do_read + + auto session::on_read(boost::beast::error_code ec, std::size_t bytes_transferred) -> void + { + boost::ignore_unused(bytes_transferred); + + // This means they closed the connection + if (ec == boost::beast::http::error::end_of_stream) { + return do_close(); + } + + if (ec == boost::beast::http::error::body_limit) { + logging::error("{}: Request constraint error: {}", __func__, ec.message()); + return; + } + + if (ec) { + return irods::fail(ec, "read"); + } + + // + // Process client request and send a response. 
+ // + + auto req_ = parser_->release(); + + // Print the headers. + for (auto&& h : req_.base()) { + logging::debug("{}: Header: ({}, {})", __func__, h.name_string(), h.value()); + } + + // Print the components of the request URL. + logging::debug("{}: Method: {}", __func__, req_.method_string()); + logging::debug("{}: Version: {}", __func__, req_.version()); + logging::debug("{}: Target: {}", __func__, req_.target()); + logging::debug("{}: Keep Alive: {}", __func__, req_.keep_alive()); + logging::debug("{}: Has Content Length: {}", __func__, req_.has_content_length()); + logging::debug("{}: Chunked: {}", __func__, req_.chunked()); + logging::debug("{}: Needs EOF: {}", __func__, req_.need_eof()); + + namespace http = boost::beast::http; + + try { +#ifdef IRODS_WRITE_REQUEST_TO_TEMP_FILE + std::ofstream{"/tmp/http_request.txt"}.write(req_.body().c_str(), (std::streamsize) req_.body().size()); +#endif + + // "host" is a placeholder that's used so that get_url_path() can parse the URL correctly. + const auto path = irods::http::get_url_path(fmt::format("http://host{}", req_.target())); + if (!path) { + send(irods::http::fail(http::status::bad_request)); + return; + } + + if (const auto iter = req_handlers_->find(*path); iter != std::end(*req_handlers_)) { + (iter->second)(shared_from_this(), req_); + return; + } + + send(irods::http::fail(http::status::not_found)); + } + catch (const std::exception& e) { + logging::error("{}: {}", __func__, e.what()); + send(irods::http::fail(http::status::internal_server_error)); + } + } // on_read + + auto session::on_write(bool close, boost::beast::error_code ec, std::size_t bytes_transferred) -> void + { + boost::ignore_unused(bytes_transferred); + + if (ec) { + return irods::fail(ec, "write"); + } + + if (close) { + // This means we should close the connection, usually because + // the response indicated the "Connection: close" semantic. 
+ return do_close(); + } + + // We're done with the response so delete it + res_ = nullptr; + + // Read another request + do_read(); + } // on_write + + auto session::do_close() -> void + { + // Send a TCP shutdown. + boost::beast::error_code ec; + stream_.socket().shutdown(boost::asio::ip::tcp::socket::shutdown_send, ec); + + // At this point the connection is closed gracefully. + } // do_close +} // namespace irods::http diff --git a/core/src/transport.cpp b/core/src/transport.cpp new file mode 100644 index 0000000..8c643d7 --- /dev/null +++ b/core/src/transport.cpp @@ -0,0 +1,153 @@ +#include "irods/private/s3_api/transport.hpp" + +#include "irods/private/s3_api/globals.hpp" + +#include + +namespace irods::http +{ + transport::transport(boost::asio::io_context& _ctx) + : io_ctx_{_ctx} + { + } + + auto transport::connect(std::string_view _host, std::string_view _port) -> void + { + auto res{resolve(_host, _port)}; + do_connect(res); + did_connect_ = true; + } + + auto transport::is_connected() const noexcept -> bool + { + return did_connect_; + } + + auto transport::communicate(boost::beast::http::request& _request) + -> boost::beast::http::response + { + do_write(_request); + return do_read(); + } + + auto transport::resolve(std::string_view _host, std::string_view _port) + -> boost::asio::ip::tcp::resolver::results_type + { + boost::asio::ip::tcp::resolver tcp_res{io_ctx_}; + return tcp_res.resolve(_host, _port); + } + + tls_transport::tls_transport(boost::asio::io_context& _ctx, boost::asio::ssl::context& _secure_ctx) + : transport{_ctx} + , stream_{_ctx, _secure_ctx} + { + } + + tls_transport::~tls_transport() + { + if (is_connected()) { + disconnect(); + } + } + + auto tls_transport::resolve(std::string_view _host, std::string_view _port) + -> boost::asio::ip::tcp::resolver::results_type + { + set_sni_hostname(_host); + return transport::resolve(_host, _port); + } + + auto tls_transport::do_connect(boost::asio::ip::tcp::resolver::results_type& 
_resolved_host) -> void
+    {
+        boost::beast::get_lowest_layer(stream_).connect(_resolved_host);
+        stream_.handshake(boost::asio::ssl::stream_base::client);
+    }
+
+    auto tls_transport::do_write(boost::beast::http::request& _request) -> void
+    {
+        boost::beast::http::write(stream_, _request);
+    }
+
+    auto tls_transport::do_read() -> boost::beast::http::response
+    {
+        boost::beast::flat_buffer buffer;
+        boost::beast::http::response res;
+        boost::beast::http::read(stream_, buffer, res);
+
+        return res;
+    }
+
+    auto tls_transport::disconnect() -> void
+    {
+        // Errors reported by the TLS shutdown are captured in "ec" and deliberately not acted on.
+        boost::beast::error_code ec;
+        stream_.shutdown(ec);
+    }
+
+    // Sets the TLS SNI hostname on the underlying SSL handle.
+    //
+    // Throws boost::beast::system_error carrying the OpenSSL error code on failure.
+    auto tls_transport::set_sni_hostname(std::string_view _host) -> void
+    {
+        // SSL_set_tlsext_host_name() takes a NUL-terminated C string. A std::string_view
+        // gives no such guarantee for data(), so copy the name into a std::string first.
+        const std::string host{_host};
+
+        // Set SNI Hostname (many hosts need this to handshake successfully)
+        if (!SSL_set_tlsext_host_name(stream_.native_handle(), host.c_str())) {
+            boost::beast::error_code ec{static_cast(::ERR_get_error()), boost::asio::error::get_ssl_category()};
+            throw boost::beast::system_error{ec};
+        }
+    }
+
+    plain_transport::plain_transport(boost::asio::io_context& _ctx)
+        : transport{_ctx}
+        , stream_{_ctx}
+    {
+    }
+
+    plain_transport::~plain_transport()
+    {
+        if (is_connected()) {
+            disconnect();
+        }
+    }
+
+    auto plain_transport::do_connect(boost::asio::ip::tcp::resolver::results_type& _resolved_host) -> void
+    {
+        stream_.connect(_resolved_host);
+    }
+
+    auto plain_transport::do_write(boost::beast::http::request& _request) -> void
+    {
+        boost::beast::http::write(stream_, _request);
+    }
+
+    auto plain_transport::do_read() -> boost::beast::http::response
+    {
+        boost::beast::flat_buffer buffer;
+        boost::beast::http::response res;
+        boost::beast::http::read(stream_, buffer, res);
+
+        return res;
+    }
+
+    auto plain_transport::disconnect() -> void
+    {
+        // Errors reported by the socket shutdown are captured in "ec" and deliberately not acted on.
+        boost::beast::error_code ec;
+        stream_.socket().shutdown(boost::asio::ip::tcp::socket::shutdown_both, ec);
+    }
+
+    auto make_secure_context() -> boost::asio::ssl::context
+    {
+        boost::asio::ssl::context ctx{boost::asio::ssl::context::tlsv12_client};
+        ctx.set_default_verify_paths();
+        ctx.set_verify_mode(boost::asio::ssl::verify_peer);
+        return ctx;
+    }
+
+    // Creates the transport matching _scheme: a plain TCP transport for "http" and a
+    // TLS transport for "https".
+    //
+    // Throws std::invalid_argument for any other scheme.
+    auto transport_factory(const boost::urls::scheme& _scheme, boost::asio::io_context& _ctx)
+        -> std::unique_ptr
+    {
+        if (_scheme == boost::urls::scheme::http) {
+            return std::make_unique(_ctx);
+        }
+        if (_scheme == boost::urls::scheme::https) {
+            // NOTE(review): secure_context is a local which is passed by reference and is
+            // destroyed when this function returns, while the returned transport lives on.
+            // Confirm Boost.Asio allows the ssl::context to be destroyed before the stream
+            // created from it.
+            auto secure_context{make_secure_context()};
+            return std::make_unique(_ctx, secure_context);
+        }
+        throw std::invalid_argument{"Scheme is not supported."};
+    }
+} //namespace irods::http
diff --git a/endpoints/CMakeLists.txt b/endpoints/CMakeLists.txt
new file mode 100644
index 0000000..42d17db
--- /dev/null
+++ b/endpoints/CMakeLists.txt
@@ -0,0 +1,2 @@
+add_subdirectory(authentication)
+add_subdirectory(put_object)
diff --git a/endpoints/authentication/CMakeLists.txt b/endpoints/authentication/CMakeLists.txt
new file mode 100644
index 0000000..53a9c24
--- /dev/null
+++ b/endpoints/authentication/CMakeLists.txt
@@ -0,0 +1,33 @@
+add_library(
+  irods_s3_api_endpoint_authentication
+  OBJECT
+  "${CMAKE_CURRENT_SOURCE_DIR}/src/main.cpp"
+)
+
+target_compile_definitions(
+  irods_s3_api_endpoint_authentication
+  PRIVATE
+  ${IRODS_COMPILE_DEFINITIONS}
+  ${IRODS_COMPILE_DEFINITIONS_PRIVATE}
+)
+
+target_link_libraries(
+  irods_s3_api_endpoint_authentication
+  PRIVATE
+  irods_client
+  CURL::libcurl
+  nlohmann_json::nlohmann_json
+)
+
+target_include_directories(
+  irods_s3_api_endpoint_authentication
+  PRIVATE
+  "${IRODS_S3_API_PROJECT_SOURCE_DIR}/core/include"
+  "${IRODS_S3_API_PROJECT_BINARY_DIR}/core/include"
+  "${IRODS_S3_API_PROJECT_SOURCE_DIR}/endpoints/shared/include"
+  "${IRODS_EXTERNALS_FULLPATH_BOOST}/include"
+  "${IRODS_EXTERNALS_FULLPATH_FMT}/include"
+  "${IRODS_EXTERNALS_FULLPATH_SPDLOG}/include"
+)
+
+set_target_properties(irods_s3_api_endpoint_authentication PROPERTIES EXCLUDE_FROM_ALL TRUE)
diff --git a/endpoints/authentication/src/main.cpp b/endpoints/authentication/src/main.cpp
new file mode 100644
index 0000000..b9877f8
--- /dev/null
+++ b/endpoints/authentication/src/main.cpp
@@ -0,0 +1,227 @@
+#include "irods/private/s3_api/handlers.hpp"
+
+#include "irods/private/s3_api/common.hpp"
+#include "irods/private/s3_api/globals.hpp"
+#include "irods/private/s3_api/log.hpp"
+#include "irods/private/s3_api/process_stash.hpp"
+#include "irods/private/s3_api/session.hpp"
+#include "irods/private/s3_api/transport.hpp"
+#include "irods/private/s3_api/version.hpp"
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include
+#include
+#include
+#include
+#include
+
+#include
+#include
+
+#include
+#include
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+// clang-format off
+namespace beast = boost::beast; // from
+namespace net   = boost::asio;  // from
+// clang-format on
+
+namespace irods::http::handler
+{
+    // Decodes the base64 payload of a Basic authorization header ("username:password").
+    //
+    // Returns the (username, password) pair. Everything before the first ':' is the
+    // username and everything after it is the password. A pair of empty strings is
+    // returned when the payload cannot be decoded or contains no ':'.
+    auto decode_username_and_password(std::string_view _encoded_data) -> std::pair
+    {
+        std::string authorization{_encoded_data};
+        boost::trim(authorization);
+        logging::debug("{}: Authorization value (trimmed): [{}]", __func__, authorization);
+
+        // "size" is an in/out argument: capacity of "creds" going in, decoded byte count coming out.
+        constexpr auto max_creds_size = 128;
+        std::uint64_t size{max_creds_size};
+        std::array creds{};
+        // NOLINTNEXTLINE(cppcoreguidelines-pro-type-reinterpret-cast)
+        const auto ec = irods::base64_decode(
+            reinterpret_cast(authorization.data()), authorization.size(), creds.data(), &size);
+        logging::debug("{}: base64 - error code=[{}], decoded size=[{}]", __func__, ec, size);
+
+        // Do not parse the buffer when decoding failed: its contents and "size" cannot be
+        // trusted in that case.
+        // NOTE(review): assumes irods::base64_decode returns 0 on success - confirm.
+        if (ec != 0) {
+            return {"", ""};
+        }
+
+        // NOLINTNEXTLINE(cppcoreguidelines-pro-type-reinterpret-cast)
+        std::string_view sv{reinterpret_cast(creds.data()), size};
+
+        const auto colon = sv.find(':');
+        if (colon == std::string_view::npos) {
+            return {"", ""};
+        }
+
+        std::string username{sv.substr(0, colon)};
+        std::string password{sv.substr(colon + 1)};
+
+        return {std::move(username), std::move(password)};
+    }
+
+    IRODS_S3_API_ENDPOINT_ENTRY_FUNCTION_SIGNATURE(authentication)
+    {
+        if (_req.method() !=
boost::beast::http::verb::post) { + logging::error("{}: HTTP method not supported.", __func__); + return _sess_ptr->send(fail(status_type::method_not_allowed)); + } + + irods::http::globals::background_task([fn = __func__, _sess_ptr, _req = std::move(_req)] { + const auto& hdrs{_req.base()}; + const auto iter{hdrs.find("authorization")}; + + if (iter == std::end(hdrs)) { + return _sess_ptr->send(fail(status_type::bad_request)); + } + + logging::debug("{}: Authorization value: [{}]", fn, iter->value()); + + const auto pos = iter->value().find("Basic "); + if (pos == std::string_view::npos) { + return _sess_ptr->send(fail(status_type::bad_request)); + } + + constexpr auto basic_auth_scheme_prefix_size = 6; + auto [username, password]{ + decode_username_and_password(iter->value().substr(pos + basic_auth_scheme_prefix_size))}; + + static const auto seconds = + irods::http::globals::configuration() + .at(nlohmann::json::json_pointer{"/s3_server/authentication/basic/timeout_in_seconds"}) + .get(); + + // The anonymous user account must be handled in a special way because rc_check_auth_credentials + // doesn't support it. To get around that, the S3 API will return a bearer token whenever the + // anonymous user is seen. If the iRODS zone doesn't contain an anonymous user, any request sent + // by the client will result in an error. + // + // The error will occur when rc_switch_user is invoked on the non-existent user. + if ("anonymous" == username && password.empty()) { + logging::trace("{}: Detected the anonymous user account. 
Skipping auth check and returning token.", fn); + + auto bearer_token = irods::http::process_stash::insert(authenticated_client_info{ + .auth_scheme = authorization_scheme::basic, + .username = std::move(username), + .expires_at = std::chrono::steady_clock::now() + std::chrono::seconds{seconds}}); + + response_type res{status_type::ok, _req.version()}; + res.set(field_type::server, irods::s3::version::server_name); + res.set(field_type::content_type, "text/plain"); + res.keep_alive(_req.keep_alive()); + res.body() = std::move(bearer_token); + res.prepare_payload(); + + return _sess_ptr->send(std::move(res)); + } + + if (username.empty() || password.empty()) { + return _sess_ptr->send(fail(status_type::unauthorized)); + } + + bool login_successful = false; + + try { + using json_pointer = nlohmann::json::json_pointer; + + static const auto& config = irods::http::globals::configuration(); + static const auto& rodsadmin_username = + config.at(json_pointer{"/irods_client/proxy_admin_account/username"}).get_ref(); + static const auto& rodsadmin_password = + config.at(json_pointer{"/irods_client/proxy_admin_account/password"}).get_ref(); + static const auto& zone = config.at(json_pointer{"/irods_client/zone"}).get_ref(); + + if (config.at(json_pointer{"/irods_client/enable_4_2_compatibility"}).get()) { + // When operating in 4.2 compatibility mode, all we can do is create a new iRODS connection + // and authenticate using the client's username and password. iRODS 4.2 does not provide an + // API for checking native authentication credentials. 
+
+                    const auto& host = config.at(json_pointer{"/irods_client/host"}).get_ref();
+                    const auto port = config.at(json_pointer{"/irods_client/port"}).get();
+
+                    irods::experimental::client_connection conn{
+                        irods::experimental::defer_authentication, host, port, {username, zone}};
+
+                    login_successful = (clientLoginWithPassword(static_cast(conn), password.data()) == 0);
+                }
+                else {
+                    // If we're in this branch, assume we're talking to an iRODS 4.3.1+ server. Therefore, we
+                    // can use existing iRODS connections to verify the correctness of client provided
+                    // credentials for native authentication.
+
+                    // std::string::copy() never writes a terminating NUL. "input" is zero-initialized,
+                    // so copying at most (capacity - 1) characters keeps the last byte of each field
+                    // as '\0' even when the source string is longer than the field.
+                    // NOTE(review): assumes these fields are consumed as C strings - confirm.
+                    CheckAuthCredentialsInput input{};
+                    username.copy(input.username, sizeof(CheckAuthCredentialsInput::username) - 1);
+                    zone.copy(input.zone, sizeof(CheckAuthCredentialsInput::zone) - 1);
+
+                    namespace adm = irods::experimental::administration;
+                    const adm::user_password_property prop{password, rodsadmin_password};
+                    const auto obfuscated_password = irods::experimental::administration::obfuscate_password(prop);
+                    obfuscated_password.copy(input.password, sizeof(CheckAuthCredentialsInput::password) - 1);
+
+                    // Output argument of rc_check_auth_credentials. Released with std::free() on scope exit.
+                    int* correct{};
+
+                    // NOLINTNEXTLINE(cppcoreguidelines-owning-memory, cppcoreguidelines-no-malloc)
+                    irods::at_scope_exit free_memory{[&correct] { std::free(correct); }};
+
+                    auto conn = irods::get_connection(rodsadmin_username);
+
+                    if (const auto ec = rc_check_auth_credentials(static_cast(conn), &input, &correct); ec < 0)
+                    {
+                        logging::error(
+                            "{}: Error verifying native authentication credentials for user [{}]: error code "
+                            "[{}].",
+                            fn,
+                            username,
+                            ec);
+                    }
+                    else {
+                        logging::debug("{}: correct = [{}]", fn, fmt::ptr(correct));
+                        logging::debug("{}: *correct = [{}]", fn, (correct ?
*correct : -1)); + login_successful = (correct && 1 == *correct); + } + } + } + catch (const irods::exception& e) { + logging::error( + "{}: Error verifying native authentication credentials for user [{}]: {}", + fn, + username, + e.client_display_what()); + } + catch (const std::exception& e) { + logging::error( + "{}: Error verifying native authentication credentials for user [{}]: {}", fn, username, e.what()); + } + + if (!login_successful) { + return _sess_ptr->send(fail(status_type::unauthorized)); + } + + auto bearer_token = irods::http::process_stash::insert(authenticated_client_info{ + .auth_scheme = authorization_scheme::basic, + .username = std::move(username), + .expires_at = std::chrono::steady_clock::now() + std::chrono::seconds{seconds}}); + + response_type res{status_type::ok, _req.version()}; + res.set(field_type::server, irods::s3::version::server_name); + res.set(field_type::content_type, "text/plain"); + res.keep_alive(_req.keep_alive()); + res.body() = std::move(bearer_token); + res.prepare_payload(); + + return _sess_ptr->send(std::move(res)); + }); + } // authentication +} //namespace irods::http::handler diff --git a/endpoints/put_object/CMakeLists.txt b/endpoints/put_object/CMakeLists.txt new file mode 100644 index 0000000..138313c --- /dev/null +++ b/endpoints/put_object/CMakeLists.txt @@ -0,0 +1,33 @@ +add_library( + irods_s3_api_endpoint_put_object + OBJECT + "${CMAKE_CURRENT_SOURCE_DIR}/src/main.cpp" +) + +target_compile_definitions( + irods_s3_api_endpoint_put_object + PRIVATE + ${IRODS_COMPILE_DEFINITIONS} + ${IRODS_COMPILE_DEFINITIONS_PRIVATE} +) + +target_link_libraries( + irods_s3_api_endpoint_put_object + PRIVATE + irods_client + CURL::libcurl + nlohmann_json::nlohmann_json +) + +target_include_directories( + irods_s3_api_endpoint_put_object + PRIVATE + "${IRODS_S3_API_PROJECT_SOURCE_DIR}/core/include" + "${IRODS_S3_API_PROJECT_BINARY_DIR}/core/include" + "${IRODS_S3_API_PROJECT_SOURCE_DIR}/endpoints/shared/include" + 
"${IRODS_EXTERNALS_FULLPATH_BOOST}/include" + "${IRODS_EXTERNALS_FULLPATH_FMT}/include" + "${IRODS_EXTERNALS_FULLPATH_SPDLOG}/include" +) + +set_target_properties(irods_s3_api_endpoint_put_object PROPERTIES EXCLUDE_FROM_ALL TRUE) diff --git a/endpoints/put_object/src/main.cpp b/endpoints/put_object/src/main.cpp new file mode 100644 index 0000000..7da19cf --- /dev/null +++ b/endpoints/put_object/src/main.cpp @@ -0,0 +1,109 @@ +#include "irods/private/s3_api/handlers.hpp" + +#include "irods/private/s3_api/common.hpp" +#include "irods/private/s3_api/globals.hpp" +#include "irods/private/s3_api/log.hpp" +#include "irods/private/s3_api/session.hpp" +#include "irods/private/s3_api/version.hpp" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include // For make_error_code +#include + +#include +#include +#include + +#include +#include +#include +#include +#include + +// clang-format off +namespace beast = boost::beast; // from +namespace http = beast::http; // from +namespace net = boost::asio; // from + +namespace fs = irods::experimental::filesystem; +namespace logging = irods::http::logging; +// clang-format on + +#define IRODS_S3_API_ENDPOINT_OPERATION_SIGNATURE(name) \ + auto name( \ + irods::http::session_pointer_type _sess_ptr, \ + irods::http::request_type& _req, \ + irods::http::query_arguments_type& _args) \ + ->void + +namespace +{ + // + // Handler function prototypes + // + + IRODS_S3_API_ENDPOINT_OPERATION_SIGNATURE(op_test); + + // + // Operation to Handler mappings + // + + // clang-format off + const std::unordered_map handlers_for_get{ + {"test_get", op_test} + }; + + const std::unordered_map handlers_for_post{ + {"test_put", op_test} + }; + // clang-format on +} // anonymous namespace + +namespace irods::http::handler +{ + // NOLINTNEXTLINE(performance-unnecessary-value-param) + IRODS_S3_API_ENDPOINT_ENTRY_FUNCTION_SIGNATURE(put_object) + { + execute_operation(_sess_ptr, _req, handlers_for_get, 
handlers_for_post); + } // put_object +} // namespace irods::http::handler + +namespace +{ + // + // Operation handler implementations + // + + IRODS_S3_API_ENDPOINT_OPERATION_SIGNATURE(op_test) + { + auto result = irods::http::resolve_client_identity(_req); + if (result.response) { + return _sess_ptr->send(std::move(*result.response)); + } + + const auto client_info = result.client_info; + + irods::http::globals::background_task( + [fn = __func__, client_info, _sess_ptr, _req = std::move(_req), _args = std::move(_args)] { + logging::info("{}: client_info.username = [{}]", fn, client_info.username); + + http::response res{http::status::ok, _req.version()}; + res.set(http::field::server, irods::s3::version::server_name); + res.set(http::field::content_type, "application/json"); + res.keep_alive(_req.keep_alive()); + + res.body() = "THIS IS A TEST!"; + res.prepare_payload(); + + return _sess_ptr->send(std::move(res)); + }); + } // op_test +} // anonymous namespace diff --git a/endpoints/shared/include/irods/private/s3_api/handlers.hpp b/endpoints/shared/include/irods/private/s3_api/handlers.hpp new file mode 100644 index 0000000..6d0aa16 --- /dev/null +++ b/endpoints/shared/include/irods/private/s3_api/handlers.hpp @@ -0,0 +1,20 @@ +#ifndef IRODS_S3_API_HANDLERS_HPP +#define IRODS_S3_API_HANDLERS_HPP + +#include "irods/private/s3_api/common.hpp" + +#ifndef IRODS_S3_API_ENDPOINT_ENTRY_FUNCTION_SIGNATURE +// Enables all endpoint function signatures for declarations and definitions to be +// updated from one location. +# define IRODS_S3_API_ENDPOINT_ENTRY_FUNCTION_SIGNATURE(name) \ + auto name(session_pointer_type _sess_ptr, request_type& _req)->void +#endif // IRODS_S3_API_ENDPOINT_ENTRY_FUNCTION_SIGNATURE + +namespace irods::http::handler +{ + IRODS_S3_API_ENDPOINT_ENTRY_FUNCTION_SIGNATURE(authentication); + + IRODS_S3_API_ENDPOINT_ENTRY_FUNCTION_SIGNATURE(put_object); +} // namespace irods::http::handler + +#endif // IRODS_S3_API_HANDLERS_HPP