From 6c196fb62dbe98f23132923464140714f817c65f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=9E=9C=E5=86=BB=E8=99=BE=E4=BB=81?= Date: Tue, 22 Nov 2022 01:23:25 +0800 Subject: [PATCH] Linux workflow (#14) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * enable brpc use rdma * Fix override issue in pb 3.21 * fix rpc_replay can't send request equably (#1910) * fix rpc_replay can't send request equably * 类型修改 * fix coredump cause by uri like 'host:port/hotspots/growth_non_responsive?console=abc' (#1278) * expose logging::PrintLog * Support -escape_log * Fix thrift_message pb override issue * Chore: rework Bazel build system * remove white space from default value of bvar_dump_tabs * Update oncall.md * Fix not to abort when checking the errorno with unicode string (#1142) * fix a typo in grpc protocol (#1924) * fix a typo in grpc protocol * ERESPONSE->EREQUEST Co-authored-by: 薛传宇 * [user-cases] add Apache Doris user case * add nacos naming service (#1922) * [document] Add vcpkg instruction step (#1925) * http response uses brpc error code (#1927) * http response uses brpc error code * add gflag for using http error code * add unit test of http error code * Update Oncall record * Fix bvar compile error (#1937) * Fix bug butex_wait failed with timeout (#1917) * Fix bug butex_wait failed with timeout Co-authored-by: XiguoHu * fix issues in FlatMap * Update release_cn.md * brpc在BaikalDB中的应用 * Update getting_started.md * fix(rpc_replay) continue when failed to init channel (#1938) * fix(rpc_replay) continue when failed to init channel * check supported_connection_type * check supported_connection_type * check supported_connection_type * fix lint * Update cases.md (#1944) * fix rpc_press.md (#1942) Signed-off-by: fan Signed-off-by: fan * fix typo in json2pb doc (#1939) * Update oncall.md (#1949) * Update release_cn.md * Update RELEASE_VERSION * Update CMakeLists.txt * Update brpc.spec * Update release_cn.md * Update release_cn.md * 
Update release_cn.md * add pull_request_template.md (#1952) Signed-off-by: fan Signed-off-by: fan * Fix the linkage errors caused by duplicate symbols (#1936) * Fix "sched_to itself" error when buidling by Clang on Linux aarch64 (#1950) * docs: fix some typos Signed-off-by: cui fliter * Fix source file mode * rpm: support RHEL9 * Update oncall.md * Update newcommitter.md * fix arena cleared early when parse redis message * community: Update oncall.md (#1960) Co-authored-by: lei.li * Reduce UT log output * Update release_cn.md * Update release_cn.md Update brpc's brief introduction in Announce mail. * Update release_cn.md * Macos workflow (#10) * fix typo * delete bazel from mac workflow * fix exceptation value for mac ut * delete test * Create ci_linux.yml * Update ci_linux.yml * Update ci_linux.yml * Update ci_linux.yml * Update ci_linux.yml * Update ci_linux.yml * Update ci_linux.yml * Update ci_linux.yml * Update ci_linux.yml * Update ci_linux.yml * Update ci_linux.yml Signed-off-by: fan Signed-off-by: cui fliter Co-authored-by: Tuvie Co-authored-by: wwbmmm Co-authored-by: bumingchun Co-authored-by: Yingchun Lai Co-authored-by: gejun.0 Co-authored-by: Jiashun Zhu Co-authored-by: Shuai Zhang Co-authored-by: yyweii Co-authored-by: tobe Co-authored-by: bbbezxcy Co-authored-by: 薛传宇 Co-authored-by: morningman Co-authored-by: Jack·Boos·Yu <47264268+JackBoosY@users.noreply.github.com> Co-authored-by: Tanzhongyi(Jerry Tan) Co-authored-by: chenBright Co-authored-by: lei he Co-authored-by: Chengx Co-authored-by: HU Co-authored-by: XiguoHu Co-authored-by: Tao Liu Co-authored-by: day253 <9634619+day253@users.noreply.github.com> Co-authored-by: ds Co-authored-by: fan <75058860+fansehep@users.noreply.github.com> Co-authored-by: serverglen Co-authored-by: Adonis Ling Co-authored-by: cui fliter Co-authored-by: Xiaofeng Wang Co-authored-by: jiumei Co-authored-by: LorinLee Co-authored-by: lei.li --- .bazelrc | 35 +- .bazelversion | 1 + .github/pull_request_template.md | 20 + 
.github/workflows/ci_linux.yml | 45 + .github/workflows/ci_macos.yml | 52 + .travis.yml | 2 +- BUILD.bazel | 285 +-- CMakeLists.txt | 23 +- Makefile | 2 +- RELEASE_VERSION | 2 +- WORKSPACE | 289 ++- bazel/{brpc.bzl => BUILD.bazel} | 29 +- bazel/config/BUILD.bazel | 105 + bazel/third_party/BUILD.bazel | 17 + bazel/third_party/crc32c/BUILD.bazel | 17 + bazel/third_party/crc32c/crc32c.BUILD | 93 + bazel/third_party/event/BUILD.bazel | 17 + bazel/third_party/event/event.BUILD | 59 + .../0001-mark-override-resolve-warning.patch | 36 + bazel/third_party/glog/BUILD.bazel | 17 + bazel/third_party/leveldb/BUILD.bazel | 21 + bazel/third_party/leveldb/leveldb.BUILD | 72 + bazel/third_party/leveldb/port.h | 34 + bazel/third_party/leveldb/port_config.h | 38 + bazel/third_party/openssl/BUILD.bazel | 17 + bazel/third_party/openssl/openssl.BUILD | 165 ++ bazel/third_party/protobuf/BUILD.bazel | 17 + bazel/third_party/protobuf/protobuf.BUILD | 498 +++++ bazel/third_party/snappy/BUILD.bazel | 17 + bazel/third_party/snappy/snappy.BUILD | 122 + bazel/third_party/thrift/BUILD.bazel | 17 + bazel/third_party/thrift/thrift.BUILD | 75 + bazel/third_party/zlib/BUILD.bazel | 17 + bazel/third_party/zlib/zlib.BUILD | 111 + community/cases.md | 20 +- community/newcommitter.md | 5 +- community/oncall.md | 6 + community/release_cn.md | 46 +- config_brpc.sh | 16 +- docs/cn/bvar_c++.md | 2 +- docs/cn/client.md | 26 + docs/cn/execution_queue.md | 2 +- docs/cn/getting_started.md | 11 + docs/cn/http_client.md | 2 +- docs/cn/http_service.md | 2 +- docs/cn/io.md | 2 +- docs/cn/json2pb.md | 2 +- docs/cn/rdma.md | 60 + docs/cn/rpc_press.md | 1 + docs/cn/streaming_rpc.md | 2 +- docs/en/client.md | 26 + docs/en/getting_started.md | 12 + docs/en/http_client.md | 2 +- docs/en/memcache_client.md | 2 +- docs/en/overview.md | 2 +- docs/en/rdma.md | 60 + docs/en/status.md | 2 +- docs/en/streaming_rpc.md | 2 +- example/{BUILD => BUILD.bazel} | 9 +- example/build_with_old_bazel/.bazelrc | 16 + 
example/build_with_old_bazel/.bazelversion | 16 + example/build_with_old_bazel/BUILD.bazel | 28 + example/build_with_old_bazel/WORKSPACE | 134 ++ .../build_with_old_bazel/leveldb.BUILD | 0 .../build_with_old_bazel/openssl.BUILD | 15 + example/build_with_old_bazel/zlib.BUILD | 23 + example/rdma_performance/CMakeLists.txt | 150 ++ example/rdma_performance/Makefile | 98 + example/rdma_performance/client.cpp | 310 +++ example/rdma_performance/server.cpp | 84 + example/rdma_performance/test.proto | 33 + glog.BUILD | 171 -- package/rpm/brpc.spec | 28 +- src/brpc/acceptor.cpp | 15 +- src/brpc/acceptor.h | 4 + src/brpc/builtin/hotspots_service.cpp | 1 - src/brpc/channel.cpp | 36 +- src/brpc/channel.h | 4 + src/brpc/controller.cpp | 4 + src/brpc/controller.h | 0 src/brpc/details/naming_service_thread.cpp | 3 +- src/brpc/details/naming_service_thread.h | 4 +- src/brpc/errno.proto | 4 + src/brpc/esp_message.h | 2 +- src/brpc/event_dispatcher.h | 1 + src/brpc/global.cpp | 3 + src/brpc/input_messenger.cpp | 260 ++- src/brpc/input_messenger.h | 28 + src/brpc/memcache.h | 4 +- src/brpc/nshead_message.h | 2 +- src/brpc/pb_compat.h | 6 + src/brpc/periodic_naming_service.cpp | 6 +- src/brpc/periodic_naming_service.h | 2 + src/brpc/policy/http_rpc_protocol.cpp | 16 +- src/brpc/policy/nacos_naming_service.cpp | 289 +++ src/brpc/policy/nacos_naming_service.h | 67 + src/brpc/policy/redis_protocol.cpp | 4 +- src/brpc/protocol.h | 0 src/brpc/rdma/block_pool.cpp | 561 +++++ src/brpc/rdma/block_pool.h | 104 + src/brpc/rdma/rdma_endpoint.cpp | 1454 ++++++++++++ src/brpc/rdma/rdma_endpoint.h | 258 +++ src/brpc/rdma/rdma_helper.cpp | 636 ++++++ src/brpc/rdma/rdma_helper.h | 84 + src/brpc/redis.h | 4 +- src/brpc/redis_command.cpp | 4 + src/brpc/redis_command.h | 1 + src/brpc/serialized_request.h | 2 +- src/brpc/server.cpp | 37 + src/brpc/server.h | 6 +- src/brpc/socket.cpp | 114 +- src/brpc/socket.h | 22 + src/brpc/socket_inl.h | 1 + src/brpc/socket_map.cpp | 9 +- src/brpc/socket_map.h | 19 +- 
src/brpc/thrift_message.h | 11 +- src/bthread/butex.cpp | 18 +- src/bthread/task_group.cpp | 6 + src/butil/class_name.h | 4 +- src/butil/containers/flat_map_inl.h | 14 +- src/butil/errno.cpp | 3 +- src/butil/logging.cc | 37 +- src/butil/logging.h | 5 + src/butil/recordio.cc | 0 src/butil/recordio.h | 0 src/bvar/mvariable.cpp | 2 +- src/bvar/variable.cpp | 14 +- test/BUILD.bazel | 123 +- test/CMakeLists.txt | 2 +- test/brpc_block_pool_unittest.cpp | 219 ++ test/brpc_naming_service_unittest.cpp | 141 ++ test/brpc_rdma_unittest.cpp | 1954 +++++++++++++++++ test/brpc_server_unittest.cpp | 157 ++ test/bthread_cond_bug_unittest.cpp | 141 ++ test/bvar_recorder_unittest.cpp | 2 +- test/echo.proto | 5 + tools/BUILD | 2 +- tools/rpc_replay/rpc_replay.cpp | 65 +- zlib.BUILD | 8 - 139 files changed, 10105 insertions(+), 704 deletions(-) create mode 100644 .bazelversion create mode 100644 .github/pull_request_template.md create mode 100644 .github/workflows/ci_linux.yml create mode 100644 .github/workflows/ci_macos.yml rename bazel/{brpc.bzl => BUILD.bazel} (52%) create mode 100644 bazel/config/BUILD.bazel create mode 100644 bazel/third_party/BUILD.bazel create mode 100644 bazel/third_party/crc32c/BUILD.bazel create mode 100644 bazel/third_party/crc32c/crc32c.BUILD create mode 100644 bazel/third_party/event/BUILD.bazel create mode 100644 bazel/third_party/event/event.BUILD create mode 100644 bazel/third_party/glog/0001-mark-override-resolve-warning.patch create mode 100644 bazel/third_party/glog/BUILD.bazel create mode 100644 bazel/third_party/leveldb/BUILD.bazel create mode 100644 bazel/third_party/leveldb/leveldb.BUILD create mode 100644 bazel/third_party/leveldb/port.h create mode 100644 bazel/third_party/leveldb/port_config.h create mode 100644 bazel/third_party/openssl/BUILD.bazel create mode 100644 bazel/third_party/openssl/openssl.BUILD create mode 100644 bazel/third_party/protobuf/BUILD.bazel create mode 100644 bazel/third_party/protobuf/protobuf.BUILD create mode 
100644 bazel/third_party/snappy/BUILD.bazel create mode 100644 bazel/third_party/snappy/snappy.BUILD create mode 100644 bazel/third_party/thrift/BUILD.bazel create mode 100644 bazel/third_party/thrift/thrift.BUILD create mode 100644 bazel/third_party/zlib/BUILD.bazel create mode 100644 bazel/third_party/zlib/zlib.BUILD create mode 100644 docs/cn/rdma.md create mode 100644 docs/en/rdma.md rename example/{BUILD => BUILD.bazel} (91%) create mode 100644 example/build_with_old_bazel/.bazelrc create mode 100644 example/build_with_old_bazel/.bazelversion create mode 100644 example/build_with_old_bazel/BUILD.bazel create mode 100644 example/build_with_old_bazel/WORKSPACE rename leveldb.BUILD => example/build_with_old_bazel/leveldb.BUILD (100%) rename openssl.BUILD => example/build_with_old_bazel/openssl.BUILD (51%) create mode 100644 example/build_with_old_bazel/zlib.BUILD create mode 100644 example/rdma_performance/CMakeLists.txt create mode 100644 example/rdma_performance/Makefile create mode 100644 example/rdma_performance/client.cpp create mode 100644 example/rdma_performance/server.cpp create mode 100644 example/rdma_performance/test.proto delete mode 100644 glog.BUILD mode change 100755 => 100644 src/brpc/channel.cpp mode change 100755 => 100644 src/brpc/controller.h mode change 100755 => 100644 src/brpc/global.cpp create mode 100644 src/brpc/policy/nacos_naming_service.cpp create mode 100644 src/brpc/policy/nacos_naming_service.h mode change 100755 => 100644 src/brpc/protocol.h create mode 100644 src/brpc/rdma/block_pool.cpp create mode 100644 src/brpc/rdma/block_pool.h create mode 100644 src/brpc/rdma/rdma_endpoint.cpp create mode 100644 src/brpc/rdma/rdma_endpoint.h create mode 100644 src/brpc/rdma/rdma_helper.cpp create mode 100644 src/brpc/rdma/rdma_helper.h mode change 100755 => 100644 src/butil/recordio.cc mode change 100755 => 100644 src/butil/recordio.h create mode 100644 test/brpc_block_pool_unittest.cpp create mode 100644 test/brpc_rdma_unittest.cpp create 
mode 100644 test/bthread_cond_bug_unittest.cpp delete mode 100644 zlib.BUILD diff --git a/.bazelrc b/.bazelrc index 8ae4f0d482..f3d406b061 100644 --- a/.bazelrc +++ b/.bazelrc @@ -13,12 +13,29 @@ # See the License for the specific language governing permissions and # limitations under the License. -build --copt -DHAVE_ZLIB=1 -# bazel build with glog -# build --define=with_glog=true -build -c opt -build --incompatible_disable_deprecated_attr_params=false -build --incompatible_new_actions_api=false -# unittest -test --define=unittest=true -test --copt=-g +# +# Default build options. These are applied first and unconditionally. +# + +build --cxxopt="-std=c++11" +# Use gnu11 for asm keyword. +build --conlyopt="-std=gnu11" + +# Enable position independent code (this is the default on macOS and Windows) +# (Workaround for https://github.com/bazelbuild/rules_foreign_cc/issues/421) +build --copt=-fPIC +build --fission=dbg,opt +build --features=per_object_debug_info + +# We already have absl in the build, define absl=1 to tell googletest to use absl for backtrace. +build --define absl=1 + +# For brpc. +build --define=BRPC_WITH_GLOG=true +test --define=BRPC_BUILD_FOR_UNITTEST=true + +# Pass PATH, CC, CXX and LLVM_CONFIG variables from the environment. +build --action_env=CC +build --action_env=CXX +build --action_env=LLVM_CONFIG +build --action_env=PATH diff --git a/.bazelversion b/.bazelversion new file mode 100644 index 0000000000..af8c8ec7c1 --- /dev/null +++ b/.bazelversion @@ -0,0 +1 @@ +4.2.2 diff --git a/.github/pull_request_template.md b/.github/pull_request_template.md new file mode 100644 index 0000000000..030d014bd1 --- /dev/null +++ b/.github/pull_request_template.md @@ -0,0 +1,20 @@ +### What problem does this PR solve? + +Issue Number: + +Problem Summary: + +### What is changed and the side effects? 
+ +Changed: + +Side effects: +- Performance effects(性能影响): + +- Breaking backward compatibility(向后兼容性): + +--- +### Check List: +- Please make sure your changes are compilable(请确保你的更改可以通过编译). +- When providing us with a new feature, it is best to add related tests(如果你向我们增加一个新的功能, 请添加相关测试). +- Please follow [Contributor Covenant Code of Conduct](../CODE_OF_CONDUCT.md).(请遵循贡献者准则). diff --git a/.github/workflows/ci_linux.yml b/.github/workflows/ci_linux.yml new file mode 100644 index 0000000000..feb77d32de --- /dev/null +++ b/.github/workflows/ci_linux.yml @@ -0,0 +1,45 @@ +name: Build and Test on Linux + +on: + push: + branches: [ master ] + pull_request: + branches: [ master ] + +env: + proc_num: $(nproc) + +jobs: + compile-with-make: + runs-on: ubuntu-latest # https://github.com/actions/runner-images + + steps: + - uses: actions/checkout@v2 + + - name: install dependences + run: | + sudo apt-get install -y git g++ make libssl-dev libgflags-dev libprotobuf-dev libprotoc-dev protobuf-compiler libleveldb-dev + - name: config_brpc + run: | + ./config_brpc.sh --header="/usr/local/include /usr/include" --libs="/usr/local/lib /usr/local/lib64 /usr/lib /usr/lib64" + - name: compile + run: | + make -j ${{env.proc_num}} + compile-with-cmake: + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v2 + + - name: install dependences + run: | + sudo apt-get install -y git g++ make libssl-dev libgflags-dev libprotobuf-dev libprotoc-dev protobuf-compiler libleveldb-dev + - name: cmake + run: | + mkdir build + cd build + cmake .. 
+ - name: compile + run: | + cd build + make -j ${{env.proc_num}} diff --git a/.github/workflows/ci_macos.yml b/.github/workflows/ci_macos.yml new file mode 100644 index 0000000000..636d757fa4 --- /dev/null +++ b/.github/workflows/ci_macos.yml @@ -0,0 +1,52 @@ +name: Build and Test on Macos + +on: + push: + branches: [ master ] + pull_request: + branches: [ master ] + +env: + proc_num: $(sysctl -n hw.logicalcpu) + +jobs: + compile-with-make: + runs-on: macos-latest # https://github.com/actions/runner-images + + steps: + - uses: actions/checkout@v2 + + - name: install dependences + run: | + brew install openssl gnu-getopt coreutils gflags protobuf leveldb + + - name: config_brpc + run: | + GETOPT_PATH=$(find "/usr/local/Cellar/" -name "getopt" -type f -perm +111 -exec dirname {} \;) + export PATH=$GETOPT_PATH:$PATH + ./config_brpc.sh --header="/usr/local/include" --libs="/usr/local/lib" + + - name: compile + run: | + make -j ${{env.proc_num}} + + compile-with-cmake: + runs-on: macos-latest + + steps: + - uses: actions/checkout@v2 + + - name: install dependences + run: | + brew install openssl gnu-getopt coreutils gflags protobuf leveldb + + - name: cmake + run: | + mkdir build + cd build + cmake .. 
+ + - name: compile + run: | + cd build + make -j ${{env.proc_num}} diff --git a/.travis.yml b/.travis.yml index 9200d9603c..96ada3853d 100644 --- a/.travis.yml +++ b/.travis.yml @@ -40,7 +40,7 @@ before_script: - sudo bash -c "echo 'core.%e.%p' > /proc/sys/kernel/core_pattern" before_install: -- wget --no-clobber https://github.com/bazelbuild/bazel/releases/download/0.25.1/bazel_0.25.1-linux-x86_64.deb && sudo dpkg -i bazel_0.25.1-linux-x86_64.deb +- wget --no-clobber https://github.com/bazelbuild/bazel/releases/download/4.2.2/bazel_4.2.2-linux-x86_64.deb && sudo dpkg -i bazel_4.2.2-linux-x86_64.deb - sudo apt-get install automake bison flex g++ git libboost-all-dev libevent-dev libssl-dev libtool make pkg-config # thrift dependencies - wget https://archive.apache.org/dist/thrift/0.11.0/thrift-0.11.0.tar.gz && tar -xf thrift-0.11.0.tar.gz && cd thrift-0.11.0/ && ./configure --prefix=/usr --with-rs=no --with-ruby=no --with-python=no --with-java=no --with-go=no --with-perl=no --with-php=no --with-csharp=no --with-erlang=no --with-lua=no --with-nodejs=no CXXFLAGS="-Wno-unused-variable" && make -j4 && sudo make install && cd - diff --git a/BUILD.bazel b/BUILD.bazel index 0cb60fa4c7..58e8863dd2 100644 --- a/BUILD.bazel +++ b/BUILD.bazel @@ -13,47 +13,13 @@ # See the License for the specific language governing permissions and # limitations under the License. 
+load("@rules_proto//proto:defs.bzl", "proto_library") +load("@rules_cc//cc:defs.bzl", "cc_binary", "cc_library", "cc_proto_library", "objc_library") + licenses(["notice"]) # Apache v2 exports_files(["LICENSE"]) -load(":bazel/brpc.bzl", "brpc_proto_library") - -config_setting( - name = "with_glog", - define_values = {"with_glog": "true"}, - visibility = ["//visibility:public"], -) - -config_setting( - name = "with_mesalink", - define_values = {"with_mesalink": "true"}, - visibility = ["//visibility:public"], -) - -config_setting( - name = "with_thrift", - define_values = {"with_thrift": "true"}, - visibility = ["//visibility:public"], -) - -config_setting( - name = "unittest", - define_values = {"unittest": "true"}, -) - -config_setting( - name = "darwin", - values = {"cpu": "darwin"}, - visibility = ["//visibility:public"], -) - -config_setting( - name = "linux", - values = {"cpu": "linux"}, - visibility = ["//visibility:public"], -) - COPTS = [ "-DBTHREAD_USE_FAST_PTHREAD_MUTEX", "-D__const__=__unused__", @@ -65,21 +31,27 @@ COPTS = [ "-D__STDC_CONSTANT_MACROS", "-DGFLAGS_NS=google", ] + select({ - ":with_glog": ["-DBRPC_WITH_GLOG=1"], + "//bazel/config:brpc_with_glog": ["-DBRPC_WITH_GLOG=1"], "//conditions:default": ["-DBRPC_WITH_GLOG=0"], }) + select({ - ":with_mesalink": ["-DUSE_MESALINK"], + "//bazel/config:brpc_with_mesalink": ["-DUSE_MESALINK"], "//conditions:default": [""], }) + select({ - ":with_thrift": ["-DENABLE_THRIFT_FRAMED_PROTOCOL=1"], + "//bazel/config:brpc_with_thrift": ["-DENABLE_THRIFT_FRAMED_PROTOCOL=1"], + "//conditions:default": [""], +}) + select({ + "//bazel/config:brpc_with_thrift_legacy_version": [], + "//conditions:default": ["-DTHRIFT_STDCXX=std"], +}) + select({ + "//bazel/config:brpc_with_rdma": ["-DBRPC_WITH_RDMA=1"], "//conditions:default": [""], }) LINKOPTS = [ - "-lpthread", + "-pthread", "-ldl", ] + select({ - ":darwin": [ + "@bazel_tools//tools/osx:darwin": [ "-framework CoreFoundation", "-framework CoreGraphics", "-framework 
CoreData", @@ -92,18 +64,17 @@ LINKOPTS = [ "-Wl,-U,_RegisterThriftProtocol", ], "//conditions:default": [ - "-lrt", + "-lrt", ], }) + select({ - ":with_mesalink": [ + "//bazel/config:brpc_with_mesalink": [ "-lmesalink", ], "//conditions:default": [], }) + select({ - ":with_thrift": [ - "-lthriftnb", - "-levent", - "-lthrift"], + "//bazel/config:brpc_with_rdma": [ + "-libverbs", + ], "//conditions:default": [], }) @@ -116,18 +87,17 @@ genrule( // This file is auto-generated. #ifndef BUTIL_CONFIG_H #define BUTIL_CONFIG_H - #ifdef BRPC_WITH_GLOG #undef BRPC_WITH_GLOG #endif #define BRPC_WITH_GLOG """ + select({ - ":with_glog": "1", - "//conditions:default": "0", -}) + -""" + "//bazel/config:brpc_with_glog": "1", + "//conditions:default": "0", + }) + + """ #endif // BUTIL_CONFIG_H EOF - """ + """, ) BUTIL_SRCS = [ @@ -244,20 +214,20 @@ BUTIL_SRCS = [ "src/butil/recordio.cc", "src/butil/popen.cpp", ] + select({ - ":darwin": [ - "src/butil/time/time_mac.cc", - "src/butil/mac/scoped_mach_port.cc", - ], - "//conditions:default": [ - "src/butil/file_util_linux.cc", - "src/butil/threading/platform_thread_linux.cc", - "src/butil/strings/sys_string_conversions_posix.cc", - ], + "@bazel_tools//tools/osx:darwin": [ + "src/butil/time/time_mac.cc", + "src/butil/mac/scoped_mach_port.cc", + ], + "//conditions:default": [ + "src/butil/file_util_linux.cc", + "src/butil/threading/platform_thread_linux.cc", + "src/butil/strings/sys_string_conversions_posix.cc", + ], }) objc_library( name = "macos_lib", - hdrs = [":config_h", + hdrs = [ "src/butil/atomicops.h", "src/butil/atomicops_internals_atomicword_compat.h", "src/butil/atomicops_internals_mac.h", @@ -269,10 +239,10 @@ objc_library( "src/butil/containers/hash_tables.h", "src/butil/debug/debugger.h", "src/butil/debug/leak_annotations.h", - "src/butil/file_util.h", "src/butil/file_descriptor_posix.h", - "src/butil/files/file_path.h", + "src/butil/file_util.h", "src/butil/files/file.h", + "src/butil/files/file_path.h", 
"src/butil/files/scoped_file.h", "src/butil/lazy_instance.h", "src/butil/logging.h", @@ -294,15 +264,18 @@ objc_library( "src/butil/strings/string_util_posix.h", "src/butil/strings/sys_string_conversions.h", "src/butil/synchronization/lock.h", - "src/butil/time/time.h", - "src/butil/time.h", "src/butil/third_party/dynamic_annotations/dynamic_annotations.h", + "src/butil/third_party/murmurhash3/murmurhash3.h", "src/butil/threading/platform_thread.h", - "src/butil/threading/thread_restrictions.h", "src/butil/threading/thread_id_name_manager.h", + "src/butil/threading/thread_restrictions.h", + "src/butil/time.h", + "src/butil/time/time.h", "src/butil/type_traits.h", - "src/butil/third_party/murmurhash3/murmurhash3.h", + ":config_h", ], + enable_modules = True, + includes = ["src/"], non_arc_srcs = [ "src/butil/mac/bundle_locations.mm", "src/butil/mac/foundation_util.mm", @@ -310,15 +283,13 @@ objc_library( "src/butil/threading/platform_thread_mac.mm", "src/butil/strings/sys_string_conversions_mac.mm", ], + tags = ["manual"], deps = [ "@com_github_gflags_gflags//:gflags", ] + select({ - ":with_glog": ["@com_github_google_glog//:glog"], + "//bazel/config:brpc_with_glog": ["@com_github_google_glog//:glog"], "//conditions:default": [], }), - includes = ["src/"], - enable_modules = True, - tags = ["manual"], ) cc_library( @@ -330,48 +301,52 @@ cc_library( "src/butil/**/*.h", "src/butil/**/*.hpp", "src/butil/**/**/*.h", - "src/butil/**/**/*.hpp", "src/butil/**/**/**/*.h", + "src/butil/**/**/*.hpp", + "src/butil/**/**/**/*.h", "src/butil/**/**/**/*.hpp", + ]) + [ "src/butil/third_party/dmg_fp/dtoa.cc", - ]) + [":config_h"], - deps = [ - "@com_google_protobuf//:protobuf", - "@com_github_gflags_gflags//:gflags", - "@zlib//:zlib", - ] + select({ - ":with_glog": ["@com_github_google_glog//:glog"], - "//conditions:default": [], - }) + select({ - ":darwin": [":macos_lib"], - "//conditions:default": [], - }) + select({ - ":darwin": ["//external:ssl_macos"], - 
"//conditions:default": ["//external:ssl"], - }), - includes = [ - "src/", + ":config_h", ], copts = COPTS + select({ - ":unittest": [ + "//bazel/config:brpc_build_for_unittest": [ "-DBVAR_NOT_LINK_DEFAULT_VARIABLES", "-DUNIT_TEST", ], "//conditions:default": [], }), + includes = [ + "src/", + ], linkopts = LINKOPTS, visibility = ["//visibility:public"], + deps = [ + "@com_github_gflags_gflags//:gflags", + "@com_github_madler_zlib//:zlib", + "@com_google_protobuf//:protobuf", + "@openssl//:crypto", + "@openssl//:ssl", + ] + select({ + "//bazel/config:brpc_with_glog": ["@com_github_google_glog//:glog"], + "//conditions:default": [], + }) + select({ + "@bazel_tools//tools/osx:darwin": [":macos_lib"], + "//conditions:default": [], + }), ) cc_library( name = "bvar", - srcs = glob([ - "src/bvar/*.cpp", - "src/bvar/detail/*.cpp", - ], - exclude = [ - "src/bvar/default_variables.cpp", - ]) + select({ - ":unittest": [], + srcs = glob( + [ + "src/bvar/*.cpp", + "src/bvar/detail/*.cpp", + ], + exclude = [ + "src/bvar/default_variables.cpp", + ], + ) + select({ + "//bazel/config:brpc_build_for_unittest": [], "//conditions:default": ["src/bvar/default_variables.cpp"], }), hdrs = glob([ @@ -379,21 +354,21 @@ cc_library( "src/bvar/utils/*.h", "src/bvar/detail/*.h", ]), - includes = [ - "src/", - ], - deps = [ - ":butil", - ], copts = COPTS + select({ - ":unittest": [ + "//bazel/config:brpc_build_for_unittest": [ "-DBVAR_NOT_LINK_DEFAULT_VARIABLES", "-DUNIT_TEST", ], "//conditions:default": [], }), + includes = [ + "src/", + ], linkopts = LINKOPTS, visibility = ["//visibility:public"], + deps = [ + ":butil", + ], ) cc_library( @@ -405,16 +380,16 @@ cc_library( "src/bthread/*.h", "src/bthread/*.list", ]), + copts = COPTS, includes = [ - "src/" + "src/", ], + linkopts = LINKOPTS, + visibility = ["//visibility:public"], deps = [ ":butil", ":bvar", ], - copts = COPTS, - linkopts = LINKOPTS, - visibility = ["//visibility:public"], ) cc_library( @@ -425,15 +400,15 @@ cc_library( hdrs = 
glob([ "src/json2pb/*.h", ]), + copts = COPTS, includes = [ "src/", ], + linkopts = LINKOPTS, + visibility = ["//visibility:public"], deps = [ ":butil", ], - copts = COPTS, - linkopts = LINKOPTS, - visibility = ["//visibility:public"], ) cc_library( @@ -447,42 +422,66 @@ cc_library( hdrs = glob([ "src/mcpack2pb/*.h", ]), + copts = COPTS, includes = [ "src/", ], + linkopts = LINKOPTS, + visibility = ["//visibility:public"], deps = [ + ":brpc_idl_options_cc_proto", ":butil", - ":cc_brpc_idl_options_proto", "@com_google_protobuf//:protoc_lib", ], - copts = COPTS, - linkopts = LINKOPTS, - visibility = ["//visibility:public"], ) -brpc_proto_library( - name = "cc_brpc_idl_options_proto", +filegroup( + name = "brpc_idl_options_proto_srcs", srcs = [ "src/idl_options.proto", ], + visibility = ["//visibility:public"], +) + +proto_library( + name = "brpc_idl_options_proto", + srcs = [":brpc_idl_options_proto_srcs"], + visibility = ["//visibility:public"], deps = [ - "@com_google_protobuf//:cc_wkt_protos" + "@com_google_protobuf//:descriptor_proto", ], +) + +cc_proto_library( + name = "brpc_idl_options_cc_proto", visibility = ["//visibility:public"], + deps = [":brpc_idl_options_proto"], ) -brpc_proto_library( - name = "cc_brpc_internal_proto", +filegroup( + name = "brpc_internal_proto_srcs", srcs = glob([ "src/brpc/*.proto", "src/brpc/policy/*.proto", ]), - include = "src/", + visibility = ["//visibility:public"], +) + +proto_library( + name = "brpc_internal_proto", + srcs = [":brpc_internal_proto_srcs"], + strip_import_prefix = "src", + visibility = ["//visibility:public"], deps = [ - ":cc_brpc_idl_options_proto", - "@com_google_protobuf//:cc_wkt_protos" + ":brpc_idl_options_proto", + "@com_google_protobuf//:descriptor_proto", ], +) + +cc_proto_library( + name = "brpc_internal_cc_proto", visibility = ["//visibility:public"], + deps = [":brpc_internal_proto"], ) cc_library( @@ -498,10 +497,11 @@ cc_library( "src/brpc/event_dispatcher_epoll.cpp", 
"src/brpc/event_dispatcher_kqueue.cpp", ]) + select({ - ":with_thrift" : glob([ + "//bazel/config:brpc_with_thrift": glob([ "src/brpc/thrift*.cpp", - "src/brpc/**/thrift*.cpp"]), - "//conditions:default" : [], + "src/brpc/**/thrift*.cpp", + ]), + "//conditions:default": [], }), hdrs = glob([ "src/brpc/*.h", @@ -509,21 +509,26 @@ cc_library( "src/brpc/event_dispatcher_epoll.cpp", "src/brpc/event_dispatcher_kqueue.cpp", ]), + copts = COPTS, includes = [ "src/", ], + linkopts = LINKOPTS, + visibility = ["//visibility:public"], deps = [ - ":butil", + ":brpc_internal_cc_proto", ":bthread", + ":butil", ":bvar", ":json2pb", ":mcpack2pb", - ":cc_brpc_internal_proto", "@com_github_google_leveldb//:leveldb", - ], - copts = COPTS, - linkopts = LINKOPTS, - visibility = ["//visibility:public"], + ] + select({ + "//bazel/config:brpc_with_thrift": [ + "@org_apache_thrift//:thrift", + ], + "//conditions:default": [], + }), ) cc_binary( @@ -531,11 +536,11 @@ cc_binary( srcs = [ "src/mcpack2pb/generator.cpp", ], - deps = [ - ":cc_brpc_idl_options_proto", - ":brpc", - ], copts = COPTS, linkopts = LINKOPTS, visibility = ["//visibility:public"], + deps = [ + ":brpc", + ":brpc_idl_options_cc_proto", + ], ) diff --git a/CMakeLists.txt b/CMakeLists.txt index 713a090ef9..59a29e6742 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -24,6 +24,7 @@ option(DEBUG "Print debug logs" OFF) option(WITH_DEBUG_SYMBOLS "With debug symbols" ON) option(WITH_THRIFT "With thrift framed protocol supported" OFF) option(WITH_SNAPPY "With snappy" OFF) +option(WITH_RDMA "With RDMA" OFF) option(BUILD_UNIT_TESTS "Whether to build unit tests" OFF) option(BUILD_BRPC_TOOLS "Whether to build brpc tools" ON) option(DOWNLOAD_GTEST "Download and build a fresh copy of googletest. Requires Internet access." 
ON) @@ -33,7 +34,7 @@ if(POLICY CMP0042) cmake_policy(SET CMP0042 NEW) endif() -set(BRPC_VERSION 1.2.0) +set(BRPC_VERSION 1.3.0) SET(CPACK_GENERATOR "DEB") SET(CPACK_DEBIAN_PACKAGE_MAINTAINER "brpc authors") @@ -68,6 +69,11 @@ if(WITH_THRIFT) set(THRIFT_LIB "thrift") endif() +set(WITH_RDMA_VAL "0") +if(WITH_RDMA) + set(WITH_RDMA_VAL "1") +endif() + include(GNUInstallDirs) configure_file(${PROJECT_SOURCE_DIR}/config.h.in ${PROJECT_SOURCE_DIR}/src/butil/config.h @ONLY) @@ -106,7 +112,7 @@ if(CMAKE_SYSTEM_NAME STREQUAL "Darwin") set(CMAKE_CPP_FLAGS "${CMAKE_CPP_FLAGS} -Wno-deprecated-declarations -Wno-inconsistent-missing-override") endif() -set(CMAKE_CPP_FLAGS "${CMAKE_CPP_FLAGS} ${DEFINE_CLOCK_GETTIME} -DBRPC_WITH_GLOG=${WITH_GLOG_VAL} -DGFLAGS_NS=${GFLAGS_NS}") +set(CMAKE_CPP_FLAGS "${CMAKE_CPP_FLAGS} ${DEFINE_CLOCK_GETTIME} -DBRPC_WITH_GLOG=${WITH_GLOG_VAL} -DBRPC_WITH_RDMA=${WITH_RDMA_VAL} -DGFLAGS_NS=${GFLAGS_NS}") if(WITH_MESALINK) set(CMAKE_CPP_FLAGS "${CMAKE_CPP_FLAGS} -DUSE_MESALINK") endif() @@ -182,6 +188,15 @@ if(WITH_MESALINK) include_directories(${MESALINK_INCLUDE_PATH}) endif() +if(WITH_RDMA) + message("brpc compile with rdma") + find_path(RDMA_INCLUDE_PATH NAMES infiniband/verbs.h) + find_library(RDMA_LIB NAMES ibverbs) + if((NOT RDMA_INCLUDE_PATH) OR (NOT RDMA_LIB)) + message(FATAL_ERROR "Fail to find ibverbs") + endif() +endif() + find_library(PROTOC_LIB NAMES protoc) if(NOT PROTOC_LIB) message(FATAL_ERROR "Fail to find protoc lib") @@ -220,6 +235,10 @@ else() list(APPEND DYNAMIC_LIB ${OPENSSL_SSL_LIBRARY}) endif() +if(WITH_RDMA) + list(APPEND DYNAMIC_LIB ${RDMA_LIB}) +endif() + set(BRPC_PRIVATE_LIBS "-lgflags -lprotobuf -lleveldb -lprotoc -lssl -lcrypto -ldl -lz") if(WITH_GLOG) diff --git a/Makefile b/Makefile index f4ea71b477..d707c5fab2 100644 --- a/Makefile +++ b/Makefile @@ -194,7 +194,7 @@ JSON2PB_DIRS = src/json2pb JSON2PB_SOURCES = $(foreach d,$(JSON2PB_DIRS),$(wildcard $(addprefix $(d)/*,$(SRCEXTS)))) JSON2PB_OBJS = $(addsuffix .o, 
$(basename $(JSON2PB_SOURCES))) -BRPC_DIRS = src/brpc src/brpc/details src/brpc/builtin src/brpc/policy +BRPC_DIRS = src/brpc src/brpc/details src/brpc/builtin src/brpc/policy src/brpc/rdma THRIFT_SOURCES = $(foreach d,$(BRPC_DIRS),$(wildcard $(addprefix $(d)/thrift*,$(SRCEXTS)))) EXCLUDE_SOURCES = $(foreach d,$(BRPC_DIRS),$(wildcard $(addprefix $(d)/event_dispatcher_*,$(SRCEXTS)))) BRPC_SOURCES_ALL = $(foreach d,$(BRPC_DIRS),$(wildcard $(addprefix $(d)/*,$(SRCEXTS)))) diff --git a/RELEASE_VERSION b/RELEASE_VERSION index 26aaba0e86..f0bb29e763 100644 --- a/RELEASE_VERSION +++ b/RELEASE_VERSION @@ -1 +1 @@ -1.2.0 +1.3.0 diff --git a/WORKSPACE b/WORKSPACE index 441fd16dea..9ad4ada1d8 100644 --- a/WORKSPACE +++ b/WORKSPACE @@ -19,77 +19,278 @@ workspace(name = "com_github_brpc_brpc") load("@bazel_tools//tools/build_defs/repo:http.bzl", "http_archive") -skylib_version = "0.8.0" +# +# Constants +# + +BAZEL_IO_VERSION = "4.2.2" # 2021-12-03T09:26:35Z + +BAZEL_IO_SHA256 = "4c179ce66bbfff6ac5d81b8895518096e7f750866d08da2d4a574d1b8029e914" + +BAZEL_SKYLIB_VERSION = "1.1.1" # 2021-09-27T17:33:49Z + +BAZEL_SKYLIB_SHA256 = "c6966ec828da198c5d9adbaa94c05e3a1c7f21bd012a0b29ba8ddbccb2c93b0d" + +BAZEL_PLATFORMS_VERSION = "0.0.4" # 2021-02-26 + +BAZEL_PLATFORMS_SHA256 = "079945598e4b6cc075846f7fd6a9d0857c33a7afc0de868c2ccb96405225135d" + +RULES_PROTO_TAG = "4.0.0" # 2021-09-15T14:13:21Z + +RULES_PROTO_SHA256 = "66bfdf8782796239d3875d37e7de19b1d94301e8972b3cbd2446b332429b4df1" + +RULES_CC_COMMIT_ID = "0913abc3be0edff60af681c0473518f51fb9eeef" # 2021-08-12T14:14:28Z + +RULES_CC_SHA256 = "04d22a8c6f0caab1466ff9ae8577dbd12a0c7d0bc468425b75de094ec68ab4f9" + +# +# Starlark libraries +# + +http_archive( + name = "io_bazel", + sha256 = BAZEL_IO_SHA256, + strip_prefix = "bazel-" + BAZEL_IO_VERSION, + url = "https://github.com/bazelbuild/bazel/archive/" + BAZEL_IO_VERSION + ".zip", +) + http_archive( name = "bazel_skylib", - type = "tar.gz", - url = 
"https://github.com/bazelbuild/bazel-skylib/releases/download/{}/bazel-skylib.{}.tar.gz".format (skylib_version, skylib_version), - sha256 = "2ef429f5d7ce7111263289644d233707dba35e39696377ebab8b0bc701f7818e", + sha256 = BAZEL_SKYLIB_SHA256, + urls = [ + "https://github.com/bazelbuild/bazel-skylib/releases/download/{version}/bazel-skylib-{version}.tar.gz".format(version = BAZEL_SKYLIB_VERSION), + "https://mirror.bazel.build/github.com/bazelbuild/bazel-skylib/releases/download/{version}/bazel-skylib-{version}.tar.gz".format(version = BAZEL_SKYLIB_VERSION), + ], +) + +http_archive( + name = "platforms", + sha256 = BAZEL_PLATFORMS_SHA256, + urls = [ + "https://mirror.bazel.build/github.com/bazelbuild/platforms/releases/download/{version}/platforms-{version}.tar.gz".format(version = BAZEL_PLATFORMS_VERSION), + "https://github.com/bazelbuild/platforms/releases/download/{version}/platforms-{version}.tar.gz".format(version = BAZEL_PLATFORMS_VERSION), + ], +) + +http_archive( + name = "rules_proto", + sha256 = RULES_PROTO_SHA256, + strip_prefix = "rules_proto-{version}".format(version = RULES_PROTO_TAG), + urls = ["https://github.com/bazelbuild/rules_proto/archive/refs/tags/{version}.tar.gz".format(version = RULES_PROTO_TAG)], +) + +http_archive( + name = "rules_cc", + sha256 = RULES_CC_SHA256, + strip_prefix = "rules_cc-{commit_id}".format(commit_id = RULES_CC_COMMIT_ID), + urls = [ + "https://github.com/bazelbuild/rules_cc/archive/{commit_id}.tar.gz".format(commit_id = RULES_CC_COMMIT_ID), + ], +) + +http_archive( + name = "rules_perl", # 2021-09-23T03:21:58Z + sha256 = "55fbe071971772758ad669615fc9aac9b126db6ae45909f0f36de499f6201dd3", + strip_prefix = "rules_perl-2f4f36f454375e678e81e5ca465d4d497c5c02da", + urls = [ + "https://github.com/bazelbuild/rules_perl/archive/2f4f36f454375e678e81e5ca465d4d497c5c02da.tar.gz", + ], +) + +# Use rules_foreign_cc as fewer as possible. +# +# 1. Build very basic libraries without any further dependencies. +# 2. 
Build too complex to bazelize library. +http_archive( + name = "rules_foreign_cc", # 2021-12-03T17:15:40Z + sha256 = "1df78c7d7eed2dc21b8b325a2853c31933a81e7b780f9a59a5d078be9008b13a", + strip_prefix = "rules_foreign_cc-0.7.0", + url = "https://github.com/bazelbuild/rules_foreign_cc/archive/0.7.0.tar.gz", +) + +# +# Starlark rules +# + +load("@rules_foreign_cc//foreign_cc:repositories.bzl", "rules_foreign_cc_dependencies") + +rules_foreign_cc_dependencies(register_preinstalled_tools = False) + +# +# C++ Dependencies +# +# Ordered lexicographical. +# + +http_archive( + name = "boost", # 2021-08-05T01:30:05Z + build_file = "@com_github_nelhage_rules_boost//:BUILD.boost", + patch_cmds = ["rm -f doc/pdf/BUILD"], + patch_cmds_win = ["Remove-Item -Force doc/pdf/BUILD"], + sha256 = "5347464af5b14ac54bb945dc68f1dd7c56f0dad7262816b956138fc53bcc0131", + strip_prefix = "boost_1_77_0", + urls = [ + "https://boostorg.jfrog.io/artifactory/main/release/1.77.0/source/boost_1_77_0.tar.gz", + ], +) + +http_archive( + name = "com_github_gflags_gflags", # 2018-11-11T21:30:10Z + sha256 = "34af2f15cf7367513b352bdcd2493ab14ce43692d2dcd9dfc499492966c64dcf", + strip_prefix = "gflags-2.2.2", + urls = ["https://github.com/gflags/gflags/archive/v2.2.2.tar.gz"], +) + +http_archive( + name = "com_github_google_crc32c", # 2021-10-05T19:47:30Z + build_file = "//bazel/third_party/crc32c:crc32c.BUILD", + sha256 = "ac07840513072b7fcebda6e821068aa04889018f24e10e46181068fb214d7e56", + strip_prefix = "crc32c-1.1.2", + urls = ["https://github.com/google/crc32c/archive/1.1.2.tar.gz"], +) + +http_archive( + name = "com_github_google_glog", # 2021-05-07T23:06:39Z + patch_args = ["-p1"], + patches = [ + "//bazel/third_party/glog:0001-mark-override-resolve-warning.patch", + ], + sha256 = "21bc744fb7f2fa701ee8db339ded7dce4f975d0d55837a97be7d46e8382dea5a", + strip_prefix = "glog-0.5.0", + urls = ["https://github.com/google/glog/archive/v0.5.0.zip"], ) http_archive( - name = "com_google_protobuf", - 
strip_prefix = "protobuf-3.6.1.3", - sha256 = "9510dd2afc29e7245e9e884336f848c8a6600a14ae726adb6befdb4f786f0be2", - type = "zip", - url = "https://github.com/protocolbuffers/protobuf/archive/v3.6.1.3.zip", + name = "com_github_google_leveldb", # 2021-02-23T21:51:12Z + build_file = "//bazel/third_party/leveldb:leveldb.BUILD", + sha256 = "9a37f8a6174f09bd622bc723b55881dc541cd50747cbd08831c2a82d620f6d76", + strip_prefix = "leveldb-1.23", + urls = [ + "https://github.com/google/leveldb/archive/refs/tags/1.23.tar.gz", + ], ) http_archive( - name = "com_github_gflags_gflags", - strip_prefix = "gflags-46f73f88b18aee341538c0dfc22b1710a6abedef", - url = "https://github.com/gflags/gflags/archive/46f73f88b18aee341538c0dfc22b1710a6abedef.tar.gz", + name = "com_github_google_snappy", # 2017-08-25 + build_file = "//bazel/third_party/snappy:snappy.BUILD", + sha256 = "3dfa02e873ff51a11ee02b9ca391807f0c8ea0529a4924afa645fbf97163f9d4", + strip_prefix = "snappy-1.1.7", + urls = [ + "https://storage.googleapis.com/mirror.tensorflow.org/github.com/google/snappy/archive/1.1.7.tar.gz", + "https://github.com/google/snappy/archive/1.1.7.tar.gz", + ], ) -bind( - name = "gflags", - actual = "@com_github_gflags_gflags//:gflags", +http_archive( + name = "com_github_libevent_libevent", # 2020-07-05T13:33:03Z + build_file = "//bazel/third_party/event:event.BUILD", + sha256 = "92e6de1be9ec176428fd2367677e61ceffc2ee1cb119035037a27d346b0403bb", + strip_prefix = "libevent-2.1.12-stable", + urls = [ + "https://github.com/libevent/libevent/releases/download/release-2.1.12-stable/libevent-2.1.12-stable.tar.gz", + ], ) +# TODO: SIMD optimization. 
+# https://github.com/cloudflare/zlib http_archive( - name = "com_github_google_leveldb", - build_file = "//:leveldb.BUILD", - strip_prefix = "leveldb-a53934a3ae1244679f812d998a4f16f2c7f309a6", - url = "https://github.com/google/leveldb/archive/a53934a3ae1244679f812d998a4f16f2c7f309a6.tar.gz" + name = "com_github_madler_zlib", # 2017-01-15T17:57:23Z + build_file = "//bazel/third_party/zlib:zlib.BUILD", + sha256 = "c3e5e9fdd5004dcb542feda5ee4f0ff0744628baf8ed2dd5d66f8ca1197cb1a1", + strip_prefix = "zlib-1.2.11", + urls = [ + "https://downloads.sourceforge.net/project/libpng/zlib/1.2.11/zlib-1.2.11.tar.gz", + "https://zlib.net/fossils/zlib-1.2.11.tar.gz", + ], ) http_archive( - name = "com_github_google_glog", - build_file = "//:glog.BUILD", - strip_prefix = "glog-a6a166db069520dbbd653c97c2e5b12e08a8bb26", - url = "https://github.com/google/glog/archive/a6a166db069520dbbd653c97c2e5b12e08a8bb26.tar.gz" + name = "com_github_nelhage_rules_boost", # 2021-08-27T15:46:06Z + patch_cmds = ["sed -i 's/net_zlib_zlib/com_github_madler_zlib/g' BUILD.boost"], + patch_cmds_win = [ + """$content = (Get-Content 'BUILD.boost') -replace "net_zlib_zlib", "com_github_madler_zlib" +Set-Content BUILD.boost -Value $content -Encoding UTF8 +""", + ], + sha256 = "2d0b2eef7137730dbbb180397fe9c3d601f8f25950c43222cb3ee85256a21869", + strip_prefix = "rules_boost-fce83babe3f6287bccb45d2df013a309fa3194b8", + urls = [ + "https://github.com/nelhage/rules_boost/archive/fce83babe3f6287bccb45d2df013a309fa3194b8.tar.gz", + ], ) http_archive( - name = "com_google_googletest", - strip_prefix = "googletest-0fe96607d85cf3a25ac40da369db62bbee2939a5", - url = "https://github.com/google/googletest/archive/0fe96607d85cf3a25ac40da369db62bbee2939a5.tar.gz", + name = "com_google_absl", # 2021-09-27T18:06:52Z + sha256 = "2f0d9c7bc770f32bda06a9548f537b63602987d5a173791485151aba28a90099", + strip_prefix = "abseil-cpp-7143e49e74857a009e16c51f6076eb197b6ccb49", + urls = 
["https://github.com/abseil/abseil-cpp/archive/7143e49e74857a009e16c51f6076eb197b6ccb49.zip"], ) -new_local_repository( - name = "openssl", - path = "/usr", - build_file = "//:openssl.BUILD", +http_archive( + name = "com_google_googletest", # 2021-07-09T13:28:13Z + sha256 = "12ef65654dc01ab40f6f33f9d02c04f2097d2cd9fbe48dc6001b29543583b0ad", + strip_prefix = "googletest-8d51ffdfab10b3fba636ae69bc03da4b54f8c235", + urls = ["https://github.com/google/googletest/archive/8d51ffdfab10b3fba636ae69bc03da4b54f8c235.zip"], ) -new_local_repository( - name = "openssl_macos", - build_file = "//:openssl.BUILD", - path = "/usr/local/opt/openssl", +http_archive( + name = "com_google_protobuf", # 2021-10-29T00:04:02Z + build_file = "//bazel/third_party/protobuf:protobuf.BUILD", + patch_cmds = [ + "sed -i protobuf.bzl -re '4,4d;417,508d'", + ], + patch_cmds_win = [ + """$content = Get-Content 'protobuf.bzl' | Where-Object { + -not ($_.ReadCount -ne 4) -and + -not ($_.ReadCount -ge 418 -and $_.ReadCount -le 509) +} +Set-Content protobuf.bzl -Value $content -Encoding UTF8 +""", + ], + sha256 = "87407cd28e7a9c95d9f61a098a53cf031109d451a7763e7dd1253abf8b4df422", + strip_prefix = "protobuf-3.19.1", + urls = ["https://github.com/protocolbuffers/protobuf/archive/refs/tags/v3.19.1.tar.gz"], ) -bind( - name = "ssl", - actual = "@openssl//:ssl" +# bRPC cannot use boringssl. Build openssl. 
+http_archive( + name = "openssl", # 2021-12-14T15:45:01Z + build_file = "//bazel/third_party/openssl:openssl.BUILD", + sha256 = "f89199be8b23ca45fc7cb9f1d8d3ee67312318286ad030f5316aca6462db6c96", + strip_prefix = "openssl-1.1.1m", + urls = [ + "https://www.openssl.org/source/openssl-1.1.1m.tar.gz", + "https://github.com/openssl/openssl/archive/OpenSSL_1_1_1m.tar.gz", + ], ) -bind( - name = "ssl_macos", - actual = "@openssl_macos//:ssl" +http_archive( + name = "org_apache_thrift", # 2021-09-11T11:54:01Z + build_file = "//bazel/third_party/thrift:thrift.BUILD", + sha256 = "d5883566d161f8f6ddd4e21f3a9e3e6b8272799d054820f1c25b11e86718f86b", + strip_prefix = "thrift-0.15.0", + urls = ["https://archive.apache.org/dist/thrift/0.15.0/thrift-0.15.0.tar.gz"], ) -new_local_repository( - name = "zlib", - build_file = "//:zlib.BUILD", - path = "/usr", +# +# Perl Dependencies +# + +load("@rules_perl//perl:deps.bzl", "perl_register_toolchains") + +perl_register_toolchains() + +# +# Tools Dependencies +# + +http_archive( + name = "com_grail_bazel_compdb", + sha256 = "d32835b26dd35aad8fd0ba0d712265df6565a3ad860d39e4c01ad41059ea7eda", + strip_prefix = "bazel-compilation-database-0.5.2", + urls = ["https://github.com/grailbio/bazel-compilation-database/archive/0.5.2.tar.gz"], ) + +load("@com_grail_bazel_compdb//:deps.bzl", "bazel_compdb_deps") + +bazel_compdb_deps() diff --git a/bazel/brpc.bzl b/bazel/BUILD.bazel similarity index 52% rename from bazel/brpc.bzl rename to bazel/BUILD.bazel index 137c1ffa3f..5100f2eeee 100644 --- a/bazel/brpc.bzl +++ b/bazel/BUILD.bazel @@ -13,19 +13,18 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-load("@com_google_protobuf//:protobuf.bzl", "cc_proto_library") - -def brpc_proto_library(name, srcs, deps=[], include=None, visibility=None, testonly=0): - native.filegroup(name=name + "_proto_srcs", - srcs=srcs, - visibility=visibility,) - cc_proto_library(name=name, - srcs=srcs, - deps=deps, - cc_libs=["@com_google_protobuf//:protobuf"], - include=include, - protoc="@com_google_protobuf//:protoc", - default_runtime="@com_google_protobuf//:protobuf", - testonly=testonly, - visibility=visibility,) +load("@com_grail_bazel_compdb//:defs.bzl", "compilation_database") +load("@com_grail_bazel_output_base_util//:defs.bzl", "OUTPUT_BASE") +compilation_database( + name = "brpc_compdb", + # OUTPUT_BASE is a dynamic value that will vary for each user workspace. + # If you would like your build outputs to be the same across users, then + # skip supplying this value, and substitute the default constant value + # "__OUTPUT_BASE__" through an external tool like `sed` or `jq` (see + # below shell commands for usage). + output_base = OUTPUT_BASE, + targets = [ + "//:brpc", + ], +) diff --git a/bazel/config/BUILD.bazel b/bazel/config/BUILD.bazel new file mode 100644 index 0000000000..8b09826515 --- /dev/null +++ b/bazel/config/BUILD.bazel @@ -0,0 +1,105 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +load("@bazel_skylib//lib:selects.bzl", "selects") + +licenses(["notice"]) # Apache v2 + +selects.config_setting_group( + name = "brpc_with_glog", + match_any = [ + ":brpc_with_glog_deprecated_flag", + ":brpc_with_glog_new_flag", + ], + visibility = ["//visibility:public"], +) + +config_setting( + name = "brpc_with_glog_deprecated_flag", + define_values = {"with_glog": "true"}, +) + +config_setting( + name = "brpc_with_glog_new_flag", + define_values = {"BRPC_WITH_GLOG": "true"}, +) + +selects.config_setting_group( + name = "brpc_with_mesalink", + match_any = [ + ":brpc_with_mesalink_deprecated_flag", + ":brpc_with_mesalink_new_flag", + ], + visibility = ["//visibility:public"], +) + +config_setting( + name = "brpc_with_mesalink_deprecated_flag", + define_values = {"with_mesalink": "true"}, +) + +config_setting( + name = "brpc_with_mesalink_new_flag", + define_values = {"BRPC_WITH_MESALINK": "true"}, +) + +selects.config_setting_group( + name = "brpc_with_thrift", + match_any = [ + ":brpc_with_thrift_deprecated_flag", + ":brpc_with_thrift_new_flag", + ], + visibility = ["//visibility:public"], +) + +config_setting( + name = "brpc_with_thrift_legacy_version", + define_values = {"BRPC_WITH_THRIFT_LEGACY_VERSION": "true"}, + visibility = ["//visibility:public"], +) + +config_setting( + name = "brpc_with_thrift_deprecated_flag", + define_values = {"with_thrift": "true"}, +) + +config_setting( + name = "brpc_with_thrift_new_flag", + define_values = {"BRPC_WITH_THRIFT": "true"}, +) + +config_setting( + name = "brpc_build_for_unittest", + define_values = {"BRPC_BUILD_FOR_UNITTEST": "true"}, + visibility = ["//visibility:public"], +) + +config_setting( + name = "brpc_with_sse42", + define_values = {"BRPC_WITH_SSE42": "true"}, + visibility = ["//visibility:public"], +) + +config_setting( + name = "darwin", + values = {"cpu": "darwin"}, + visibility = 
["//:__subpkgs__"], +) + +config_setting( + name = "brpc_with_rdma", + define_values = {"BRPC_WITH_RDMA": "true"}, + visibility = ["//visibility:public"], +) diff --git a/bazel/third_party/BUILD.bazel b/bazel/third_party/BUILD.bazel new file mode 100644 index 0000000000..fefa6c3fea --- /dev/null +++ b/bazel/third_party/BUILD.bazel @@ -0,0 +1,17 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# This empty BUILD.bazel file is required to make Bazel treat +# this directory as a package. diff --git a/bazel/third_party/crc32c/BUILD.bazel b/bazel/third_party/crc32c/BUILD.bazel new file mode 100644 index 0000000000..fefa6c3fea --- /dev/null +++ b/bazel/third_party/crc32c/BUILD.bazel @@ -0,0 +1,17 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# This empty BUILD.bazel file is required to make Bazel treat +# this directory as a package. diff --git a/bazel/third_party/crc32c/crc32c.BUILD b/bazel/third_party/crc32c/crc32c.BUILD new file mode 100644 index 0000000000..72715d48d6 --- /dev/null +++ b/bazel/third_party/crc32c/crc32c.BUILD @@ -0,0 +1,93 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +load("@rules_cc//cc:defs.bzl", "cc_library", "cc_test") + +genrule( + name = "crc32c_config_h", + srcs = ["src/crc32c_config.h.in"], + outs = ["crc32c/crc32c_config.h"], + cmd = """ +sed -e 's/#cmakedefine01/#define/' \ +""" + select({ + "@//bazel/config:brpc_with_sse42": """-e 's/ HAVE_SSE42/ HAVE_SSE42 1/' \ +""", + "//conditions:default": """-e 's/ HAVE_SSE42/ HAVE_SSE42 0/' \ +""", + }) + select({ + "@//bazel/config:brpc_with_glog": """-e 's/ CRC32C_TESTS_BUILT_WITH_GLOG/ CRC32C_TESTS_BUILT_WITH_GLOG 1/' \ +""", + "//conditions:default": """-e 's/ CRC32C_TESTS_BUILT_WITH_GLOG/ CRC32C_TESTS_BUILT_WITH_GLOG 0/' \ +""", + }) + """-e 's/ BYTE_ORDER_BIG_ENDIAN/ BYTE_ORDER_BIG_ENDIAN 0/' \ + -e 's/ HAVE_BUILTIN_PREFETCH/ HAVE_BUILTIN_PREFETCH 0/' \ + -e 's/ HAVE_MM_PREFETCH/ HAVE_MM_PREFETCH 0/' \ + -e 's/ HAVE_ARM64_CRC32C/ HAVE_ARM64_CRC32C 0/' \ + -e 's/ HAVE_STRONG_GETAUXVAL/ HAVE_STRONG_GETAUXVAL 0/' \ + -e 's/ HAVE_WEAK_GETAUXVAL/ HAVE_WEAK_GETAUXVAL 0/' \ + < $< > $@ +""", +) + +cc_library( + name = "crc32c", + srcs = [ + "src/crc32c.cc", + "src/crc32c_arm64.cc", + "src/crc32c_arm64.h", + "src/crc32c_arm64_check.h", + "src/crc32c_internal.h", + "src/crc32c_portable.cc", + "src/crc32c_prefetch.h", + "src/crc32c_read_le.h", + "src/crc32c_round_up.h", + "src/crc32c_sse42.cc", + "src/crc32c_sse42.h", + "src/crc32c_sse42_check.h", + ":crc32c_config_h", + ], + hdrs = [ + "include/crc32c/crc32c.h", + ], + copts = select({ + "@//bazel/config:brpc_with_sse42": ["-msse4.2"], + "//conditions:default": [], + }), + strip_include_prefix = "include", + visibility = ["//visibility:public"], +) + +cc_test( + name = "crc32c_test", + srcs = [ + "src/crc32c_arm64_unittest.cc", + "src/crc32c_extend_unittests.h", + "src/crc32c_portable_unittest.cc", + "src/crc32c_prefetch_unittest.cc", + "src/crc32c_read_le_unittest.cc", + "src/crc32c_round_up_unittest.cc", + "src/crc32c_sse42_unittest.cc", + "src/crc32c_test_main.cc", + "src/crc32c_unittest.cc", + ], + deps = [ + ":crc32c", + 
"@com_google_googletest//:gtest", + "@com_google_googletest//:gtest_main", + ] + select({ + "@//bazel/config:brpc_with_glog": ["@com_github_google_glog//:glog"], + "//conditions:default": [], + }), +) diff --git a/bazel/third_party/event/BUILD.bazel b/bazel/third_party/event/BUILD.bazel new file mode 100644 index 0000000000..fefa6c3fea --- /dev/null +++ b/bazel/third_party/event/BUILD.bazel @@ -0,0 +1,17 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# Thie empty BUILD.bazel file is required to make Bazel treat +# this directory as a package. diff --git a/bazel/third_party/event/event.BUILD b/bazel/third_party/event/event.BUILD new file mode 100644 index 0000000000..6fd67592fa --- /dev/null +++ b/bazel/third_party/event/event.BUILD @@ -0,0 +1,59 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +load("@rules_foreign_cc//foreign_cc:defs.bzl", "cmake") + +filegroup( + name = "all_srcs", + srcs = glob(["**"]), +) + +cmake( + name = "event", + cache_entries = { + "EVENT__DISABLE_BENCHMARK": "ON", + "EVENT__DISABLE_TESTS": "ON", + "EVENT__DISABLE_SAMPLES": "ON", + "EVENT__LIBRARY_TYPE": "STATIC", + "OPENSSL_ROOT_DIR": "$$EXT_BUILD_DEPS$$/openssl", + }, + generate_args = ["-GNinja"], + lib_source = ":all_srcs", + linkopts = [ + "-pthread", + ], + out_static_libs = select({ + "@platforms//os:windows": [ + "event.lib", + "event_core.lib", + "event_extra.lib", + "event_openssl.lib", + "event_pthreads.lib", + ], + "//conditions:default": [ + "libevent.a", + "libevent_core.a", + "libevent_extra.a", + "libevent_openssl.a", + "libevent_pthreads.a", + ], + }), + visibility = ["//visibility:public"], + deps = [ + # Zlib is only used for testing. + "@openssl//:crypto", + "@openssl//:ssl", + ], +) diff --git a/bazel/third_party/glog/0001-mark-override-resolve-warning.patch b/bazel/third_party/glog/0001-mark-override-resolve-warning.patch new file mode 100644 index 0000000000..7a9bbb8ff3 --- /dev/null +++ b/bazel/third_party/glog/0001-mark-override-resolve-warning.patch @@ -0,0 +1,36 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +diff --git a/src/glog/logging.h.in b/src/glog/logging.h.in +index 421f1e0..a363141 100755 +--- a/src/glog/logging.h.in ++++ b/src/glog/logging.h.in +@@ -1334,7 +1334,7 @@ class GOOGLE_GLOG_DLL_DECL LogStreamBuf : public std::streambuf { + } + + // This effectively ignores overflow. +- int_type overflow(int_type ch) { ++ int_type overflow(int_type ch) override { + return ch; + } + +@@ -1862,7 +1862,7 @@ class GOOGLE_GLOG_DLL_DECL NullStreamFatal : public NullStream { + NullStreamFatal() { } + NullStreamFatal(const char* file, int line, const CheckOpString& result) : + NullStream(file, line, result) { } +- @ac_cv___attribute___noreturn@ ~NullStreamFatal() throw () { _exit(1); } ++ @ac_cv___attribute___noreturn@ ~NullStreamFatal() throw () override { _exit(1); } + }; + + // Install a signal handler that will dump signal information and a stack diff --git a/bazel/third_party/glog/BUILD.bazel b/bazel/third_party/glog/BUILD.bazel new file mode 100644 index 0000000000..fefa6c3fea --- /dev/null +++ b/bazel/third_party/glog/BUILD.bazel @@ -0,0 +1,17 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# This empty BUILD.bazel file is required to make Bazel treat +# this directory as a package. diff --git a/bazel/third_party/leveldb/BUILD.bazel b/bazel/third_party/leveldb/BUILD.bazel new file mode 100644 index 0000000000..ea0c10913f --- /dev/null +++ b/bazel/third_party/leveldb/BUILD.bazel @@ -0,0 +1,21 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +exports_files( + [ + "port_config.h", + "port.h", + ], +) diff --git a/bazel/third_party/leveldb/leveldb.BUILD b/bazel/third_party/leveldb/leveldb.BUILD new file mode 100644 index 0000000000..787f77f3d3 --- /dev/null +++ b/bazel/third_party/leveldb/leveldb.BUILD @@ -0,0 +1,72 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. 
See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +load("@rules_cc//cc:defs.bzl", "cc_library") +load("@bazel_skylib//rules:copy_file.bzl", "copy_file") + +copy_file( + name = "port_config_h", + src = "@//bazel/third_party/leveldb:port_config.h", + out = "port/port_config.h", + allow_symlink = True, +) + +copy_file( + name = "port_h", + src = "@//bazel/third_party/leveldb:port.h", + out = "port/port.h", + allow_symlink = True, +) + +cc_library( + name = "leveldb", + srcs = glob( + [ + "db/**/*.cc", + "db/**/*.h", + "helpers/**/*.cc", + "helpers/**/*.h", + "port/**/*.cc", + "port/**/*.h", + "table/**/*.cc", + "table/**/*.h", + "util/**/*.cc", + "util/**/*.h", + ], + exclude = [ + "**/*_test.cc", + "**/testutil.*", + "**/*_bench.cc", + "**/*_windows*", + "db/leveldbutil.cc", + ], + ), + hdrs = glob( + ["include/**/*.h"], + exclude = ["doc/**"], + ) + [ + ":port_h", + ":port_config_h", + ], + includes = [ + ".", + "include", + ], + visibility = ["//visibility:public"], + deps = [ + "@com_github_google_crc32c//:crc32c", + "@com_github_google_snappy//:snappy", + ], +) diff --git a/bazel/third_party/leveldb/port.h b/bazel/third_party/leveldb/port.h new file mode 100644 index 0000000000..8c9a4eafb7 --- /dev/null +++ b/bazel/third_party/leveldb/port.h @@ -0,0 +1,34 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more 
contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. +// +// Copyright (c) 2011 The LevelDB Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. See the AUTHORS file for names of contributors. + +#ifndef STORAGE_LEVELDB_PORT_PORT_H_ +#define STORAGE_LEVELDB_PORT_PORT_H_ + +#include <string.h> + +#define LEVELDB_HAS_PORT_CONFIG_H 1 + +// Include the appropriate platform specific file below. If you are +// porting to a new platform, see "port_example.h" for documentation +// of what the new port_<platform>.h file must provide. +#include "port/port_stdcxx.h" + +#endif // STORAGE_LEVELDB_PORT_PORT_H_ diff --git a/bazel/third_party/leveldb/port_config.h b/bazel/third_party/leveldb/port_config.h new file mode 100644 index 0000000000..4ccdebfb71 --- /dev/null +++ b/bazel/third_party/leveldb/port_config.h @@ -0,0 +1,38 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. +// +// Copyright 2017 The LevelDB Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. See the AUTHORS file for names of contributors. + +#ifndef STORAGE_LEVELDB_PORT_PORT_CONFIG_H_ +#define STORAGE_LEVELDB_PORT_PORT_CONFIG_H_ + +// Define to 1 if you have a definition for fdatasync() in <unistd.h>. +#define HAVE_FUNC_FDATASYNC 1 + +// Define to 1 if you have Google CRC32C. +#define HAVE_CRC32C 1 + +// Define to 1 if you have Google Snappy. +#define HAVE_SNAPPY 1 + +// Define to 1 if your processor stores words with the most significant byte +// first (like Motorola and SPARC, unlike Intel and VAX). +#define LEVELDB_IS_BIG_ENDIAN 0 + +#endif // STORAGE_LEVELDB_PORT_PORT_CONFIG_H_ diff --git a/bazel/third_party/openssl/BUILD.bazel b/bazel/third_party/openssl/BUILD.bazel new file mode 100644 index 0000000000..fefa6c3fea --- /dev/null +++ b/bazel/third_party/openssl/BUILD.bazel @@ -0,0 +1,17 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# This empty BUILD.bazel file is required to make Bazel treat +# this directory as a package. diff --git a/bazel/third_party/openssl/openssl.BUILD b/bazel/third_party/openssl/openssl.BUILD new file mode 100644 index 0000000000..c02cb6fb5b --- /dev/null +++ b/bazel/third_party/openssl/openssl.BUILD @@ -0,0 +1,165 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# Copyright 2016 The Bazel Authors. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# Copied from https://github.com/bazelbuild/rules_foreign_cc/blob/0.7.0/examples/third_party/openssl/BUILD.openssl.bazel +# +# Modifications: +# 1. Create alias `ssl` & `crypto` to align with boringssl. +# 2. Build with `@com_github_madler_zlib//:zlib`. +# 3. Add more configure options coming from debian openssl package configurations. + +load("@rules_foreign_cc//foreign_cc:defs.bzl", "configure_make", "configure_make_variant") + +filegroup( + name = "all_srcs", + srcs = glob(["**"]), +) + +CONFIGURE_OPTIONS = [ + "no-idea", + "no-mdc2", + "no-rc5", + "no-ssl3", + "no-ssl3-method", + "enable-rfc3779", + "enable-cms", + "no-capieng", + "enable-ec_nistp_64_gcc_128", + "--with-zlib-include=$$EXT_BUILD_DEPS$$", + "--with-zlib-lib=$$EXT_BUILD_DEPS$$", + # https://stackoverflow.com/questions/36220341/struct-in6-addr-has-no-member-named-s6-addr32-with-ansi + "-D_DEFAULT_SOURCE=1", + "-DPEDANTIC", +] + +LIB_NAME = "openssl" + +MAKE_TARGETS = [ + "build_libs", + "install_dev", +] + +config_setting( + name = "msvc_compiler", + flag_values = { + "@bazel_tools//tools/cpp:compiler": "msvc-cl", + }, + visibility = ["//visibility:public"], +) + +alias( + name = "ssl", + actual = "openssl", + visibility = ["//visibility:public"], +) + +alias( + name = "crypto", + actual = "openssl", + visibility = ["//visibility:public"], +) + +alias( + name = "openssl", + actual = select({ + ":msvc_compiler": "openssl_msvc", + "//conditions:default": "openssl_default", + }), + visibility = ["//visibility:public"], +) + +configure_make_variant( + name = "openssl_msvc", + build_data = [ 
+ "@nasm//:nasm", + "@perl//:perl", + ], + configure_command = "Configure", + configure_in_place = True, + configure_options = CONFIGURE_OPTIONS + [ + "VC-WIN64A", + # Unset Microsoft Assembler (MASM) flags set by built-in MSVC toolchain, + # as NASM is used to build OpenSSL rather than MASM + "ASFLAGS=\" \"", + ], + configure_prefix = "$PERL", + env = { + # The Zi flag must be set otherwise OpenSSL fails to build due to missing .pdb files + "CFLAGS": "-Zi", + "PATH": "$$(dirname $(execpath @nasm//:nasm)):$$PATH", + "PERL": "$(execpath @perl//:perl)", + }, + lib_name = LIB_NAME, + lib_source = ":all_srcs", + out_static_libs = [ + "libssl.lib", + "libcrypto.lib", + ], + targets = MAKE_TARGETS, + toolchain = "@rules_foreign_cc//toolchains:preinstalled_nmake_toolchain", + deps = [ + "@com_github_madler_zlib//:zlib", + ], +) + +# https://wiki.openssl.org/index.php/Compilation_and_Installation +configure_make( + name = "openssl_default", + configure_command = "config", + configure_in_place = True, + configure_options = CONFIGURE_OPTIONS, + env = select({ + "@platforms//os:macos": { + "AR": "", + "PERL": "$$EXT_BUILD_ROOT$$/$(PERL)", + }, + "//conditions:default": { + "PERL": "$$EXT_BUILD_ROOT$$/$(PERL)", + }, + }), + lib_name = LIB_NAME, + lib_source = ":all_srcs", + # Note that for Linux builds, libssl must come before libcrypto on the linker command-line. 
+ # As such, libssl must be listed before libcrypto + out_static_libs = [ + "libssl.a", + "libcrypto.a", + ], + targets = MAKE_TARGETS, + toolchains = ["@rules_perl//:current_toolchain"], + deps = [ + "@com_github_madler_zlib//:zlib", + ], +) + +filegroup( + name = "gen_dir", + srcs = [":openssl"], + output_group = "gen_dir", +) diff --git a/bazel/third_party/protobuf/BUILD.bazel b/bazel/third_party/protobuf/BUILD.bazel new file mode 100644 index 0000000000..fefa6c3fea --- /dev/null +++ b/bazel/third_party/protobuf/BUILD.bazel @@ -0,0 +1,17 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# This empty BUILD.bazel file is required to make Bazel treat +# this directory as a package. diff --git a/bazel/third_party/protobuf/protobuf.BUILD b/bazel/third_party/protobuf/protobuf.BUILD new file mode 100644 index 0000000000..0d5188ea1c --- /dev/null +++ b/bazel/third_party/protobuf/protobuf.BUILD @@ -0,0 +1,498 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. 
+# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# Copyright 2008 Google Inc. All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above +# copyright notice, this list of conditions and the following disclaimer +# in the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Google Inc. nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +# Copied from https://github.com/protocolbuffers/protobuf/blob/v3.19.1/BUILD +# +# Modifications: +# 1. Remove all non-cxx rules. +# 2. Remove android support. +# 3. zlib use @com_github_madler_zlib//:zlib + +# Bazel (https://bazel.build/) BUILD file for Protobuf. + +load("@rules_cc//cc:defs.bzl", "cc_binary", "cc_library", native_cc_proto_library = "cc_proto_library") +load("@rules_proto//proto:defs.bzl", "proto_lang_toolchain", "proto_library") +load(":compiler_config_setting.bzl", "create_compiler_config_setting") +load( + ":protobuf.bzl", + "adapt_proto_library", +) + +licenses(["notice"]) + +exports_files(["LICENSE"]) + +################################################################################ +# build configuration +################################################################################ + +################################################################################ +# ZLIB configuration +################################################################################ + +ZLIB_DEPS = ["@com_github_madler_zlib//:zlib"] + +################################################################################ +# Protobuf Runtime Library +################################################################################ + +MSVC_COPTS = [ + "/wd4018", # -Wno-sign-compare + "/wd4065", # switch statement contains 'default' but no 'case' labels + "/wd4146", # unary minus operator applied to unsigned type, result still unsigned + 
"/wd4244", # 'conversion' conversion from 'type1' to 'type2', possible loss of data + "/wd4251", # 'identifier' : class 'type' needs to have dll-interface to be used by clients of class 'type2' + "/wd4267", # 'var' : conversion from 'size_t' to 'type', possible loss of data + "/wd4305", # 'identifier' : truncation from 'type1' to 'type2' + "/wd4307", # 'operator' : integral constant overflow + "/wd4309", # 'conversion' : truncation of constant value + "/wd4334", # 'operator' : result of 32-bit shift implicitly converted to 64 bits (was 64-bit shift intended?) + "/wd4355", # 'this' : used in base member initializer list + "/wd4506", # no definition for inline function 'function' + "/wd4800", # 'type' : forcing value to bool 'true' or 'false' (performance warning) + "/wd4996", # The compiler encountered a deprecated declaration. +] + +COPTS = select({ + ":msvc": MSVC_COPTS, + "//conditions:default": [ + "-DHAVE_ZLIB", + "-Wmissing-field-initializers", + "-Woverloaded-virtual", + "-Wno-sign-compare", + ], +}) + +create_compiler_config_setting( + name = "msvc", + value = "msvc-cl", + visibility = [ + # Public, but Protobuf only visibility. + "//:__subpackages__", + ], +) + +# Android and MSVC builds do not need to link in a separate pthread library. +LINK_OPTS = select({ + ":msvc": [ + # Suppress linker warnings about files with no symbols defined. 
+ "-ignore:4221", + ], + "//conditions:default": [ + "-lpthread", + "-lm", + ], +}) + +cc_library( + name = "protobuf_lite", + srcs = [ + # AUTOGEN(protobuf_lite_srcs) + "src/google/protobuf/any_lite.cc", + "src/google/protobuf/arena.cc", + "src/google/protobuf/arenastring.cc", + "src/google/protobuf/extension_set.cc", + "src/google/protobuf/generated_enum_util.cc", + "src/google/protobuf/generated_message_table_driven_lite.cc", + "src/google/protobuf/generated_message_tctable_lite.cc", + "src/google/protobuf/generated_message_util.cc", + "src/google/protobuf/implicit_weak_message.cc", + "src/google/protobuf/inlined_string_field.cc", + "src/google/protobuf/io/coded_stream.cc", + "src/google/protobuf/io/io_win32.cc", + "src/google/protobuf/io/strtod.cc", + "src/google/protobuf/io/zero_copy_stream.cc", + "src/google/protobuf/io/zero_copy_stream_impl.cc", + "src/google/protobuf/io/zero_copy_stream_impl_lite.cc", + "src/google/protobuf/map.cc", + "src/google/protobuf/message_lite.cc", + "src/google/protobuf/parse_context.cc", + "src/google/protobuf/repeated_field.cc", + "src/google/protobuf/repeated_ptr_field.cc", + "src/google/protobuf/stubs/bytestream.cc", + "src/google/protobuf/stubs/common.cc", + "src/google/protobuf/stubs/int128.cc", + "src/google/protobuf/stubs/status.cc", + "src/google/protobuf/stubs/statusor.cc", + "src/google/protobuf/stubs/stringpiece.cc", + "src/google/protobuf/stubs/stringprintf.cc", + "src/google/protobuf/stubs/structurally_valid.cc", + "src/google/protobuf/stubs/strutil.cc", + "src/google/protobuf/stubs/time.cc", + "src/google/protobuf/wire_format_lite.cc", + ], + hdrs = glob([ + "src/google/protobuf/**/*.h", + "src/google/protobuf/**/*.inc", + ]), + copts = COPTS, + includes = ["src/"], + linkopts = LINK_OPTS, + visibility = ["//visibility:public"], +) + +PROTOBUF_DEPS = select({ + ":msvc": [], + "//conditions:default": ZLIB_DEPS, +}) + +cc_library( + name = "protobuf", + srcs = [ + # AUTOGEN(protobuf_srcs) + 
"src/google/protobuf/any.cc", + "src/google/protobuf/any.pb.cc", + "src/google/protobuf/api.pb.cc", + "src/google/protobuf/compiler/importer.cc", + "src/google/protobuf/compiler/parser.cc", + "src/google/protobuf/descriptor.cc", + "src/google/protobuf/descriptor.pb.cc", + "src/google/protobuf/descriptor_database.cc", + "src/google/protobuf/duration.pb.cc", + "src/google/protobuf/dynamic_message.cc", + "src/google/protobuf/empty.pb.cc", + "src/google/protobuf/extension_set_heavy.cc", + "src/google/protobuf/field_mask.pb.cc", + "src/google/protobuf/generated_message_bases.cc", + "src/google/protobuf/generated_message_reflection.cc", + "src/google/protobuf/generated_message_table_driven.cc", + "src/google/protobuf/generated_message_tctable_full.cc", + "src/google/protobuf/io/gzip_stream.cc", + "src/google/protobuf/io/printer.cc", + "src/google/protobuf/io/tokenizer.cc", + "src/google/protobuf/map_field.cc", + "src/google/protobuf/message.cc", + "src/google/protobuf/reflection_ops.cc", + "src/google/protobuf/service.cc", + "src/google/protobuf/source_context.pb.cc", + "src/google/protobuf/struct.pb.cc", + "src/google/protobuf/stubs/substitute.cc", + "src/google/protobuf/text_format.cc", + "src/google/protobuf/timestamp.pb.cc", + "src/google/protobuf/type.pb.cc", + "src/google/protobuf/unknown_field_set.cc", + "src/google/protobuf/util/delimited_message_util.cc", + "src/google/protobuf/util/field_comparator.cc", + "src/google/protobuf/util/field_mask_util.cc", + "src/google/protobuf/util/internal/datapiece.cc", + "src/google/protobuf/util/internal/default_value_objectwriter.cc", + "src/google/protobuf/util/internal/error_listener.cc", + "src/google/protobuf/util/internal/field_mask_utility.cc", + "src/google/protobuf/util/internal/json_escaping.cc", + "src/google/protobuf/util/internal/json_objectwriter.cc", + "src/google/protobuf/util/internal/json_stream_parser.cc", + "src/google/protobuf/util/internal/object_writer.cc", + 
"src/google/protobuf/util/internal/proto_writer.cc", + "src/google/protobuf/util/internal/protostream_objectsource.cc", + "src/google/protobuf/util/internal/protostream_objectwriter.cc", + "src/google/protobuf/util/internal/type_info.cc", + "src/google/protobuf/util/internal/utility.cc", + "src/google/protobuf/util/json_util.cc", + "src/google/protobuf/util/message_differencer.cc", + "src/google/protobuf/util/time_util.cc", + "src/google/protobuf/util/type_resolver_util.cc", + "src/google/protobuf/wire_format.cc", + "src/google/protobuf/wrappers.pb.cc", + ], + hdrs = glob([ + "src/**/*.h", + "src/**/*.inc", + ]), + copts = COPTS, + includes = ["src/"], + linkopts = LINK_OPTS, + visibility = ["//visibility:public"], + deps = [":protobuf_lite"] + PROTOBUF_DEPS, +) + +# This provides just the header files for use in projects that need to build +# shared libraries for dynamic loading. This target is available until Bazel +# adds native support for such use cases. +# TODO(keveman): Remove this target once the support gets added to Bazel. +cc_library( + name = "protobuf_headers", + hdrs = glob([ + "src/**/*.h", + "src/**/*.inc", + ]), + includes = ["src/"], + visibility = ["//visibility:public"], +) + +# Map of all well known protos. 
+# name => (include path, imports) +WELL_KNOWN_PROTO_MAP = { + "any": ("src/google/protobuf/any.proto", []), + "api": ( + "src/google/protobuf/api.proto", + [ + "source_context", + "type", + ], + ), + "compiler_plugin": ( + "src/google/protobuf/compiler/plugin.proto", + ["descriptor"], + ), + "descriptor": ("src/google/protobuf/descriptor.proto", []), + "duration": ("src/google/protobuf/duration.proto", []), + "empty": ("src/google/protobuf/empty.proto", []), + "field_mask": ("src/google/protobuf/field_mask.proto", []), + "source_context": ("src/google/protobuf/source_context.proto", []), + "struct": ("src/google/protobuf/struct.proto", []), + "timestamp": ("src/google/protobuf/timestamp.proto", []), + "type": ( + "src/google/protobuf/type.proto", + [ + "any", + "source_context", + ], + ), + "wrappers": ("src/google/protobuf/wrappers.proto", []), +} + +WELL_KNOWN_PROTOS = [value[0] for value in WELL_KNOWN_PROTO_MAP.values()] + +LITE_WELL_KNOWN_PROTO_MAP = { + "any": ("src/google/protobuf/any.proto", []), + "api": ( + "src/google/protobuf/api.proto", + [ + "source_context", + "type", + ], + ), + "duration": ("src/google/protobuf/duration.proto", []), + "empty": ("src/google/protobuf/empty.proto", []), + "field_mask": ("src/google/protobuf/field_mask.proto", []), + "source_context": ("src/google/protobuf/source_context.proto", []), + "struct": ("src/google/protobuf/struct.proto", []), + "timestamp": ("src/google/protobuf/timestamp.proto", []), + "type": ( + "src/google/protobuf/type.proto", + [ + "any", + "source_context", + ], + ), + "wrappers": ("src/google/protobuf/wrappers.proto", []), +} + +LITE_WELL_KNOWN_PROTOS = [value[0] for value in LITE_WELL_KNOWN_PROTO_MAP.values()] + +filegroup( + name = "well_known_protos", + srcs = WELL_KNOWN_PROTOS, + visibility = ["//visibility:public"], +) + +filegroup( + name = "lite_well_known_protos", + srcs = LITE_WELL_KNOWN_PROTOS, + visibility = ["//visibility:public"], +) + +adapt_proto_library( + name = 
"cc_wkt_protos_genproto", + visibility = ["//visibility:public"], + deps = [proto + "_proto" for proto in WELL_KNOWN_PROTO_MAP.keys()], +) + +cc_library( + name = "cc_wkt_protos", + deprecation = "Only for backward compatibility. Do not use.", + visibility = ["//visibility:public"], +) + +################################################################################ +# Well Known Types Proto Library Rules +# +# These proto_library rules can be used with one of the language specific proto +# library rules i.e. java_proto_library: +# +# java_proto_library( +# name = "any_java_proto", +# deps = ["@com_google_protobuf//:any_proto], +# ) +################################################################################ + +[proto_library( + name = proto[0] + "_proto", + srcs = [proto[1][0]], + strip_import_prefix = "src", + visibility = ["//visibility:public"], + deps = [dep + "_proto" for dep in proto[1][1]], +) for proto in WELL_KNOWN_PROTO_MAP.items()] + +[native_cc_proto_library( + name = proto + "_cc_proto", + visibility = ["//visibility:private"], + deps = [proto + "_proto"], +) for proto in WELL_KNOWN_PROTO_MAP.keys()] + +################################################################################ +# Protocol Buffers Compiler +################################################################################ + +cc_library( + name = "protoc_lib", + srcs = [ + # AUTOGEN(protoc_lib_srcs) + "src/google/protobuf/compiler/code_generator.cc", + "src/google/protobuf/compiler/command_line_interface.cc", + "src/google/protobuf/compiler/cpp/cpp_enum.cc", + "src/google/protobuf/compiler/cpp/cpp_enum_field.cc", + "src/google/protobuf/compiler/cpp/cpp_extension.cc", + "src/google/protobuf/compiler/cpp/cpp_field.cc", + "src/google/protobuf/compiler/cpp/cpp_file.cc", + "src/google/protobuf/compiler/cpp/cpp_generator.cc", + "src/google/protobuf/compiler/cpp/cpp_helpers.cc", + "src/google/protobuf/compiler/cpp/cpp_map_field.cc", + 
"src/google/protobuf/compiler/cpp/cpp_message.cc", + "src/google/protobuf/compiler/cpp/cpp_message_field.cc", + "src/google/protobuf/compiler/cpp/cpp_padding_optimizer.cc", + "src/google/protobuf/compiler/cpp/cpp_parse_function_generator.cc", + "src/google/protobuf/compiler/cpp/cpp_primitive_field.cc", + "src/google/protobuf/compiler/cpp/cpp_service.cc", + "src/google/protobuf/compiler/cpp/cpp_string_field.cc", + "src/google/protobuf/compiler/csharp/csharp_doc_comment.cc", + "src/google/protobuf/compiler/csharp/csharp_enum.cc", + "src/google/protobuf/compiler/csharp/csharp_enum_field.cc", + "src/google/protobuf/compiler/csharp/csharp_field_base.cc", + "src/google/protobuf/compiler/csharp/csharp_generator.cc", + "src/google/protobuf/compiler/csharp/csharp_helpers.cc", + "src/google/protobuf/compiler/csharp/csharp_map_field.cc", + "src/google/protobuf/compiler/csharp/csharp_message.cc", + "src/google/protobuf/compiler/csharp/csharp_message_field.cc", + "src/google/protobuf/compiler/csharp/csharp_primitive_field.cc", + "src/google/protobuf/compiler/csharp/csharp_reflection_class.cc", + "src/google/protobuf/compiler/csharp/csharp_repeated_enum_field.cc", + "src/google/protobuf/compiler/csharp/csharp_repeated_message_field.cc", + "src/google/protobuf/compiler/csharp/csharp_repeated_primitive_field.cc", + "src/google/protobuf/compiler/csharp/csharp_source_generator_base.cc", + "src/google/protobuf/compiler/csharp/csharp_wrapper_field.cc", + "src/google/protobuf/compiler/java/java_context.cc", + "src/google/protobuf/compiler/java/java_doc_comment.cc", + "src/google/protobuf/compiler/java/java_enum.cc", + "src/google/protobuf/compiler/java/java_enum_field.cc", + "src/google/protobuf/compiler/java/java_enum_field_lite.cc", + "src/google/protobuf/compiler/java/java_enum_lite.cc", + "src/google/protobuf/compiler/java/java_extension.cc", + "src/google/protobuf/compiler/java/java_extension_lite.cc", + "src/google/protobuf/compiler/java/java_field.cc", + 
"src/google/protobuf/compiler/java/java_file.cc", + "src/google/protobuf/compiler/java/java_generator.cc", + "src/google/protobuf/compiler/java/java_generator_factory.cc", + "src/google/protobuf/compiler/java/java_helpers.cc", + "src/google/protobuf/compiler/java/java_kotlin_generator.cc", + "src/google/protobuf/compiler/java/java_map_field.cc", + "src/google/protobuf/compiler/java/java_map_field_lite.cc", + "src/google/protobuf/compiler/java/java_message.cc", + "src/google/protobuf/compiler/java/java_message_builder.cc", + "src/google/protobuf/compiler/java/java_message_builder_lite.cc", + "src/google/protobuf/compiler/java/java_message_field.cc", + "src/google/protobuf/compiler/java/java_message_field_lite.cc", + "src/google/protobuf/compiler/java/java_message_lite.cc", + "src/google/protobuf/compiler/java/java_name_resolver.cc", + "src/google/protobuf/compiler/java/java_primitive_field.cc", + "src/google/protobuf/compiler/java/java_primitive_field_lite.cc", + "src/google/protobuf/compiler/java/java_service.cc", + "src/google/protobuf/compiler/java/java_shared_code_generator.cc", + "src/google/protobuf/compiler/java/java_string_field.cc", + "src/google/protobuf/compiler/java/java_string_field_lite.cc", + "src/google/protobuf/compiler/js/js_generator.cc", + "src/google/protobuf/compiler/js/well_known_types_embed.cc", + "src/google/protobuf/compiler/objectivec/objectivec_enum.cc", + "src/google/protobuf/compiler/objectivec/objectivec_enum_field.cc", + "src/google/protobuf/compiler/objectivec/objectivec_extension.cc", + "src/google/protobuf/compiler/objectivec/objectivec_field.cc", + "src/google/protobuf/compiler/objectivec/objectivec_file.cc", + "src/google/protobuf/compiler/objectivec/objectivec_generator.cc", + "src/google/protobuf/compiler/objectivec/objectivec_helpers.cc", + "src/google/protobuf/compiler/objectivec/objectivec_map_field.cc", + "src/google/protobuf/compiler/objectivec/objectivec_message.cc", + 
"src/google/protobuf/compiler/objectivec/objectivec_message_field.cc", + "src/google/protobuf/compiler/objectivec/objectivec_oneof.cc", + "src/google/protobuf/compiler/objectivec/objectivec_primitive_field.cc", + "src/google/protobuf/compiler/php/php_generator.cc", + "src/google/protobuf/compiler/plugin.cc", + "src/google/protobuf/compiler/plugin.pb.cc", + "src/google/protobuf/compiler/python/python_generator.cc", + "src/google/protobuf/compiler/ruby/ruby_generator.cc", + "src/google/protobuf/compiler/subprocess.cc", + "src/google/protobuf/compiler/zip_writer.cc", + ], + copts = COPTS, + includes = ["src/"], + linkopts = LINK_OPTS, + visibility = ["//visibility:public"], + deps = [":protobuf"], +) + +cc_binary( + name = "protoc", + srcs = ["src/google/protobuf/compiler/main.cc"], + linkopts = LINK_OPTS, + visibility = ["//visibility:public"], + deps = [":protoc_lib"], +) + +proto_lang_toolchain( + name = "cc_toolchain", + blacklisted_protos = [proto + "_proto" for proto in WELL_KNOWN_PROTO_MAP.keys()], + command_line = "--cpp_out=$(OUT)", + runtime = ":protobuf", + visibility = ["//visibility:public"], +) + +alias( + name = "objectivec", + actual = "//objectivec", + visibility = ["//visibility:public"], +) + +alias( + name = "protobuf_objc", + actual = "//objectivec", + visibility = ["//visibility:public"], +) diff --git a/bazel/third_party/snappy/BUILD.bazel b/bazel/third_party/snappy/BUILD.bazel new file mode 100644 index 0000000000..fefa6c3fea --- /dev/null +++ b/bazel/third_party/snappy/BUILD.bazel @@ -0,0 +1,17 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# This empty BUILD.bazel file is required to make Bazel treat +# this directory as a package. diff --git a/bazel/third_party/snappy/snappy.BUILD b/bazel/third_party/snappy/snappy.BUILD new file mode 100644 index 0000000000..9cfb1ade1a --- /dev/null +++ b/bazel/third_party/snappy/snappy.BUILD @@ -0,0 +1,122 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# Copyright 2015 The TensorFlow Authors. All Rights Reserved. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Copied from https://github.com/tensorflow/tensorflow/blob/bdd8bf316e4ab7d699127d192d30eb614a158462/third_party/snappy.BUILD + +load("@rules_cc//cc:defs.bzl", "cc_library") + +cc_library( + name = "snappy", + srcs = [ + "config.h", + "snappy.cc", + "snappy.h", + "snappy-internal.h", + "snappy-sinksource.cc", + "snappy-stubs-internal.cc", + "snappy-stubs-internal.h", + "snappy-stubs-public.h", + ], + hdrs = [ + "snappy.h", + "snappy-sinksource.h", + ], + copts = [ + "-DHAVE_CONFIG_H", + "-fno-exceptions", + "-Wno-sign-compare", + "-Wno-shift-negative-value", + ], + includes = ["."], + visibility = ["//visibility:public"], +) + +genrule( + name = "config_h", + outs = ["config.h"], + cmd = "\n".join([ + "cat <<'EOF' >$@", + "#define HAVE_STDDEF_H 1", + "#define HAVE_STDINT_H 1", + "", + "#ifdef __has_builtin", + "# if !defined(HAVE_BUILTIN_EXPECT) && __has_builtin(__builtin_expect)", + "# define HAVE_BUILTIN_EXPECT 1", + "# endif", + "# if !defined(HAVE_BUILTIN_CTZ) && __has_builtin(__builtin_ctzll)", + "# define HAVE_BUILTIN_CTZ 1", + "# endif", + "#elif defined(__GNUC__) && (__GNUC__ > 3 || __GNUC__ == 3 && __GNUC_MINOR__ >= 4)", + "# ifndef HAVE_BUILTIN_EXPECT", + "# define HAVE_BUILTIN_EXPECT 1", + "# endif", + "# ifndef HAVE_BUILTIN_CTZ", + "# define HAVE_BUILTIN_CTZ 1", + "# endif", + "#endif", + "", + "#ifdef __has_include", + "# if !defined(HAVE_BYTESWAP_H) && __has_include(<byteswap.h>)", + "# define HAVE_BYTESWAP_H 1", + "# endif", + "# if !defined(HAVE_UNISTD_H) && __has_include(<unistd.h>)", + "# define HAVE_UNISTD_H 1", + "# endif", + "# if 
!defined(HAVE_SYS_ENDIAN_H) && __has_include(<sys/endian.h>)", + "# define HAVE_SYS_ENDIAN_H 1", + "# endif", + "# if !defined(HAVE_SYS_MMAN_H) && __has_include(<sys/mman.h>)", + "# define HAVE_SYS_MMAN_H 1", + "# endif", + "# if !defined(HAVE_SYS_UIO_H) && __has_include(<sys/uio.h>)", + "# define HAVE_SYS_UIO_H 1", + "# endif", + "#endif", + "", + "#ifndef SNAPPY_IS_BIG_ENDIAN", + "# ifdef __s390x__", + "# define SNAPPY_IS_BIG_ENDIAN 1", + "# elif defined(__BYTE_ORDER__) && defined(__ORDER_BIG_ENDIAN__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__", + "# define SNAPPY_IS_BIG_ENDIAN 1", + "# endif", + "#endif", + "EOF", + ]), +) + +genrule( + name = "snappy_stubs_public_h", + srcs = ["snappy-stubs-public.h.in"], + outs = ["snappy-stubs-public.h"], + cmd = ("sed " + + "-e 's/$${\\(.*\\)_01}/\\1/g' " + + "-e 's/$${SNAPPY_MAJOR}/1/g' " + + "-e 's/$${SNAPPY_MINOR}/1/g' " + + "-e 's/$${SNAPPY_PATCHLEVEL}/7/g' " + + "$< >$@"), +) diff --git a/bazel/third_party/thrift/BUILD.bazel b/bazel/third_party/thrift/BUILD.bazel new file mode 100644 index 0000000000..fefa6c3fea --- /dev/null +++ b/bazel/third_party/thrift/BUILD.bazel @@ -0,0 +1,17 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# +# This empty BUILD.bazel file is required to make Bazel treat +# this directory as a package. 
diff --git a/bazel/third_party/thrift/thrift.BUILD b/bazel/third_party/thrift/thrift.BUILD new file mode 100644 index 0000000000..079606aacf --- /dev/null +++ b/bazel/third_party/thrift/thrift.BUILD @@ -0,0 +1,75 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +load("@rules_foreign_cc//foreign_cc:defs.bzl", "cmake") + +filegroup( + name = "all_srcs", + srcs = glob(["**"]), +) + +cmake( + name = "thrift", + cache_entries = { + "BUILD_COMPILER": "OFF", + "BUILD_TESTING": "OFF", + "BUILD_TUTORIALS": "OFF", + "WITH_AS3": "OFF", + "WITH_CPP": "ON", + "WITH_C_GLIB": "OFF", + "WITH_JAVA": "OFF", + "WITH_JAVASCRIPT": "OFF", + "WITH_NODEJS": "OFF", + "WITH_PYTHON": "OFF", + "BUILD_SHARED_LIBS": "OFF", + "Boost_USE_STATIC_LIBS": "ON", + "BOOST_ROOT": "$$EXT_BUILD_DEPS$$", + "LIBEVENT_INCLUDE_DIRS": "$$EXT_BUILD_DEPS$$/event/include", + "LIBEVENT_LIBRARIES": "$$EXT_BUILD_DEPS$$/event/lib/libevent.a", + "OPENSSL_ROOT_DIR": "$$EXT_BUILD_DEPS$$/openssl", + "ZLIB_ROOT": "$$EXT_BUILD_DEPS$$/zlib", + }, + generate_args = ["-GNinja"], + lib_source = ":all_srcs", + linkopts = [ + "-pthread", + ], + out_static_libs = select({ + "@platforms//os:windows": [ + "thrift.lib", + "thriftnb.lib", + "thriftz.lib", + ], + "//conditions:default": [ + "libthrift.a", + "libthriftnb.a", + "libthriftz.a", + ], + }), + visibility = ["//visibility:public"], + deps = [ + "@boost//:algorithm", + "@boost//:locale", + "@boost//:noncopyable", + "@boost//:numeric_conversion", + "@boost//:scoped_array", + "@boost//:smart_ptr", + "@boost//:tokenizer", + "@com_github_libevent_libevent//:event", + "@com_github_madler_zlib//:zlib", + "@openssl//:crypto", + "@openssl//:ssl", + ], +) diff --git a/bazel/third_party/zlib/BUILD.bazel b/bazel/third_party/zlib/BUILD.bazel new file mode 100644 index 0000000000..fefa6c3fea --- /dev/null +++ b/bazel/third_party/zlib/BUILD.bazel @@ -0,0 +1,17 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# This empty BUILD.bazel file is required to make Bazel treat +# this directory as a package. diff --git a/bazel/third_party/zlib/zlib.BUILD b/bazel/third_party/zlib/zlib.BUILD new file mode 100644 index 0000000000..d8139b63d6 --- /dev/null +++ b/bazel/third_party/zlib/zlib.BUILD @@ -0,0 +1,111 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# Copyright 2008 Google Inc. All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above +# copyright notice, this list of conditions and the following disclaimer +# in the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Google Inc. nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +# Copied from https://github.com/protocolbuffers/protobuf/blob/v3.9.1/third_party/zlib.BUILD + +load("@rules_cc//cc:defs.bzl", "cc_library") + +_ZLIB_HEADERS = [ + "crc32.h", + "deflate.h", + "gzguts.h", + "inffast.h", + "inffixed.h", + "inflate.h", + "inftrees.h", + "trees.h", + "zconf.h", + "zlib.h", + "zutil.h", +] + +_ZLIB_PREFIXED_HEADERS = ["zlib/include/" + hdr for hdr in _ZLIB_HEADERS] + +# In order to limit the damage from the `includes` propagation +# via `:zlib`, copy the public headers to a subdirectory and +# expose those. 
+genrule( + name = "copy_public_headers", + srcs = _ZLIB_HEADERS, + outs = _ZLIB_PREFIXED_HEADERS, + cmd = "cp $(SRCS) $(@D)/zlib/include/", + visibility = ["//visibility:private"], +) + +cc_library( + name = "zlib", + srcs = [ + "adler32.c", + "compress.c", + "crc32.c", + "deflate.c", + "gzclose.c", + "gzlib.c", + "gzread.c", + "gzwrite.c", + "infback.c", + "inffast.c", + "inflate.c", + "inftrees.c", + "trees.c", + "uncompr.c", + "zutil.c", + ], + hdrs = _ZLIB_PREFIXED_HEADERS, + copts = select({ + ":windows": [], + "//conditions:default": [ + "-Wno-unused-variable", + "-Wno-implicit-function-declaration", + ], + }), + includes = ["zlib/include/"], + visibility = ["//visibility:public"], +) + +config_setting( + name = "windows", + constraint_values = [ + "@platforms//os:windows", + ], +) diff --git a/community/cases.md b/community/cases.md index 83eac750d7..893021ca70 100644 --- a/community/cases.md +++ b/community/cases.md @@ -60,4 +60,22 @@ * 公司名称: 欢聚时代 * 落地项目: 推荐、直播 * 使用版本: 基于社区版本定制 -* 信息提供者:chenBright \ No newline at end of file +* 信息提供者:chenBright + +## brpc 在 Apache Doris 中的应用 +* 落地项目:Apache Doris +* 使用版本:1.2.0 +* 使用情况:Apache Doris 作为一款 MPP 分析型数据库,其内部节点间使用 Apache Brpc 作为主要 RPC 框架。Brpc 为 Doris 提供了稳定易用的高性能通信机制。并且 BRPC 提供的 bthread,bvar 等基础库,以及各种性能调试工具,也极大的方便了 Doris 的开发和调试工作。 +* 信息提供者:morningman + +## brpc在BaikalDB中的应用 +* 落地项目:BaikalDB +* 使用版本:社区版0.9.7&百度内部stable +* 使用情况:BaikalDB是一款面向OLTP场景为主的NewSQL数据库,其内部节点均采用brpc框架通信,实现数据处理和复制以及集群管理,brpc的高性能对OLTP场景的低延迟至关重要,同时brpc集成的性能调优工具对SQL性能优化提效显著。 +* 信息提供者:tullyliu + +## brpc在数美科技的落地情况 +* 公司名称: 数美科技(nextdata) +* 落地项目: 智能风控业务(模型特征服务、规则引擎) +* 使用版本: 1.0.0 +* 信息提供者:day253 diff --git a/community/newcommitter.md b/community/newcommitter.md index 94589871ad..4cd7bc2e52 100644 --- a/community/newcommitter.md +++ b/community/newcommitter.md @@ -6,12 +6,13 @@ ### 成为committer的路程 1. 提名者在private@brpc中发起讨论和投票,投票通过即OK (最少3+1, +1 > -1),投票邮件模版https://community.apache.org/newcommitter.html#committer-vote-template 2. 
提名者发送close vote邮件给private@brpc和private@incubator ,标题可以为subject [RESULT][VOTE],close邮件模版https://community.apache.org/newcommitter.html#close-vote -3. 被提名者填写[CLA](https://www.apache.org/licenses/contributor-agreements.html), 个人贡献者需要下载[ICLA](https://www.apache.org/licenses/icla.pdf)填写个人信息并签名,发送电子版给 secretary@apache.org。个人信息填写项(除签名外)可以使用 PDF 阅读器或浏览器填写,填写后保存进行签名。签名方式支持: +3. 提名者给被提名者发invite letter,并得到回复后再提示他提交ICLA,模版见https://community.apache.org/newcommitter.html#Committer%20Invite%20Template +4. 被提名者填写[CLA](https://www.apache.org/licenses/contributor-agreements.html), 个人贡献者需要下载[ICLA](https://www.apache.org/licenses/icla.pdf)填写个人信息并签名,发送电子版给 secretary@apache.org。个人信息填写项(除签名外)可以使用 PDF 阅读器或浏览器填写,填写后保存进行签名。签名方式支持: - 打印 pdf 文件,手写签名后扫描为电子版; - 使用支持手写的设备进行电子签名; - 使用 `gpg` 进行电子签名,即对填写好个人基本信息的 pdf 文件进行操作(需要提前生成与登记邮箱匹配的公钥/密钥对):`gpg --armor --detach-sign icla.pdf`; - 使用 `DocuSign` 进行签名; -4. 提名者发送announce邮件到dev@brpc.apache.org +5. 提名者发送announce邮件到dev@brpc.apache.org ### 如何赋予committer在github上的权限 diff --git a/community/oncall.md b/community/oncall.md index ad09f1ac41..4c1231cb22 100644 --- a/community/oncall.md +++ b/community/oncall.md @@ -46,3 +46,9 @@ | 07/25/2022 - 07/31/2022 | 王伟冰 | https://lists.apache.org/thread/83scwkkfxrp6kkkoltbrn1fthfy3w0qz | 08/08/2022 - 08/14/2022 | 何磊 | https://lists.apache.org/thread/jj16rzfh34yrt6o0xqfdz9wtdtzxzswq | 08/15/2022 - 08/21/2022 | 刘帅 | https://lists.apache.org/thread/jp69sm7c8fs3dkdd828qk0fsojqwwz6h +| 09/05/2022 - 09/12/2022 | 王伟冰 | https://lists.apache.org/thread/4jjk2hxw9s2wskccclqb8fvpqxqffnlb +| 09/12/2022 - 09/18/2022 | 蔡道进| https://lists.apache.org/thread/8mo7zl0l2yrd8tp4v3kx86rnlyfk6wz4 +| 09/19/2022 - 09/25/2022 | 何磊 | https://lists.apache.org/thread/qlkr7cmwow3ob47dt80tpx0zrgzg7bdm +| 09/26/2022 - 10/09/2022 | 刘帅 | https://lists.apache.org/thread/b0lwr8wyflmhqlnf0kkh1j30tgt5qw54 +| 10/10/2022 - 10/16/2022 | 朱佳顺 | https://lists.apache.org/thread/y8sgbprxt21j6r0812dlftosfov6pbgk +| 10/17/2022 - 10/23/2022 | 李磊 | 
https://lists.apache.org/thread/qn2270p9qsrglkh7oy013ts1zk5rlhwx diff --git a/community/release_cn.md b/community/release_cn.md index f3a33ef356..02b58db50a 100644 --- a/community/release_cn.md +++ b/community/release_cn.md @@ -6,6 +6,7 @@ brpc 发布apache release 版本流程step by step 3. 第一次投票:在dev@brpc邮件群里投票 4. 第二次投票:在general@incubator.apache.org邮件群里投票 5. 发版通告:包括更新brpc网站,发邮件 +6. 最后要将release分支合并到master分支 # 签名准备 @@ -26,7 +27,7 @@ gpg --version gpg --full-gen-key ``` -根据提示完成创建key,注意邮箱要使用Apache邮件地址: +根据提示完成创建key,注意邮箱要使用Apache邮件地址,`Real Name`使用姓名Pinyin、Apache ID或GitHub ID等均可: ``` gpg (GnuPG) 2.3.1; Copyright (C) 2021 Free Software Foundation, Inc. This is free software: you are free to change and redistribute it. @@ -127,10 +128,18 @@ uid [ultimate] LorinLee (lorinlee's key) sub rsa4096 2021-10-17 [E] ``` -将上面的 fingerprint 粘贴到⾃己的⽤户信息中: https://id.apache.org +将上面的 fingerprint `C30F 211F 0718 9425 8497 F463 92E1 8A11 B658 5834` 粘贴到⾃己Apache⽤户信息 https://id.apache.org 的`OpenPGP Public Key Primary Fingerprint:`字段中。 # 发布包准备 +## 0. 拉出发版分支 + +如果是发布新的2位版本,如`1.0.0`,则需要从master拉出新的分支`release-1.0`。 + +如果是在已有的2位版本上发布新的3位版本,如`1.0.1`版本,则只需要在已有的`release-1.0`分支上修改加上要发布的内容。 + +发版过程中的操作都在release分支(如`release-1.0`)上操作,如果发版过程发现代码有问题需要修改,也在该分支上进行修改。发版完成后,将该分支合回master分支。 + ## 1. 编辑 RELEASE_VERSION 文件 ### 更新RELEASE_VERSION文件 @@ -147,10 +156,17 @@ sub rsa4096 2021-10-17 [E] set(BRPC_VERSION 1.0.0) ``` +### 更新/package/rpm/brpc.spec文件 +编辑项目根目录下`/package/rpm/brpc.spec`文件,更新版本号,并提交至代码仓库,本文以`1.0.0`版本为例,修改Version为: + +``` +Version: 1.0.0 +``` + ## 2. 创建发布 tag 拉取发布分支,并推送tag ```bash -git clone -b ${branch} git@github.com:apache/incubator-brpc.git ~/incubator-brpc +git clone -b release-1.0 git@github.com:apache/incubator-brpc.git ~/incubator-brpc cd ~/incubator-brpc @@ -222,9 +238,11 @@ cp ~/incubator-brpc/apache-brpc-1.0.0-incubating-src.tar.gz.sha512 ~/brpc_svn/de ## 4. 
提交SVN -使用Apache LDAP账号提交SVN +退回到上级目录,使用Apache LDAP账号提交SVN ```bash +cd ~/brpc_svn/dev/brpc + svn add * svn --username=lorinlee commit -m "release 1.0.0" @@ -294,7 +312,7 @@ tar xvzf tag-1.0.0.tar.gz tar xvzf apache-brpc-1.0.0-incubating-src.tar.gz -diff -r brpc-1.0.0 apache-brpc-1.0.0-incubating-src +diff -r incubator-brpc-1.0.0 apache-brpc-1.0.0-incubating-src ``` ### 2. 检查源码包的文件内容 @@ -328,7 +346,8 @@ diff -r brpc-1.0.0 apache-brpc-1.0.0-incubating-src [VOTE] Release Apache brpc (Incubating) 1.0.0 ``` -正文: +正文: +注:`Release Commit ID`填写当前release发版分支最后一个commit的commit id。 ``` Hi Apache brpc (Incubating) Community, @@ -350,7 +369,7 @@ https://github.com/apache/incubator-brpc/commit/xxx Keys to verify the Release Candidate: https://dist.apache.org/repos/dist/dev/incubator/brpc/KEYS -The vote will be open for at least 72 hours or until necessary number of +The vote will be open for at least 72 hours or until the necessary number of votes are reached. Please vote accordingly: @@ -358,7 +377,7 @@ Please vote accordingly: [ ] +0 no opinion [ ] -1 disapprove with the reason -PMC vote is +1 binding, all others is +1 non-binding. +PMC vote is +1 binding, all others are +1 non-binding. Checklist for reference: [ ] Download links are valid. @@ -531,7 +550,7 @@ svn mv https://dist.apache.org/repos/dist/dev/incubator/brpc/1.0.0 https://dist. ## 2. Github版本发布 -在 GitHub Releases 页面的对应版本上点击 Edit +在 [GitHub Releases 页面](https://github.com/apache/incubator-brpc/tags)的对应版本上点击,创建新的Release页面 编辑版本号及版本说明,并点击 Publish release ## 3. 更新下载页面 @@ -556,16 +575,17 @@ GPG签名文件和哈希校验文件的下载链接应该使用这个前缀:ht [ANNOUNCE] Apache brpc (Incubating) 1.0.0 released ``` -正文 +正文: +注:`Brief notes of this release`仅需列出本次发版的主要变更,且无需指出对应贡献人和PR编号,建议参考下之前的Announce邮件。 ``` Hi all, The Apache brpc (Incubating) community is glad to announce the new release of Apache brpc (Incubating) 1.0.0. 
-brpc is an industrial-grade RPC framework with extremely high performance, -and it supports multiple protocols, full rpc features, and has many -convenient tools. +brpc is an Industrial-grade RPC framework using C++ Language, which is +often used in high performance systems such as Search, Storage, +Machine learning, Advertisement, Recommendation etc. Brief notes of this release: - xxx diff --git a/config_brpc.sh b/config_brpc.sh index d0de0fdb2e..e475d9025f 100755 --- a/config_brpc.sh +++ b/config_brpc.sh @@ -38,9 +38,10 @@ else LDD=ldd fi -TEMP=`getopt -o v: --long headers:,libs:,cc:,cxx:,with-glog,with-thrift,with-mesalink,nodebugsymbols -n 'config_brpc' -- "$@"` +TEMP=`getopt -o v: --long headers:,libs:,cc:,cxx:,with-glog,with-thrift,with-rdma,with-mesalink,nodebugsymbols -n 'config_brpc' -- "$@"` WITH_GLOG=0 WITH_THRIFT=0 +WITH_RDMA=0 WITH_MESALINK=0 DEBUGSYMBOLS=-g @@ -64,6 +65,7 @@ while true; do --cxx ) CXX=$2; shift 2 ;; --with-glog ) WITH_GLOG=1; shift 1 ;; --with-thrift) WITH_THRIFT=1; shift 1 ;; + --with-rdma) WITH_RDMA=1; shift 1 ;; --with-mesalink) WITH_MESALINK=1; shift 1 ;; --nodebugsymbols ) DEBUGSYMBOLS=; shift 1 ;; -- ) shift; break ;; @@ -352,6 +354,18 @@ if [ $WITH_THRIFT != 0 ]; then fi fi +if [ $WITH_RDMA != 0 ]; then + RDMA_LIB=$(find_dir_of_lib_or_die ibverbs) + RDMA_HDR=$(find_dir_of_header_or_die infiniband/verbs.h) + append_to_output_libs "$RDMA_LIB" + append_to_output_headers "$RDMA_HDR" + + CPPFLAGS="${CPPFLAGS} -DBRPC_WITH_RDMA" + + append_to_output "DYNAMIC_LINKINGS+=-libverbs" + append_to_output "WITH_RDMA=1" +fi + if [ $WITH_MESALINK != 0 ]; then CPPFLAGS="${CPPFLAGS} -DUSE_MESALINK" fi diff --git a/docs/cn/bvar_c++.md b/docs/cn/bvar_c++.md index dc5dddd062..478335fec6 100644 --- a/docs/cn/bvar_c++.md +++ b/docs/cn/bvar_c++.md @@ -259,7 +259,7 @@ dump功能由如下gflags控制: | bvar_dump_include | "" | Dump bvar matching these wildcards(separated by comma), empty means including all | | bvar_dump_interval | 10 | Seconds between consecutive 
dump | | bvar_dump_prefix | \ | Every dumped name starts with this prefix | -| bvar_dump_tabs | \ | Dump bvar into different tabs according to the filters (seperated by semicolon), format: *(tab_name=wildcards) | +| bvar_dump_tabs | \ | Dump bvar into different tabs according to the filters (separated by semicolon), format: *(tab_name=wildcards) | 当bvar_dump_file不为空时,程序会启动一个后台导出线程以bvar_dump_interval指定的间隔更新bvar_dump_file,其中包含了被bvar_dump_include匹配且不被bvar_dump_exclude匹配的所有bvar。 diff --git a/docs/cn/client.md b/docs/cn/client.md index 6da1690907..076549fb75 100755 --- a/docs/cn/client.md +++ b/docs/cn/client.md @@ -141,6 +141,32 @@ BNS是百度内常用的命名服务,比如bns://rdev.matrix.all,其中"bns" 如果consul不可访问,服务可自动降级到file naming service获取服务列表。此功能默认关闭,可通过设置-consul\_enable\_degrade\_to\_file\_naming\_service来打开。服务列表文件目录通过-consul \_file\_naming\_service\_dir来设置,使用service-name作为文件名。该文件可通过consul-template生成,里面会保存consul不可用之前最新的下游服务节点。当consul恢复时可自动恢复到consul naming service。 + +### nacos://\ + +NacosNamingService使用[Open-Api](https://nacos.io/zh-cn/docs/open-api.html)定时从nacos获取服务列表。 +NacosNamingService支持[简单鉴权](https://nacos.io/zh-cn/docs/auth.html)。 + +``是一个http uri query,具体参数参见`/nacos/v1/ns/instance/list`文档。 +注意:``需要urlencode。 +``` +nacos://serviceName=test&groupName=g&namespaceId=n&clusters=c&healthyOnly=true +``` + +NacosNamingService拉取列表的时间间隔为`/nacos/v1/ns/instance/list`api返回的`cacheMillis`。 +NacosNamingService只支持整形的权重值。 + +| GFlags | 描述 | 默认值 | +| ---------------------------------- | -------------------------- | ---------------------------- | +| nacos_address | nacos http url | "" | +| nacos_service_discovery_path | nacos服务发现路径 | "/nacos/v1/ns/instance/list" | +| nacos_service_auth_path | nacos登陆路径 | "/nacos/v1/auth/login" | +| nacos_service_timeout_ms | 连接nacos超时时间(毫秒) | 200 | +| nacos_username | 用户名(urlencode编码) | "" | +| nacos_password | 密码(urlencode编码) | "" | +| nacos_load_balancer | nacos集群的负载均衡 | "rr" | + + ### 更多命名服务 
用户可以通过实现brpc::NamingService来对接更多命名服务,具体见[这里](https://github.com/brpc/brpc/blob/master/docs/cn/load_balancing.md#%E5%91%BD%E5%90%8D%E6%9C%8D%E5%8A%A1) diff --git a/docs/cn/execution_queue.md b/docs/cn/execution_queue.md index 6e95ba2987..17dcbcfa82 100644 --- a/docs/cn/execution_queue.md +++ b/docs/cn/execution_queue.md @@ -157,7 +157,7 @@ int execution_queue_execute(ExecutionQueueId id, // Execute a task with options. e.g // bthread::execution_queue_execute(queue, task, &bthread::TASK_OPTIONS_URGENT) // If |options| is NULL, we will use default options (normal task) -// If |handle| is not NULL, we will assign it with the hanlder of this task. +// If |handle| is not NULL, we will assign it with the handler of this task. template int execution_queue_execute(ExecutionQueueId id, typename butil::add_const_reference::type task, diff --git a/docs/cn/getting_started.md b/docs/cn/getting_started.md index bf9dca1f93..3afdb56041 100644 --- a/docs/cn/getting_started.md +++ b/docs/cn/getting_started.md @@ -170,6 +170,17 @@ $ sh run_tests.sh ### 使用cmake编译brpc 参考[这里](#使用cmake编译brpc) +### 使用vcpkg编译brpc + +[vcpkg](https://github.com/microsoft/vcpkg) 是一个全平台支持的包管理器,你可以使用以下步骤vcpkg轻松编译brpc: + +```shell +$ git clone https://github.com/microsoft/vcpkg.git +$ ./bootstrap-vcpkg.bat # 使用 powershell +$ ./bootstrap-vcpkg.sh # 使用 bash +$ ./vcpkg install brpc +``` + ## 自己构建依赖的Linux ### 依赖准备 diff --git a/docs/cn/http_client.md b/docs/cn/http_client.md index 957ca13632..66d3292c45 100644 --- a/docs/cn/http_client.md +++ b/docs/cn/http_client.md @@ -222,7 +222,7 @@ brpc client支持在读取完body前就结束RPC,让用户在RPC结束后再 class ProgressiveReader { public: // Called when one part was read. - // Error returned is treated as *permenant* and the socket where the + // Error returned is treated as *permanent* and the socket where the // data was read will be closed. // A temporary error may be handled by blocking this function, which // may block the HTTP parsing on the socket. 
diff --git a/docs/cn/http_service.md b/docs/cn/http_service.md index 6aaa4b889b..c8945eb170 100644 --- a/docs/cn/http_service.md +++ b/docs/cn/http_service.md @@ -17,7 +17,7 @@ brpc中http和h2的编程接口基本没有区别。除非特殊说明,所有 ## 前缀为/ServiceName/MethodName -定义一个service名为ServiceName(不包含package名), method名为MethodName的pb服务,且让request和reponse定义为空,则该服务默认在/ServiceName/MethodName上提供http/h2服务。 +定义一个service名为ServiceName(不包含package名), method名为MethodName的pb服务,且让request和response定义为空,则该服务默认在/ServiceName/MethodName上提供http/h2服务。 request和response可为空是因为数据都在Controller中: diff --git a/docs/cn/io.md b/docs/cn/io.md index e695d27258..f2e352fe0a 100644 --- a/docs/cn/io.md +++ b/docs/cn/io.md @@ -40,7 +40,7 @@ linux一般使用non-blocking IO提高IO并发度。当IO并发度很低时,no 存储SocketUniquePtr还是SocketId取决于是否需要强引用。像Controller贯穿了RPC的整个流程,和Socket中的数据有大量交互,它存放的是SocketUniquePtr。epoll主要是提醒对应fd上发生了事件,如果Socket回收了,那这个事件是可有可无的,所以它存放了SocketId。 -由于SocketUniquePtr只要有效,其中的数据就不会变,这个机制使用户不用关心麻烦的race conditon和ABA problem,可以放心地对共享的fd进行操作。这种方法也规避了隐式的引用计数,内存的ownership明确,程序的质量有很好的保证。brpc中有大量的SocketUniquePtr和SocketId,它们确实简化了我们的开发。 +由于SocketUniquePtr只要有效,其中的数据就不会变,这个机制使用户不用关心麻烦的race condition和ABA problem,可以放心地对共享的fd进行操作。这种方法也规避了隐式的引用计数,内存的ownership明确,程序的质量有很好的保证。brpc中有大量的SocketUniquePtr和SocketId,它们确实简化了我们的开发。 事实上,Socket不仅仅用于管理原生的fd,它也被用来管理其他资源。比如SelectiveChannel中的每个Sub Channel都被置入了一个Socket中,这样SelectiveChannel可以像普通channel选择下游server那样选择一个Sub Channel进行发送。这个假Socket甚至还实现了健康检查。Streaming RPC也使用了Socket以复用wait-free的写出过程。 diff --git a/docs/cn/json2pb.md b/docs/cn/json2pb.md index 79d7dd4f72..fc41073f61 100644 --- a/docs/cn/json2pb.md +++ b/docs/cn/json2pb.md @@ -39,7 +39,7 @@ repeated int32 numbers = 1; ```protobuf // protobuf message Foo { - required int32 numbers = 1; + repeated int32 numbers = 1; } // rapidjson diff --git a/docs/cn/rdma.md b/docs/cn/rdma.md new file mode 100644 index 0000000000..0c5dd96cb1 --- /dev/null +++ b/docs/cn/rdma.md @@ -0,0 +1,60 @@ +# 编译 + +由于RDMA对驱动与硬件有要求,目前仅支持在Linux系统编译并运行RDMA功能。 + +使用config_brpc: +```bash +sh config_brpc.sh 
--with-rdma --headers="/usr/include" --libs="/usr/lib64 /usr/bin" +make + +cd example/rdma_performance # 示例程序 +make +``` + +使用cmake: +```bash +mkdir bld && cd bld && cmake -DWITH_RDMA=ON .. +make + +cd example/rdma_performance # 示例程序 +mkdir bld && cd bld && cmake .. +make +``` + +# 基本实现 + +RDMA与TCP不同,不使用socket接口进行通信。但是在实现上仍然复用了brpc中原本的Socket类。当用户选择ChannelOptions或ServerOptions中的use_rdma为true时,创建出的Socket类中则有对应的RdmaEndpoint(参见src/brpc/rdma/rdma_endpoint.cpp)。当RDMA被使能时,写入Socket的数据会通过RdmaEndpoint提交给RDMA QP(通过verbs API),而非拷贝到fd。对于数据读取,RdmaEndpoint中则调用verbs API从RDMA CQ中获取对应完成信息(事件获取有独立的fd,复用EventDispatcher,处理函数采用RdmaEndpoint::PollCq),最后复用InputMessenger完成RPC消息解析。 + +brpc内部使用RDMA RC模式,每个RdmaEndpoint对应一个QP。RDMA连接建立依赖于前置TCP建连,TCP建连后双方交换必要参数,如GID、QPN等,再发起RDMA连接并实现数据传输。这个过程我们称为握手(参见RdmaEndpoint)。因为握手需要TCP连接,因此RdmaEndpoint所在的Socket类中,原本的TCP fd仍然有效。握手过程采用了brpc中已有的AppConnect逻辑。注意,握手用的TCP连接在后续数据传输阶段并不会收发数据,但仍保持为EST状态。一旦TCP连接中断,其上对应的RDMA连接同样会置错。 + +RdmaEndpoint数据传输逻辑的第一个重要特性是零拷贝。要发送的所有数据默认都存放在IOBuf的Block中,因此所发送的Block需要等到对端确认接收完成后才可以释放,这些Block的引用被存放于RdmaEndpoint::_sbuf中。而要实现接收零拷贝,则需要确保接受端所预提交的接收缓冲区必须直接在IOBuf的Block里面,被存放于RdmaEndpoint::_rbuf。注意,接收端预提交的每一段Block,有一个固定的大小(recv_block_size)。发送端发送时,一个请求最多只能有这么大,否则接收端则无法成功接收。 + +RdmaEndpoint数据传输逻辑的第二个重要特性是滑动窗口流控。这一流控机制是为了避免发送端持续在发送,其速度超过了接收端处理的速度。TCP传输中也有类似的逻辑,但是是由内核协议栈来实现的。RdmaEndpoint内实现了这一流控机制,通过接收端显式回复ACK来确认接收端处理完毕。为了减少ACK本身的开销,让ACK以立即数形式返回,可以被附在数据消息里。 + +RdmaEndpoint数据传输逻辑的第三个重要特性是事件聚合。每个消息的大小被限定在一个recv_block_size,默认为8KB。如果每个消息都触发事件进行处理,会导致性能退化严重,甚至不如TCP传输(TCP拥有GSO、GRO等诸多优化)。因此,RdmaEndpoint综合考虑数据大小、窗口与ACK的情况,对每个发送消息选择性设置solicited标志,来控制是否在发送端触发事件通知。 + +RDMA要求数据收发所使用的内存空间必须被注册(memory register),把对应的页表映射注册给网卡,这一操作非常耗时,所以通常都会使用内存池方案来加速。brpc内部的数据收发都使用IOBuf,为了在兼容IOBuf的情况下实现完全零拷贝,整个IOBuf所使用的内存空间整体由统一内存池接管(参见src/brpc/rdma/block_pool.cpp)。注意,由于IOBuf内存池不由用户直接控制,因此实际使用中需要注意IOBuf所消耗的总内存,建议根据实际业务需求,一次性注册足够的内存池以实现性能最大化。 + 
+RDMA是硬件相关的通信技术,有很多独特的概念,比如device、port、GID、LID、MaxSge等。这些参数在初始化时会从对应的网卡中读取出来,并且做出默认的选择(参见src/brpc/rdma/rdma_helper.cpp)。有时默认的选择并非用户的期望,则可以通过flag参数方式指定。 + +# 参数 + +可配置参数说明: +* rdma_trace_verbose: 日志中打印RDMA建连相关信息,默认false +* rdma_recv_zerocopy: 是否启用接收零拷贝,默认true +* rdma_zerocopy_min_size: 接收零拷贝最小的msg大小,默认512B +* rdma_recv_block_type: 为接收数据预准备的block类型,分为三类default(8KB)/large(64KB)/huge(2MB),默认为default +* rdma_prepared_qp_size: 程序启动预生成的QP的大小,默认128 +* rdma_prepared_qp_cnt: 程序启动预生成的QP的数量,默认1024 +* rdma_max_sge: 允许的最大发送SGList长度,默认为0,即采用硬件所支持的最大长度 +* rdma_sq_size: SQ大小,默认128 +* rdma_rq_size: RQ大小,默认128 +* rdma_cqe_poll_once: 从CQ中一次性poll出的CQE数量,默认32 +* rdma_gid_index: 使用本地GID表中的Index,默认为-1,即选用最大的可用GID Index +* rdma_port: 使用IB设备的port number,默认为1 +* rdma_device: 使用IB设备的名称,默认为空,即使用第一个active的设备 +* rdma_memory_pool_initial_size_mb: 内存池的初始大小,单位MB,默认1024 +* rdma_memory_pool_increase_size_mb: 内存池每次动态增长的大小,单位MB,默认1024 +* rdma_memory_pool_max_regions: 最大的内存池块数,默认16 +* rdma_memory_pool_buckets: 内存池中为避免竞争采用的bucket数目,默认为4 +* rdma_memory_pool_tls_cache_num: 内存池中thread local的缓存block数目,默认为128 diff --git a/docs/cn/rpc_press.md b/docs/cn/rpc_press.md index 611b613655..764b20dab4 100644 --- a/docs/cn/rpc_press.md +++ b/docs/cn/rpc_press.md @@ -5,6 +5,7 @@ rpc_press无需写代码就压测各种rpc server,目前支持的协议有: - sofa-pbrpc - public_pbrpc - nova_pbrpc +- google_grpc # 获取工具 diff --git a/docs/cn/streaming_rpc.md b/docs/cn/streaming_rpc.md index 987ce37b17..14a44c22c8 100644 --- a/docs/cn/streaming_rpc.md +++ b/docs/cn/streaming_rpc.md @@ -47,7 +47,7 @@ struct StreamOptions size_t messages_in_batch; // Handle input message, if handler is NULL, the remote side is not allowd to - // write any message, who will get EBADF on writting + // write any message, who will get EBADF on writing // default: NULL StreamInputHandler* handler; }; diff --git a/docs/en/client.md b/docs/en/client.md index dac05b41ab..75cb504b03 100644 --- a/docs/en/client.md +++ b/docs/en/client.md @@ -139,6 +139,32 @@ If the server list returned by 
the consul does not follow [response format](http If consul is not accessible, the naming service can be automatically downgraded to file naming service. This feature is turned off by default and can be turned on by setting -consul\_enable\_degrade\_to\_file\_naming\_service. After downgrading, in the directory specified by -consul\_file\_naming\_service\_dir, the file whose name is the service-name will be used. This file can be generated by the consul-template, which holds the latest server list before the consul is unavailable. The consul naming service is automatically restored when consul is restored. + +### nacos://\<service-name\> + +NacosNamingService gets a list of servers from nacos periodically by [Open-Api](https://nacos.io/en-us/docs/open-api.html). +NacosNamingService supports [simple authentication](https://nacos.io/en-us/docs/auth.html). + +`<service-name>` is an HTTP URI query. For more details, refer to the `/nacos/v1/ns/instance/list` API documentation. +NOTE: `<service-name>` must be url-encoded. +``` +nacos://serviceName=test&groupName=g&namespaceId=n&clusters=c&healthyOnly=true +``` + +The server list is cached for `cacheMillis` milliseconds as specified in the response of `/nacos/v1/ns/instance/list` api. +NOTE: The value of server weight must be an integer.
+ +| GFlags | Description | Default value | +| ---------------------------------- | ------------------------------------ | ---------------------------- | +| nacos_address | nacos http url | "" | +| nacos_service_discovery_path | path for discovery | "/nacos/v1/ns/instance/list" | +| nacos_service_auth_path | path for login | "/nacos/v1/auth/login" | +| nacos_service_timeout_ms | timeout for connecting to nacos(ms) | 200 | +| nacos_username | url-encoded username | "" | +| nacos_password | url-encoded password | "" | +| nacos_load_balancer | load balancer for nacos clusters | "rr" | + + ### More naming services User can extend to more naming services by implementing brpc::NamingService, check [this link](https://github.com/brpc/brpc/blob/master/docs/cn/load_balancing.md#%E5%91%BD%E5%90%8D%E6%9C%8D%E5%8A%A1) for details. diff --git a/docs/en/getting_started.md b/docs/en/getting_started.md index 3baf3a67eb..b263bb49c0 100644 --- a/docs/en/getting_started.md +++ b/docs/en/getting_started.md @@ -107,6 +107,18 @@ Examples link brpc statically, if you need to link the shared version, remove `C $ mkdir build && cd build && cmake -DBUILD_UNIT_TESTS=ON .. && make && make test ``` +### Compile brpc with vcpkg + +[vcpkg](https://github.com/microsoft/vcpkg) is a package manager that supports all platforms, +you can use vcpkg to build brpc with the following step: + +```shell +$ git clone https://github.com/microsoft/vcpkg.git +$ ./bootstrap-vcpkg.bat # for powershell +$ ./bootstrap-vcpkg.sh # for bash +$ ./vcpkg install brpc +``` + ## Fedora/CentOS ### Prepare deps diff --git a/docs/en/http_client.md b/docs/en/http_client.md index fb41a6b42e..a6e0e34aba 100644 --- a/docs/en/http_client.md +++ b/docs/en/http_client.md @@ -227,7 +227,7 @@ How to use: class ProgressiveReader { public: // Called when one part was read. 
- // Error returned is treated as *permenant* and the socket where the + // Error returned is treated as *permanent* and the socket where the // data was read will be closed. // A temporary error may be handled by blocking this function, which // may block the HTTP parsing on the socket. diff --git a/docs/en/memcache_client.md b/docs/en/memcache_client.md index decca09d53..4f9a4918c1 100644 --- a/docs/en/memcache_client.md +++ b/docs/en/memcache_client.md @@ -58,7 +58,7 @@ Notes on above code: - The class of the request must be `MemcacheRequest`, response must be `MemcacheResponse`, otherwise `CallMethod` fails. `stub` is not necessary, just call `channel.CallMethod` with `method` to NULL. - Call `request.XXX()` to add an operation, where `XXX` is `Set` in this example. Multiple operations inside a request are sent to a memcached server together (often referred to as "pipeline mode"). - call `response.PopXXX()` to pop result of an operation from the response, where `XXX` is `Set` in this example. true is returned on success, and false otherwise in which case use `response.LastError()` to get the error message. `XXX` must match the corresponding operation in the request, otherwise the pop is rejected. In above example, a `PopGet` would fail with the error message of "not a GET response". -- Results of `Pop` are independent from the RPC result. Even if "a value cannot be put into the memcached", the RPC may still be successful. RPC failure means things like broken connection, timeout etc. If the business logic requires the memcache operations to be succesful, you should test successfulness of both RPC and `PopXXX`. +- Results of `Pop` are independent from the RPC result. Even if "a value cannot be put into the memcached", the RPC may still be successful. RPC failure means things like broken connection, timeout etc. If the business logic requires the memcache operations to be successful, you should test successfulness of both RPC and `PopXXX`. 
Supported operations currently: diff --git a/docs/en/overview.md b/docs/en/overview.md index c26909cfdd..32185e0bf8 100644 --- a/docs/en/overview.md +++ b/docs/en/overview.md @@ -83,7 +83,7 @@ brpc pays special attentions to development and maintenance efficency, you can [ Although almost all RPC implementations claim that they're "high-performant", the numbers are probably just numbers. Being really high-performant in different scenarios is difficult. To unify communication infra inside Baidu, brpc goes much deeper at performance than other implementations. -* Reading and parsing requests from different clients is fully parallelized and users don't need to distinguish between "IO-threads" and "Processing-threads". Other implementations probably have "IO-threads" and "Processing-threads" and hash file descriptors(fd) into IO-threads. When a IO-thread handles one of its fds, other fds in the thread can't be handled. If a message is large, other fds are significantly delayed. Although different IO-threads run in parallel, you won't have many IO-threads since they don't have too much to do generally except reading/parsing from fds. If you have 10 IO-threads, one fd may affect 10% of all fds, which is unacceptable to industrial online services (requiring 99.99% availability). The problem will be worse when fds are distributed unevenly accross IO-threads (unfortunately common), or the service is multi-tenancy (common in cloud services). In brpc, reading from different fds is parallelized and even processing different messages from one fd is parallelized as well. Parsing a large message does not block other messages from the same fd, not to mention other fds. More details can be found [here](io.md#receiving-messages). +* Reading and parsing requests from different clients is fully parallelized and users don't need to distinguish between "IO-threads" and "Processing-threads". 
Other implementations probably have "IO-threads" and "Processing-threads" and hash file descriptors(fd) into IO-threads. When a IO-thread handles one of its fds, other fds in the thread can't be handled. If a message is large, other fds are significantly delayed. Although different IO-threads run in parallel, you won't have many IO-threads since they don't have too much to do generally except reading/parsing from fds. If you have 10 IO-threads, one fd may affect 10% of all fds, which is unacceptable to industrial online services (requiring 99.99% availability). The problem will be worse when fds are distributed unevenly across IO-threads (unfortunately common), or the service is multi-tenancy (common in cloud services). In brpc, reading from different fds is parallelized and even processing different messages from one fd is parallelized as well. Parsing a large message does not block other messages from the same fd, not to mention other fds. More details can be found [here](io.md#receiving-messages). * Writing into one fd and multiple fds is highly concurrent. When multiple threads write into the same fd (common for multiplexed connections), the first thread directly writes in-place and other threads submit their write requests in [wait-free](https://en.wikipedia.org/wiki/Non-blocking_algorithm#Wait-freedom) manner. One fd can be written into 5,000,000 16-byte messages per second by a couple of highly-contended threads. More details can be found [here](io.md#sending-messages). * Minimal locks. High-QPS services can utilize all CPU power on the machine. For example, [creating bthreads](../cn/memory_management.md) for processing requests, [setting up timeout](../cn/timer_keeping.md), [finding RPC contexts](../cn/bthread_id.md) according to response, [recording performance counters](bvar.md) are all highly concurrent. 
Users see very few contentions (via [contention profiler](../cn/contention_profiler.md)) caused by RPC framework even if the service runs at 500,000+ QPS. * Server adjusts thread number according to load. Traditional implementations set number of threads according to latency to avoid limiting the throughput. brpc creates a new [bthread](../cn/bthread.md) for each request and ends the bthread when the request is done, which automatically adjusts thread number according to load. diff --git a/docs/en/rdma.md b/docs/en/rdma.md new file mode 100644 index 0000000000..c07789e4fc --- /dev/null +++ b/docs/en/rdma.md @@ -0,0 +1,60 @@ +# Build + +Since RDMA requires driver and hardware support, only the build on linux is verified. + +With config_brpc: +```bash +sh config_brpc.sh --with-rdma --headers="/usr/include" --libs="/usr/lib64 /usr/bin" +make + +cd example/rdma_performance # example for rdma +make +``` + +With cmake: +```bash +mkdir bld && cd bld && cmake -DWITH_RDMA=ON .. +make + +cd example/rdma_performance # example for rdma +mkdir bld && cd bld && cmake .. +make +``` + +# Basic Implementation + +RDMA does not use socket API like TCP. However, the brpc::Socket class is still used. If a user sets ChannelOptions.use_rdma or ServerOptions.use_rdma to true, the Socket class created has RdmaEndpoint (see src/brpc/rdma/rdma_endpoint.cpp). When RDMA is enabled, the data which need to transmit will be posted to RDMA QP with verbs API, not written to TCP fd. For data receiving, RdmaEndpoint will get completions from RDMA CQ with verbs API (the event will be generated from a dedicated fd and be added into EventDispatcher, the handling function is RdmaEndpoint::PollCq) before parsing RPC messages with InputMessenger. + +brpc uses RDMA RC mode. Every RdmaEndpoint has its own QP. Before establishing RDMA connection, a TCP connection is necessary to exchange some information such as GID and QPN. We call this procedure handshake. 
Since handshake needs TCP connection, the TCP fd in the corresponding Socket is still valid. The handshake procedure is completed in the AppConnect way in brpc. The TCP connection will keep in EST state but not be used for data transmission after RDMA connection is established. Once the TCP connection is closed, the corresponding RDMA connection will be set to error. + +The first key feature in RdmaEndpoint data transmission is zero copy. All data which need to transmit is in the Blocks of IOBuf. Thus all the Blocks need to be released after the remote side completes the receiving. The references to these Blocks are stored in RdmaEndpoint::_sbuf. In order to realize receiving zero copy, the receive side must post receive buffers in Blocks of IOBuf, which are stored in RdmaEndpoint::_rbuf. Note that all the Blocks posted in the receive side have a fixed size (recv_block_size). The transmit side can only send messages smaller than that. Otherwise the receive side cannot receive data successfully. + +The second key feature in RdmaEndpoint data transmission is sliding window flow control. The flow control is to avoid fast transmit side overwhelming slow receive side. TCP has similar mechanism in kernel TCP stack. RdmaEndpoint implements this mechanism with explicit ACKs from receive side. To reduce the overhead of ACKs, the ACK number can be piggybacked in ordinary data message as immediate data. + +The third key feature in RdmaEndpoint data transmission is event suppression. The size of every message is limited to recv_block_size (8KB by default). If every message generates an event, the performance will be very poor, even worse than TCP (TCP has GSO/GRO). Therefore, RdmaEndpoint sets the solicited flag for every message according to data size, window and ACKs. The flag can control whether to generate an event on the remote side or not. + +All the memory used for data transmission in RDMA must be registered, which is very inefficient.
Generally, a memory pool is employed to avoid frequent memory registration. In fact, brpc uses IOBuf for data transmission. In order to realize total zerocopy and compatibility with IOBuf, the memory used by IOBuf is taken over by the RDMA memory pool (see src/brpc/rdma/block_pool.cpp). Since IOBuf buffer cannot be controlled by user directly, the total memory consumption in IOBuf should be carefully managed. It is suggested that the application registers enough memory at one time according to its requirement. + +RDMA is hardware-related. It has some different concepts such as device, port, GID, LID, MaxSge and so on. These parameters can be read from NICs at initialization, and brpc will make the default choice (see src/brpc/rdma/rdma_helper.cpp). If the default choice is not the expected one, it can be changed through flags. + +# Parameters + +Configurable parameters: +* rdma_trace_verbose: to print RDMA connection information in log, default is false +* rdma_recv_zerocopy: enable zero copy in receive side, default is true +* rdma_zerocopy_min_size: the min message size for receive zero copy (in Byte), default is 512 +* rdma_recv_block_type: the block type used for receiving, can be default(8KB)/large(64KB)/huge(2MB), default is default +* rdma_prepared_qp_size: the size of QP created at the beginning of the application, default is 128 +* rdma_prepared_qp_cnt: the number of QPs created at the beginning of the application, default is 1024 +* rdma_max_sge: the max length of sglist, default is 0, which is the max length allowed by the device +* rdma_sq_size: the size of SQ, default is 128 +* rdma_rq_size: the size of RQ, default is 128 +* rdma_cqe_poll_once: the number of CQEs polled from the CQ at once, default is 32 +* rdma_gid_index: the index of local GID table used, default is -1, which is the maximum GID index +* rdma_port: the port number used, default is 1 +* rdma_device: the IB device name, default is empty, which is the first active device +*
rdma_memory_pool_initial_size_mb: the initial region size of RDMA memory pool (in MB), default is 1024 +* rdma_memory_pool_increase_size_mb: the step increase region size of RDMA memory pool (in MB), default is 1024 +* rdma_memory_pool_max_regions: the max number of regions in RDMA memory pool, default is 16 +* rdma_memory_pool_buckets: the number of buckets for avoiding mutex contention in RDMA memory pool, default is 4 +* rdma_memory_pool_tls_cache_num: the number of thread local cached blocks in RDMA memory pool, default is 128 diff --git a/docs/en/status.md b/docs/en/status.md index 69a6a4e009..1d02c871d2 100644 --- a/docs/en/status.md +++ b/docs/en/status.md @@ -10,7 +10,7 @@ Meanings of the fields above: - **connection_count**: number of connections to the server from clients, not including number of outward connections which are displayed at /vars/rpc_channel_connection_count. - **example.EchoService**: Full name of the service, including the package name defined in proto. - **Echo (EchoRequest) returns (EchoResponse)**: Signature of the method. A service can have multiple methods. Click links on request/response to see schemes of the protobuf messages. -- **count**: Number of requests that are succesfully processed. +- **count**: Number of requests that are successfully processed. - **error**: Number of requests that are failed to process. - **latency**: average latency in recent *60s/60m/24h/30d* from *right to left* on html, average latency in recent 10s(by default, specified by [-bvar_dump_interval](http://brpc.baidu.com:8765/flags/bvar_dump_interval)) on plain texts. - **latency_percentiles**: 80%, 90%, 99%, 99.9% percentiles of latency in 10 seconds(specified by[-bvar_dump_interval](http://brpc.baidu.com:8765/flags/bvar_dump_interval)). Curves with historical values are shown on html.
diff --git a/docs/en/streaming_rpc.md b/docs/en/streaming_rpc.md index 8ea32e2318..58ee7a9637 100644 --- a/docs/en/streaming_rpc.md +++ b/docs/en/streaming_rpc.md @@ -47,7 +47,7 @@ struct StreamOptions size_t messages_in_batch; // Handle input message, if handler is NULL, the remote side is not allowd to - // write any message, who will get EBADF on writting + // write any message, who will get EBADF on writing // default: NULL StreamInputHandler* handler; }; diff --git a/example/BUILD b/example/BUILD.bazel similarity index 91% rename from example/BUILD rename to example/BUILD.bazel index d688749d83..5ef87e0a66 100644 --- a/example/BUILD +++ b/example/BUILD.bazel @@ -13,6 +13,9 @@ # See the License for the specific language governing permissions and # limitations under the License. +load("@rules_proto//proto:defs.bzl", "proto_library") +load("@rules_cc//cc:defs.bzl", "cc_binary", "cc_proto_library") + COPTS = [ "-D__STDC_FORMAT_MACROS", "-DBTHREAD_USE_FAST_PTHREAD_MUTEX", @@ -27,7 +30,7 @@ COPTS = [ "-fno-omit-frame-pointer", "-DGFLAGS_NS=google", ] + select({ - "//:with_glog": ["-DBRPC_WITH_GLOG=1"], + "//bazel/config:brpc_with_glog": ["-DBRPC_WITH_GLOG=1"], "//conditions:default": ["-DBRPC_WITH_GLOG=0"], }) @@ -50,6 +53,7 @@ cc_binary( srcs = [ "echo_c++/server.cpp", ], + copts = COPTS, includes = [ "echo_c++", ], @@ -57,7 +61,6 @@ cc_binary( ":cc_echo_c++_proto", "//:brpc", ], - copts = COPTS, ) cc_binary( @@ -65,6 +68,7 @@ cc_binary( srcs = [ "echo_c++/client.cpp", ], + copts = COPTS, includes = [ "echo_c++", ], @@ -72,5 +76,4 @@ cc_binary( ":cc_echo_c++_proto", "//:brpc", ], - copts = COPTS, ) diff --git a/example/build_with_old_bazel/.bazelrc b/example/build_with_old_bazel/.bazelrc new file mode 100644 index 0000000000..14a9b50878 --- /dev/null +++ b/example/build_with_old_bazel/.bazelrc @@ -0,0 +1,16 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. 
See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +build --incompatible_disable_deprecated_attr_params=false diff --git a/example/build_with_old_bazel/.bazelversion b/example/build_with_old_bazel/.bazelversion new file mode 100644 index 0000000000..e3eed59fa7 --- /dev/null +++ b/example/build_with_old_bazel/.bazelversion @@ -0,0 +1,16 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +0.28.0 diff --git a/example/build_with_old_bazel/BUILD.bazel b/example/build_with_old_bazel/BUILD.bazel new file mode 100644 index 0000000000..8394dcb657 --- /dev/null +++ b/example/build_with_old_bazel/BUILD.bazel @@ -0,0 +1,28 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +genrule( + name = "empty_cc", + outs = ["empty.cc"], + cmd = "echo 'int main(){return 0;}' > $@", +) + +cc_binary( + name = "empty", + srcs = [":empty_cc"], + deps = [ + "@com_github_brpc_brpc//:brpc", + ], +) diff --git a/example/build_with_old_bazel/WORKSPACE b/example/build_with_old_bazel/WORKSPACE new file mode 100644 index 0000000000..02ef4c52f9 --- /dev/null +++ b/example/build_with_old_bazel/WORKSPACE @@ -0,0 +1,134 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +# +# WARNING: This example is not a best practice for how to build with bRPC in bazel. +# + +workspace(name = "com_github_brpc_brpc_example_build_with_old_bazel") + +load("@bazel_tools//tools/build_defs/repo:http.bzl", "http_archive") + +# +# Constants +# + +BAZEL_SKYLIB_VERSION = "1.1.1" # 2021-09-27T17:33:49Z + +BAZEL_SKYLIB_SHA256 = "c6966ec828da198c5d9adbaa94c05e3a1c7f21bd012a0b29ba8ddbccb2c93b0d" + +RULES_PROTO_COMMIT_ID = "9f8407ec90b579cba157ce481682b2beb1f7409f" + +RULES_PROTO_SHA256 = "3a27bf90d4cd3e4546afa801857d35c3c4db5f0680c840167f6fb2f7078de177" + +RULES_CC_COMMIT_ID = "b7fe9697c0c76ab2fd431a891dbb9a6a32ed7c3e" + +RULES_CC_SHA256 = "29daf0159f0cf552fcff60b49d8bcd4f08f08506d2da6e41b07058ec50cfeaec" + +# +# Starlark libraries +# + +http_archive( + name = "bazel_skylib", + sha256 = BAZEL_SKYLIB_SHA256, + urls = [ + "https://github.com/bazelbuild/bazel-skylib/releases/download/{version}/bazel-skylib-{version}.tar.gz".format(version = BAZEL_SKYLIB_VERSION), + "https://mirror.bazel.build/github.com/bazelbuild/bazel-skylib/releases/download/{version}/bazel-skylib-{version}.tar.gz".format(version = BAZEL_SKYLIB_VERSION), + ], +) + +http_archive( + name = "rules_proto", + sha256 = RULES_PROTO_SHA256, + strip_prefix = "rules_proto-{version}".format(version = RULES_PROTO_COMMIT_ID), + urls = ["https://github.com/bazelbuild/rules_proto/archive/{version}.tar.gz".format(version = RULES_PROTO_COMMIT_ID)], +) + +http_archive( + name = "rules_cc", + sha256 = RULES_CC_SHA256, + strip_prefix = "rules_cc-{commit_id}".format(commit_id = RULES_CC_COMMIT_ID), + 
urls = [ + "https://github.com/bazelbuild/rules_cc/archive/{commit_id}.tar.gz".format(commit_id = RULES_CC_COMMIT_ID), + ], +) + +# +# C++ Dependencies +# +# Ordered lexicographical. +# + +local_repository( + name = "com_github_brpc_brpc", + path = "../../", +) + +http_archive( + name = "com_github_gflags_gflags", + sha256 = "a8263376b409900dd46830e4e34803a170484707327854cc252fc5865275a57d", + strip_prefix = "gflags-46f73f88b18aee341538c0dfc22b1710a6abedef", + url = "https://github.com/gflags/gflags/archive/46f73f88b18aee341538c0dfc22b1710a6abedef.tar.gz", +) + +http_archive( + name = "com_github_google_glog", + build_file = "//:glog.BUILD", + strip_prefix = "glog-a6a166db069520dbbd653c97c2e5b12e08a8bb26", + url = "https://github.com/google/glog/archive/a6a166db069520dbbd653c97c2e5b12e08a8bb26.tar.gz", +) + +http_archive( + name = "com_github_google_leveldb", + build_file = "//:leveldb.BUILD", + sha256 = "3912ac36dbb264a62797d68687711c8024919640d89b6733f9342ada1d16cda1", + strip_prefix = "leveldb-a53934a3ae1244679f812d998a4f16f2c7f309a6", + url = "https://github.com/google/leveldb/archive/a53934a3ae1244679f812d998a4f16f2c7f309a6.tar.gz", +) + +http_archive( + name = "com_github_madler_zlib", # 2017-01-15T17:57:23Z + build_file = "@com_github_brpc_brpc//bazel/third_party/zlib:zlib.BUILD", + sha256 = "c3e5e9fdd5004dcb542feda5ee4f0ff0744628baf8ed2dd5d66f8ca1197cb1a1", + strip_prefix = "zlib-1.2.11", + urls = ["https://zlib.net/zlib-1.2.11.tar.gz"], +) + +http_archive( + name = "com_google_googletest", + strip_prefix = "googletest-0fe96607d85cf3a25ac40da369db62bbee2939a5", + url = "https://github.com/google/googletest/archive/0fe96607d85cf3a25ac40da369db62bbee2939a5.tar.gz", +) + +http_archive( + name = "com_google_protobuf", + sha256 = "9510dd2afc29e7245e9e884336f848c8a6600a14ae726adb6befdb4f786f0be2", + strip_prefix = "protobuf-3.6.1.3", + type = "zip", + url = "https://github.com/protocolbuffers/protobuf/archive/v3.6.1.3.zip", +) + +# This is not a correct approach, 
just for simplicity. +# rules_foreign_cc didn't support too early version of bazel. +# bRPC need to be patched to work with boringssl for now. + +new_local_repository( + name = "openssl", + build_file = "//:openssl.BUILD", + path = "/usr", +) diff --git a/leveldb.BUILD b/example/build_with_old_bazel/leveldb.BUILD similarity index 100% rename from leveldb.BUILD rename to example/build_with_old_bazel/leveldb.BUILD diff --git a/openssl.BUILD b/example/build_with_old_bazel/openssl.BUILD similarity index 51% rename from openssl.BUILD rename to example/build_with_old_bazel/openssl.BUILD index e2d02eb0b6..92a687a0e4 100644 --- a/openssl.BUILD +++ b/example/build_with_old_bazel/openssl.BUILD @@ -1,3 +1,18 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + package( default_visibility=["//visibility:public"] ) diff --git a/example/build_with_old_bazel/zlib.BUILD b/example/build_with_old_bazel/zlib.BUILD new file mode 100644 index 0000000000..0830cc0f93 --- /dev/null +++ b/example/build_with_old_bazel/zlib.BUILD @@ -0,0 +1,23 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. 
+# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +package( + default_visibility=["//visibility:public"] +) + +cc_library( + name = "zlib", + linkopts = ["-lz"], +) diff --git a/example/rdma_performance/CMakeLists.txt b/example/rdma_performance/CMakeLists.txt new file mode 100644 index 0000000000..d5c217c25a --- /dev/null +++ b/example/rdma_performance/CMakeLists.txt @@ -0,0 +1,150 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +cmake_minimum_required(VERSION 2.8.10) +project(rdma_performance C CXX) + +option(LINK_SO "Whether examples are linked dynamically" OFF) + +execute_process( + COMMAND bash -c "find ${PROJECT_SOURCE_DIR}/../.. 
-type d -regex \".*output/include$\" | head -n1 | xargs dirname | tr -d '\n'" + OUTPUT_VARIABLE OUTPUT_PATH +) + +set(CMAKE_PREFIX_PATH ${OUTPUT_PATH}) + +include(FindThreads) +include(FindProtobuf) +protobuf_generate_cpp(PROTO_SRC PROTO_HEADER test.proto) +# include PROTO_HEADER +include_directories(${CMAKE_CURRENT_BINARY_DIR}) + +# Search for libthrift* by best effort. If it is not found and brpc is +# compiled with thrift protocol enabled, a link error would be reported. +find_library(THRIFT_LIB NAMES thrift) +if (NOT THRIFT_LIB) + set(THRIFT_LIB "") +endif() +find_library(THRIFTNB_LIB NAMES thriftnb) +if (NOT THRIFTNB_LIB) + set(THRIFTNB_LIB "") +endif() + +find_path(BRPC_INCLUDE_PATH NAMES brpc/server.h) +if(LINK_SO) + find_library(BRPC_LIB NAMES brpc) +else() + find_library(BRPC_LIB NAMES libbrpc.a brpc) +endif() +if((NOT BRPC_INCLUDE_PATH) OR (NOT BRPC_LIB)) + message(FATAL_ERROR "Fail to find brpc") +endif() +include_directories(${BRPC_INCLUDE_PATH}) + +find_path(GFLAGS_INCLUDE_PATH gflags/gflags.h) +find_library(GFLAGS_LIBRARY NAMES gflags libgflags) +if((NOT GFLAGS_INCLUDE_PATH) OR (NOT GFLAGS_LIBRARY)) + message(FATAL_ERROR "Fail to find gflags") +endif() +include_directories(${GFLAGS_INCLUDE_PATH}) + +execute_process( + COMMAND bash -c "grep \"namespace [_A-Za-z0-9]\\+ {\" ${GFLAGS_INCLUDE_PATH}/gflags/gflags_declare.h | head -1 | awk '{print $2}' | tr -d '\n'" + OUTPUT_VARIABLE GFLAGS_NS +) +if(${GFLAGS_NS} STREQUAL "GFLAGS_NAMESPACE") + execute_process( + COMMAND bash -c "grep \"#define GFLAGS_NAMESPACE [_A-Za-z0-9]\\+\" ${GFLAGS_INCLUDE_PATH}/gflags/gflags_declare.h | head -1 | awk '{print $3}' | tr -d '\n'" + OUTPUT_VARIABLE GFLAGS_NS + ) +endif() +if(CMAKE_SYSTEM_NAME STREQUAL "Darwin") + include(CheckFunctionExists) + CHECK_FUNCTION_EXISTS(clock_gettime HAVE_CLOCK_GETTIME) + if(NOT HAVE_CLOCK_GETTIME) + set(DEFINE_CLOCK_GETTIME "-DNO_CLOCK_GETTIME_IN_MAC") + endif() +endif() + +set(CMAKE_CPP_FLAGS "${DEFINE_CLOCK_GETTIME} -DGFLAGS_NS=${GFLAGS_NS} 
-DBRPC_WITH_RDMA=1") +set(CMAKE_CXX_FLAGS "${CMAKE_CPP_FLAGS} -DNDEBUG -O2 -D__const__= -pipe -W -Wall -Wno-unused-parameter -fPIC -fno-omit-frame-pointer") + +if(CMAKE_VERSION VERSION_LESS "3.1.3") + if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11") + endif() + if(CMAKE_CXX_COMPILER_ID STREQUAL "Clang") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11") + endif() +else() + set(CMAKE_CXX_STANDARD 11) + set(CMAKE_CXX_STANDARD_REQUIRED ON) +endif() + +find_path(LEVELDB_INCLUDE_PATH NAMES leveldb/db.h) +find_library(LEVELDB_LIB NAMES leveldb) +if ((NOT LEVELDB_INCLUDE_PATH) OR (NOT LEVELDB_LIB)) + message(FATAL_ERROR "Fail to find leveldb") +endif() +include_directories(${LEVELDB_INCLUDE_PATH}) + +if(CMAKE_SYSTEM_NAME STREQUAL "Darwin") + set(OPENSSL_ROOT_DIR + "/usr/local/opt/openssl" # Homebrew installed OpenSSL + ) +endif() + +find_package(OpenSSL) +include_directories(${OPENSSL_INCLUDE_DIR}) + +find_path(RDMA_INCLUDE_PATH NAMES infiniband/verbs.h) +find_library(RDMA_LIB NAMES ibverbs) +if ((NOT RDMA_INCLUDE_PATH) OR (NOT RDMA_LIB)) + message(FATAL_ERROR "Fail to find ibverbs") +endif() + +set(DYNAMIC_LIB + ${CMAKE_THREAD_LIBS_INIT} + ${GFLAGS_LIBRARY} + ${PROTOBUF_LIBRARIES} + ${LEVELDB_LIB} + ${OPENSSL_CRYPTO_LIBRARY} + ${OPENSSL_SSL_LIBRARY} + ${THRIFT_LIB} + ${THRIFTNB_LIB} + dl + ) + +if(CMAKE_SYSTEM_NAME STREQUAL "Darwin") + set(DYNAMIC_LIB ${DYNAMIC_LIB} + pthread + "-framework CoreFoundation" + "-framework CoreGraphics" + "-framework CoreData" + "-framework CoreText" + "-framework Security" + "-framework Foundation" + "-Wl,-U,_MallocExtension_ReleaseFreeMemory" + "-Wl,-U,_ProfilerStart" + "-Wl,-U,_ProfilerStop") +endif() + +add_executable(client client.cpp ${PROTO_SRC} ${PROTO_HEADER}) +add_executable(server server.cpp ${PROTO_SRC} ${PROTO_HEADER}) + +target_link_libraries(client ${BRPC_LIB} ${DYNAMIC_LIB}) +target_link_libraries(server ${BRPC_LIB} ${DYNAMIC_LIB}) diff --git 
a/example/rdma_performance/Makefile b/example/rdma_performance/Makefile new file mode 100644 index 0000000000..9e3e262778 --- /dev/null +++ b/example/rdma_performance/Makefile @@ -0,0 +1,98 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +NEED_GPERFTOOLS=0 +BRPC_PATH=../.. +include $(BRPC_PATH)/config.mk +# Notes on the flags: +# 1. Added -fno-omit-frame-pointer: perf/tcmalloc-profiler use frame pointers by default +# 2. 
Added -D__const__= : Avoid over-optimizations of TLS variables by GCC>=4.8 +CXXFLAGS+=$(CPPFLAGS) -std=c++0x -DNDEBUG -O2 -D__const__= -pipe -W -Wall -Wno-unused-parameter -fPIC -fno-omit-frame-pointer +ifeq ($(NEED_GPERFTOOLS), 1) + CXXFLAGS+=-DBRPC_ENABLE_CPU_PROFILER +endif +HDRS+=$(BRPC_PATH)/output/include +LIBS+=$(BRPC_PATH)/output/lib + +HDRPATHS=$(addprefix -I, $(HDRS)) +LIBPATHS=$(addprefix -L, $(LIBS)) +COMMA=, +SOPATHS=$(addprefix -Wl$(COMMA)-rpath$(COMMA), $(LIBS)) + +CLIENT_SOURCES = client.cpp +SERVER_SOURCES = server.cpp +PROTOS = $(wildcard *.proto) + +PROTO_OBJS = $(PROTOS:.proto=.pb.o) +PROTO_GENS = $(PROTOS:.proto=.pb.h) $(PROTOS:.proto=.pb.cc) +CLIENT_OBJS = $(addsuffix .o, $(basename $(CLIENT_SOURCES))) +SERVER_OBJS = $(addsuffix .o, $(basename $(SERVER_SOURCES))) + +ifeq ($(SYSTEM),Darwin) + ifneq ("$(LINK_SO)", "") + STATIC_LINKINGS += -lbrpc + else + # *.a must be explicitly specified in clang + STATIC_LINKINGS += $(BRPC_PATH)/output/lib/libbrpc.a + endif + LINK_OPTIONS_SO = $^ $(STATIC_LINKINGS) $(DYNAMIC_LINKINGS) + LINK_OPTIONS = $^ $(STATIC_LINKINGS) $(DYNAMIC_LINKINGS) +else ifeq ($(SYSTEM),Linux) + STATIC_LINKINGS += -lbrpc + LINK_OPTIONS_SO = -Xlinker "-(" $^ -Xlinker "-)" $(STATIC_LINKINGS) $(DYNAMIC_LINKINGS) + LINK_OPTIONS = -Xlinker "-(" $^ -Wl,-Bstatic $(STATIC_LINKINGS) -Wl,-Bdynamic -Xlinker "-)" $(DYNAMIC_LINKINGS) +endif + +.PHONY:all +ifdef WITH_RDMA +all: client server +else +all: +endif + +.PHONY:clean +clean: + @echo "> Cleaning" + rm -rf client server $(PROTO_GENS) $(PROTO_OBJS) $(CLIENT_OBJS) $(SERVER_OBJS) + +client:$(PROTO_OBJS) $(CLIENT_OBJS) + @echo "> Linking $@" +ifneq ("$(LINK_SO)", "") + $(CXX) $(LIBPATHS) $(SOPATHS) $(LINK_OPTIONS_SO) -o $@ +else + $(CXX) $(LIBPATHS) $(LINK_OPTIONS) -o $@ +endif + +server:$(PROTO_OBJS) $(SERVER_OBJS) + @echo "> Linking $@" +ifneq ("$(LINK_SO)", "") + $(CXX) $(LIBPATHS) $(SOPATHS) $(LINK_OPTIONS_SO) -o $@ +else + $(CXX) $(LIBPATHS) $(LINK_OPTIONS) -o $@ +endif + +%.pb.cc 
%.pb.h:%.proto + @echo "> Generating $@" + $(PROTOC) --cpp_out=. --proto_path=. $(PROTOC_EXTRA_ARGS) $< + +%.o:%.cpp + @echo "> Compiling $@" + $(CXX) -c $(HDRPATHS) $(CXXFLAGS) $< -o $@ + +%.o:%.cc + @echo "> Compiling $@" + $(CXX) -c $(HDRPATHS) $(CXXFLAGS) $< -o $@ \ No newline at end of file diff --git a/example/rdma_performance/client.cpp b/example/rdma_performance/client.cpp new file mode 100644 index 0000000000..5d0178afd6 --- /dev/null +++ b/example/rdma_performance/client.cpp @@ -0,0 +1,310 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +#include +#include +#include +#include +#include "butil/atomicops.h" +#include "butil/fast_rand.h" +#include "butil/logging.h" +#include "brpc/rdma/rdma_helper.h" +#include "brpc/server.h" +#include "brpc/channel.h" +#include "bthread/bthread.h" +#include "bvar/latency_recorder.h" +#include "bvar/variable.h" +#include "test.pb.h" + +DEFINE_int32(thread_num, 0, "How many threads are used"); +DEFINE_int32(queue_depth, 1, "How many requests can be pending in the queue"); +DEFINE_int32(expected_qps, 0, "The expected QPS"); +DEFINE_int32(max_thread_num, 16, "The max number of threads are used"); +DEFINE_int32(attachment_size, -1, "Attachment size is used (in Bytes)"); +DEFINE_bool(echo_attachment, false, "Select whether attachment should be echo"); +DEFINE_string(connection_type, "single", "Connection type of the channel"); +DEFINE_string(protocol, "baidu_std", "Protocol type."); +DEFINE_string(servers, "0.0.0.0:8002+0.0.0.0:8002", "IP Address of servers"); +DEFINE_bool(use_rdma, true, "Use RDMA or not"); +DEFINE_int32(rpc_timeout_ms, 2000, "RPC call timeout"); +DEFINE_int32(test_seconds, 20, "Test running time"); +DEFINE_int32(test_iterations, 0, "Test iterations"); +DEFINE_int32(dummy_port, 8001, "Dummy server port number"); + +bvar::LatencyRecorder g_latency_recorder("client"); +bvar::LatencyRecorder g_server_cpu_recorder("server_cpu"); +bvar::LatencyRecorder g_client_cpu_recorder("client_cpu"); +butil::atomic g_last_time(0); +butil::atomic g_total_bytes; +butil::atomic g_total_cnt; +std::vector g_servers; +int rr_index = 0; +volatile bool g_stop = false; + +butil::atomic g_token(10000); + +static void* GenerateToken(void* arg) { + int64_t start_time = butil::monotonic_time_ns(); + int64_t accumulative_token = g_token.load(butil::memory_order_relaxed); + while (!g_stop) { + bthread_usleep(100000); + int64_t now = butil::monotonic_time_ns(); + if (accumulative_token * 1000000000 / (now - start_time) < FLAGS_expected_qps) { + int64_t delta = FLAGS_expected_qps * 
(now - start_time) / 1000000000 - accumulative_token; + g_token.fetch_add(delta, butil::memory_order_relaxed); + accumulative_token += delta; + } + } + return NULL; +} + +class PerformanceTest { +public: + PerformanceTest(int attachment_size, bool echo_attachment) + : _addr(NULL) + , _channel(NULL) + , _start_time(0) + , _iterations(0) + , _stop(false) + { + if (attachment_size > 0) { + _addr = malloc(attachment_size); + butil::fast_rand_bytes(_addr, attachment_size); + _attachment.append(_addr, attachment_size); + } + _echo_attachment = echo_attachment; + } + + ~PerformanceTest() { + if (_addr) { + free(_addr); + } + delete _channel; + } + + inline bool IsStop() { return _stop; } + + int Init() { + brpc::ChannelOptions options; + options.use_rdma = FLAGS_use_rdma; + options.protocol = FLAGS_protocol; + options.connection_type = FLAGS_connection_type; + options.timeout_ms = FLAGS_rpc_timeout_ms; + options.max_retry = 0; + std::string server = g_servers[(rr_index++) % g_servers.size()]; + _channel = new brpc::Channel(); + if (_channel->Init(server.c_str(), &options) != 0) { + LOG(ERROR) << "Fail to initialize channel"; + return -1; + } + brpc::Controller cntl; + test::PerfTestResponse response; + test::PerfTestRequest request; + request.set_echo_attachment(_echo_attachment); + test::PerfTestService_Stub stub(_channel); + stub.Test(&cntl, &request, &response, NULL); + if (cntl.Failed()) { + LOG(ERROR) << "RPC call failed: " << cntl.ErrorText(); + return -1; + } + return 0; + } + + struct RespClosure { + brpc::Controller* cntl; + test::PerfTestResponse* resp; + PerformanceTest* test; + }; + + void SendRequest() { + if (FLAGS_expected_qps > 0) { + while (g_token.load(butil::memory_order_relaxed) <= 0) { + bthread_usleep(10); + } + g_token.fetch_sub(1, butil::memory_order_relaxed); + } + RespClosure* closure = new RespClosure; + test::PerfTestRequest request; + closure->resp = new test::PerfTestResponse(); + closure->cntl = new brpc::Controller(); + 
request.set_echo_attachment(_echo_attachment); + closure->cntl->request_attachment().append(_attachment); + closure->test = this; + google::protobuf::Closure* done = brpc::NewCallback(&HandleResponse, closure); + test::PerfTestService_Stub stub(_channel); + stub.Test(closure->cntl, &request, closure->resp, done); + } + + static void HandleResponse(RespClosure* closure) { + std::unique_ptr cntl_guard(closure->cntl); + std::unique_ptr response_guard(closure->resp); + if (closure->cntl->Failed()) { + LOG(ERROR) << "RPC call failed: " << closure->cntl->ErrorText(); + closure->test->_stop = true; + return; + } + + g_latency_recorder << closure->cntl->latency_us(); + if (closure->resp->cpu_usage().size() > 0) { + g_server_cpu_recorder << atof(closure->resp->cpu_usage().c_str()) * 100; + } + g_total_bytes.fetch_add(closure->cntl->request_attachment().size(), butil::memory_order_relaxed); + g_total_cnt.fetch_add(1, butil::memory_order_relaxed); + + cntl_guard.reset(NULL); + response_guard.reset(NULL); + + if (closure->test->_iterations == 0 && FLAGS_test_iterations > 0) { + closure->test->_stop = true; + return; + } + --closure->test->_iterations; + uint64_t last = g_last_time.load(butil::memory_order_relaxed); + uint64_t now = butil::gettimeofday_us(); + if (now > last && now - last > 100000) { + if (g_last_time.exchange(now, butil::memory_order_relaxed) == last) { + g_client_cpu_recorder << + atof(bvar::Variable::describe_exposed("process_cpu_usage").c_str()) * 100; + } + } + if (now - closure->test->_start_time > FLAGS_test_seconds * 1000000u) { + closure->test->_stop = true; + return; + } + closure->test->SendRequest(); + } + + static void* RunTest(void* arg) { + PerformanceTest* test = (PerformanceTest*)arg; + test->_start_time = butil::gettimeofday_us(); + test->_iterations = FLAGS_test_iterations; + + for (int i = 0; i < FLAGS_queue_depth; ++i) { + test->SendRequest(); + } + + return NULL; + } + +private: + void* _addr; + brpc::Channel* _channel; + uint64_t 
_start_time; + uint32_t _iterations; + volatile bool _stop; + butil::IOBuf _attachment; + bool _echo_attachment; +}; + +static void* DeleteTest(void* arg) { + PerformanceTest* test = (PerformanceTest*)arg; + delete test; + return NULL; +} + +void Test(int thread_num, int attachment_size) { + std::cout << "[Threads: " << thread_num + << ", Depth: " << FLAGS_queue_depth + << ", Attachment: " << attachment_size << "B" + << ", RDMA: " << (FLAGS_use_rdma ? "yes" : "no") + << ", Echo: " << (FLAGS_echo_attachment ? "yes]" : "no]") + << std::endl; + g_total_bytes.store(0, butil::memory_order_relaxed); + g_total_cnt.store(0, butil::memory_order_relaxed); + std::vector tests; + for (int k = 0; k < thread_num; ++k) { + PerformanceTest* t = new PerformanceTest(attachment_size, FLAGS_echo_attachment); + if (t->Init() < 0) { + exit(1); + } + tests.push_back(t); + } + uint64_t start_time = butil::gettimeofday_us(); + bthread_t tid[thread_num]; + if (FLAGS_expected_qps > 0) { + bthread_t tid; + bthread_start_background(&tid, &BTHREAD_ATTR_NORMAL, GenerateToken, NULL); + } + for (int k = 0; k < thread_num; ++k) { + bthread_start_background(&tid[k], &BTHREAD_ATTR_NORMAL, + PerformanceTest::RunTest, tests[k]); + } + for (int k = 0; k < thread_num; ++k) { + while (!tests[k]->IsStop()) { + bthread_usleep(10000); + } + } + uint64_t end_time = butil::gettimeofday_us(); + double throughput = g_total_bytes / 1.048576 / (end_time - start_time); + if (FLAGS_test_iterations == 0) { + std::cout << "Avg-Latency: " << g_latency_recorder.latency(10) + << ", 90th-Latency: " << g_latency_recorder.latency_percentile(0.9) + << ", 99th-Latency: " << g_latency_recorder.latency_percentile(0.99) + << ", 99.9th-Latency: " << g_latency_recorder.latency_percentile(0.999) + << ", Throughput: " << throughput << "MB/s" + << ", QPS: " << (g_total_cnt.load(butil::memory_order_relaxed) * 1000 / (end_time - start_time)) << "k" + << ", Server CPU-utilization: " << g_server_cpu_recorder.latency(10) << "\%" + << ", 
Client CPU-utilization: " << g_client_cpu_recorder.latency(10) << "\%" + << std::endl; + } else { + std::cout << " Throughput: " << throughput << "MB/s" << std::endl; + } + g_stop = true; + for (int k = 0; k < thread_num; ++k) { + bthread_start_background(&tid[k], &BTHREAD_ATTR_NORMAL, DeleteTest, tests[k]); + } +} + +int main(int argc, char* argv[]) { + GFLAGS_NS::ParseCommandLineFlags(&argc, &argv, true); + + // Initialize RDMA environment in advance. + if (FLAGS_use_rdma) { + brpc::rdma::GlobalRdmaInitializeOrDie(); + } + + brpc::StartDummyServerAt(FLAGS_dummy_port); + + std::string::size_type pos1 = 0; + std::string::size_type pos2 = FLAGS_servers.find('+'); + while (pos2 != std::string::npos) { + g_servers.push_back(FLAGS_servers.substr(pos1, pos2 - pos1)); + pos1 = pos2 + 1; + pos2 = FLAGS_servers.find('+', pos1); + } + g_servers.push_back(FLAGS_servers.substr(pos1)); + + if (FLAGS_thread_num > 0 && FLAGS_attachment_size >= 0) { + Test(FLAGS_thread_num, FLAGS_attachment_size); + } else if (FLAGS_thread_num <= 0 && FLAGS_attachment_size >= 0) { + for (int i = 1; i <= FLAGS_max_thread_num; i *= 2) { + Test(i, FLAGS_attachment_size); + } + } else if (FLAGS_thread_num > 0 && FLAGS_attachment_size < 0) { + for (int i = 1; i <= 1024; i *= 4) { + Test(FLAGS_thread_num, i); + } + } else { + for (int j = 1; j <= 1024; j *= 4) { + for (int i = 1; i <= FLAGS_max_thread_num; i *= 2) { + Test(i, j); + } + } + } + + return 0; +} diff --git a/example/rdma_performance/server.cpp b/example/rdma_performance/server.cpp new file mode 100644 index 0000000000..e06f7950b2 --- /dev/null +++ b/example/rdma_performance/server.cpp @@ -0,0 +1,84 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. 
The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include +#include "butil/atomicops.h" +#include "butil/logging.h" +#include "butil/time.h" +#include "brpc/server.h" +#include "bvar/variable.h" +#include "test.pb.h" + +DEFINE_int32(port, 8002, "TCP Port of this server"); +DEFINE_bool(use_rdma, true, "Use RDMA or not"); + +butil::atomic g_last_time(0); + +namespace test { +class PerfTestServiceImpl : public PerfTestService { +public: + PerfTestServiceImpl() {} + ~PerfTestServiceImpl() {}; + + void Test(google::protobuf::RpcController* cntl_base, + const PerfTestRequest* request, + PerfTestResponse* response, + google::protobuf::Closure* done) { + brpc::ClosureGuard done_guard(done); + uint64_t last = g_last_time.load(butil::memory_order_relaxed); + uint64_t now = butil::monotonic_time_us(); + if (now > last && now - last > 100000) { + if (g_last_time.exchange(now, butil::memory_order_relaxed) == last) { + response->set_cpu_usage(bvar::Variable::describe_exposed("process_cpu_usage")); + } else { + response->set_cpu_usage(""); + } + } else { + response->set_cpu_usage(""); + } + if (request->echo_attachment()) { + brpc::Controller* cntl = + static_cast(cntl_base); + cntl->response_attachment().append(cntl->request_attachment()); + } + } +}; +} + +int main(int argc, char* argv[]) { + GFLAGS_NS::ParseCommandLineFlags(&argc, &argv, true); + + brpc::Server server; + test::PerfTestServiceImpl perf_test_service_impl; + + if 
(server.AddService(&perf_test_service_impl, + brpc::SERVER_DOESNT_OWN_SERVICE) != 0) { + LOG(ERROR) << "Fail to add service"; + return -1; + } + g_last_time.store(0, butil::memory_order_relaxed); + + brpc::ServerOptions options; + options.use_rdma = FLAGS_use_rdma; + if (server.Start(FLAGS_port, &options) != 0) { + LOG(ERROR) << "Fail to start EchoServer"; + return -1; + } + + server.RunUntilAskedToQuit(); + return 0; +} diff --git a/example/rdma_performance/test.proto b/example/rdma_performance/test.proto new file mode 100644 index 0000000000..8e33009688 --- /dev/null +++ b/example/rdma_performance/test.proto @@ -0,0 +1,33 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +syntax="proto2"; +option cc_generic_services = true; + +package test; + +message PerfTestRequest { + required bool echo_attachment = 1; +}; + +message PerfTestResponse { + required string cpu_usage = 1; +}; + +service PerfTestService { + rpc Test(PerfTestRequest) returns (PerfTestResponse); +}; diff --git a/glog.BUILD b/glog.BUILD deleted file mode 100644 index c8f96dc877..0000000000 --- a/glog.BUILD +++ /dev/null @@ -1,171 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. 
See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -licenses(["notice"]) - -cc_library( - name = "glog", - srcs = [ - "src/base/commandlineflags.h", - "src/base/googleinit.h", - "src/demangle.cc", - "src/logging.cc", - "src/raw_logging.cc", - "src/symbolize.cc", - "src/utilities.cc", - "src/vlog_is_on.cc", - ], - hdrs = [ - "raw_logging_h", - "src/base/mutex.h", - "src/demangle.h", - "src/symbolize.h", - "src/utilities.h", - "src/glog/log_severity.h", - ":config_h", - ":logging_h", - ":stl_logging_h", - ":vlog_is_on_h", - ], - copts = [ - # Disable warnings that exists in glog - "-Wno-sign-compare", - "-Wno-unused-local-typedefs", - # Inject google namespace as "google" - "-D_START_GOOGLE_NAMESPACE_='namespace google {'", - "-D_END_GOOGLE_NAMESPACE_='}'", - "-DGOOGLE_NAMESPACE='google'", - # Allows src/base/mutex.h to include pthread.h. - "-DHAVE_PTHREAD", - # Allows src/logging.cc to determine the host name. 
- "-DHAVE_SYS_UTSNAME_H", - # System header files enabler for src/utilities.cc - # Enable system calls from syscall.h - "-DHAVE_SYS_SYSCALL_H", - # Enable system calls from sys/time.h - "-DHAVE_SYS_TIME_H", - "-DHAVE_STDINT_H", - "-DHAVE_STRING_H", - # For logging.cc - "-DHAVE_PREAD", - "-DHAVE_FCNTL", - "-DHAVE_SYS_TYPES_H", - # Allows syslog support - "-DHAVE_SYSLOG_H", - # GFlags - "-isystem $(GENDIR)/external/com_github_gflags_gflags/", - "-DHAVE_LIB_GFLAGS", - # Necessary for creating soft links of log files - "-DHAVE_UNISTD_H", - ], - includes = [ - ".", - "src", - ], - visibility = ["//visibility:public"], - deps = [ - "//external:gflags", - ], -) - -# Below are the generation rules that generates the necessary header -# files for glog. Originally they are generated by CMAKE -# configure_file() command, which replaces certain template -# placeholders in the .in files with provided values. - -# gen_sh is a bash script that provides the values for generated -# header files. Under the hood it is just a wrapper over sed. -genrule( - name = "gen_sh", - outs = [ - "gen.sh", - ], - cmd = """ -cat > $@ <<"EOF" -#! 
/bin/sh -sed -e 's/@ac_cv_have_unistd_h@/1/g' \ - -e 's/@ac_cv_have_stdint_h@/1/g' \ - -e 's/@ac_cv_have_systypes_h@/1/g' \ - -e 's/@ac_cv_have_libgflags_h@/1/g' \ - -e 's/@ac_cv_have_uint16_t@/1/g' \ - -e 's/@ac_cv_have___builtin_expect@/1/g' \ - -e 's/@ac_cv_have_.*@/0/g' \ - -e 's/@ac_google_start_namespace@/namespace google {/g' \ - -e 's/@ac_google_end_namespace@/}/g' \ - -e 's/@ac_google_namespace@/google/g' \ - -e 's/@ac_cv___attribute___noinline@/__attribute__((noinline))/g' \ - -e 's/@ac_cv___attribute___noreturn@/__attribute__((noreturn))/g' \ - -e 's/@ac_cv___attribute___printf_4_5@/__attribute__((__format__ (__printf__, 4, 5)))/g' -EOF""", -) - -genrule( - name = "config_h", - srcs = [ - "src/config.h.cmake.in", - ], - outs = [ - "config.h", - ], - cmd = "awk '{ gsub(/^#cmakedefine/, \"//cmakedefine\"); print; }' $(<) > $(@)", -) - -genrule( - name = "logging_h", - srcs = [ - "src/glog/logging.h.in", - ], - outs = [ - "glog/logging.h", - ], - cmd = "$(location :gen_sh) < $(<) > $(@)", - tools = [":gen_sh"], -) - -genrule( - name = "raw_logging_h", - srcs = [ - "src/glog/raw_logging.h.in", - ], - outs = [ - "glog/raw_logging.h", - ], - cmd = "$(location :gen_sh) < $(<) > $(@)", - tools = [":gen_sh"], -) - -genrule( - name = "stl_logging_h", - srcs = [ - "src/glog/stl_logging.h.in", - ], - outs = [ - "glog/stl_logging.h", - ], - cmd = "$(location :gen_sh) < $(<) > $(@)", - tools = [":gen_sh"], -) - -genrule( - name = "vlog_is_on_h", - srcs = [ - "src/glog/vlog_is_on.h.in", - ], - outs = [ - "glog/vlog_is_on.h", - ], - cmd = "$(location :gen_sh) < $(<) > $(@)", - tools = [":gen_sh"], -) diff --git a/package/rpm/brpc.spec b/package/rpm/brpc.spec index 9d472b197a..d7cfbc2e20 100644 --- a/package/rpm/brpc.spec +++ b/package/rpm/brpc.spec @@ -18,14 +18,14 @@ # Name: brpc -Version: 1.2.0 +Version: 1.3.0 Release: 1%{?dist} Summary: An industrial-grade RPC framework used throughout Baidu, with 1,000,000+ instances(not counting clients) and thousands kinds of 
services. Group: Development License: Apache2 URL: https://github.com/apache/incubator-brpc -Source0: incubator-brpc-%{version}.tar.gz +Source0: apache-brpc-%{version}-incubating-src.tar.gz # https://access.redhat.com/solutions/519993 %global _filter_GLIBC_PRIVATE 1 @@ -42,6 +42,9 @@ BuildRequires: devtoolset-8-gcc-c++ %define __strip /opt/rh/devtoolset-8/root/usr/bin/strip %endif +BuildRequires: cmake +BuildRequires: gcc +BuildRequires: gcc-c++ BuildRequires: gflags-devel >= 2.1 BuildRequires: protobuf-devel >= 2.4 BuildRequires: leveldb-devel @@ -64,29 +67,36 @@ Requires: brpc-devel = %{version}-%{release} Static %{name} libraries. %prep -%setup -n incubator-%{name}-%{version} - +%setup -n apache-%{name}-%{version}-incubating-src %build -mkdir -p %{_target_platform} - -pushd %{_target_platform} - %if 0%{?use_devtoolset} . /opt/rh/devtoolset-8/enable %endif -%{cmake} .. +%if 0%{?fedora} >= 33 || 0%{?rhel} >= 8 +%{cmake} -DBUILD_BRPC_TOOLS:BOOLEAN=OFF -DDOWNLOAD_GTEST:BOOLEAN=OFF +%{cmake_build} +%else +mkdir -p %{_target_platform} +pushd %{_target_platform} +%{cmake} -DBUILD_BRPC_TOOLS:BOOLEAN=OFF -DDOWNLOAD_GTEST:BOOLEAN=OFF .. 
make %{?_smp_mflags} + popd +%endif %install rm -rf $RPM_BUILD_ROOT +%if 0%{?fedora} >= 33 || 0%{?rhel} >= 8 +%{cmake_install} +%else pushd %{_target_platform} %make_install popd +%endif %clean rm -rf $RPM_BUILD_ROOT diff --git a/src/brpc/acceptor.cpp b/src/brpc/acceptor.cpp index 39a2d01282..62732881f2 100644 --- a/src/brpc/acceptor.cpp +++ b/src/brpc/acceptor.cpp @@ -21,6 +21,7 @@ #include "butil/fd_guard.h" // fd_guard #include "butil/fd_utility.h" // make_close_on_exec #include "butil/time.h" // gettimeofday_us +#include "brpc/rdma/rdma_endpoint.h" #include "brpc/acceptor.h" @@ -37,7 +38,8 @@ Acceptor::Acceptor(bthread_keytable_pool_t* pool) , _listened_fd(-1) , _acception_id(0) , _empty_cond(&_map_mutex) - , _ssl_ctx(NULL) { + , _ssl_ctx(NULL) + , _use_rdma(false) { } Acceptor::~Acceptor() { @@ -272,8 +274,17 @@ void Acceptor::OnNewConnectionsUntilEAGAIN(Socket* acception) { options.fd = in_fd; butil::sockaddr2endpoint(&in_addr, in_len, &options.remote_side); options.user = acception->user(); - options.on_edge_triggered_events = InputMessenger::OnNewMessages; options.initial_ssl_ctx = am->_ssl_ctx; +#if BRPC_WITH_RDMA + if (am->_use_rdma) { + options.on_edge_triggered_events = rdma::RdmaEndpoint::OnNewDataFromTcp; + } else { +#else + { +#endif + options.on_edge_triggered_events = InputMessenger::OnNewMessages; + } + options.use_rdma = am->_use_rdma; if (Socket::Create(options, &socket_id) != 0) { LOG(ERROR) << "Fail to create Socket"; continue; diff --git a/src/brpc/acceptor.h b/src/brpc/acceptor.h index fec220bc47..c442a60c8a 100644 --- a/src/brpc/acceptor.h +++ b/src/brpc/acceptor.h @@ -32,6 +32,7 @@ struct ConnectStatistics { // Accept connections from a specific port and then // process messages from which it reads class Acceptor : public InputMessenger { +friend class Server; public: typedef butil::FlatMap SocketMap; @@ -106,6 +107,9 @@ class Acceptor : public InputMessenger { SocketMap _socket_map; std::shared_ptr _ssl_ctx; + + // Whether to use rdma or 
not + bool _use_rdma; }; } // namespace brpc diff --git a/src/brpc/builtin/hotspots_service.cpp b/src/brpc/builtin/hotspots_service.cpp index 500cb759e5..c757732053 100644 --- a/src/brpc/builtin/hotspots_service.cpp +++ b/src/brpc/builtin/hotspots_service.cpp @@ -589,7 +589,6 @@ static void DisplayResult(Controller* cntl, } break; } - CHECK(!use_html); // NOTE: not send prof_result to os first which does copying. os.move_to(resp); if (use_html) { diff --git a/src/brpc/channel.cpp b/src/brpc/channel.cpp old mode 100755 new mode 100644 index 443a05dbe4..2de1de10dd --- a/src/brpc/channel.cpp +++ b/src/brpc/channel.cpp @@ -32,6 +32,7 @@ #include "brpc/controller.h" #include "brpc/channel.h" #include "brpc/details/usercode_backup_pool.h" // TooManyUserCode +#include "brpc/rdma/rdma_helper.h" #include "brpc/policy/esp_authenticator.h" namespace brpc { @@ -49,6 +50,7 @@ ChannelOptions::ChannelOptions() , connection_type(CONNECTION_TYPE_UNKNOWN) , succeed_without_server(true) , log_succeed_without_server(true) + , use_rdma(false) , auth(NULL) , retry_policy(NULL) , ns_filter(NULL) @@ -100,6 +102,9 @@ static ChannelSignature ComputeChannelSignature(const ChannelOptions& opt) { } else { // All disabled ChannelSSLOptions are the same } + if (opt.use_rdma) { + buf.append("|rdma"); + } butil::MurmurHash3_x64_128_Update(&mm_ctx, buf.data(), buf.size()); buf.clear(); @@ -140,6 +145,21 @@ Channel::~Channel() { } } +#if BRPC_WITH_RDMA +static bool OptionsAvailableForRdma(const ChannelOptions* opt) { + if (opt->has_ssl_options()) { + LOG(WARNING) << "Cannot use SSL and RDMA at the same time"; + return false; + } + if (!rdma::SupportedByRdma(opt->protocol.name())) { + LOG(WARNING) << "Cannot use " << opt->protocol.name() + << " over RDMA"; + return false; + } + return true; +} +#endif + int Channel::InitChannelOptions(const ChannelOptions* options) { if (options) { // Override default options if user provided one. 
_options = *options; @@ -149,6 +169,19 @@ int Channel::InitChannelOptions(const ChannelOptions* options) { LOG(ERROR) << "Channel does not support the protocol"; return -1; } + + if (_options.use_rdma) { +#if BRPC_WITH_RDMA + if (!OptionsAvailableForRdma(&_options)) { + return -1; + } + rdma::GlobalRdmaInitializeOrDie(); +#else + LOG(WARNING) << "Cannot use rdma since brpc does not compile with rdma"; + return -1; +#endif + } + _serialize_request = protocol->serialize_request; _pack_request = protocol->pack_request; _get_method_name = protocol->get_method_name; @@ -312,7 +345,7 @@ int Channel::InitSingle(const butil::EndPoint& server_addr_and_port, return -1; } if (SocketMapInsert(SocketMapKey(server_addr_and_port, sig), - &_server_id, ssl_ctx) != 0) { + &_server_id, ssl_ctx, _options.use_rdma) != 0) { LOG(ERROR) << "Fail to insert into SocketMap"; return -1; } @@ -349,6 +382,7 @@ int Channel::Init(const char* ns_url, GetNamingServiceThreadOptions ns_opt; ns_opt.succeed_without_server = _options.succeed_without_server; ns_opt.log_succeed_without_server = _options.log_succeed_without_server; + ns_opt.use_rdma = _options.use_rdma; ns_opt.channel_signature = ComputeChannelSignature(_options); if (CreateSocketSSLContext(_options, &ns_opt.ssl_ctx) != 0) { return -1; diff --git a/src/brpc/channel.h b/src/brpc/channel.h index ef97ced3c3..6600fcf25c 100644 --- a/src/brpc/channel.h +++ b/src/brpc/channel.h @@ -102,6 +102,10 @@ struct ChannelOptions { const ChannelSSLOptions& ssl_options() const { return *_ssl_options.get(); } ChannelSSLOptions* mutable_ssl_options(); + // Let this channel use rdma rather than tcp. + // Default: false + bool use_rdma; + // Turn on authentication for this channel if `auth' is not NULL. // Note `auth' will not be deleted by channel and must remain valid when // the channel is being used. 
diff --git a/src/brpc/controller.cpp b/src/brpc/controller.cpp index dfa4ce1f55..b6c8e750fe 100644 --- a/src/brpc/controller.cpp +++ b/src/brpc/controller.cpp @@ -76,6 +76,10 @@ BAIDU_REGISTER_ERRNO(brpc::ELIMIT, "Reached server's max_concurrency"); BAIDU_REGISTER_ERRNO(brpc::ECLOSE, "Close socket initiatively"); BAIDU_REGISTER_ERRNO(brpc::EITP, "Bad Itp response"); +#if BRPC_WITH_RDMA +BAIDU_REGISTER_ERRNO(brpc::ERDMA, "RDMA verbs error"); +BAIDU_REGISTER_ERRNO(brpc::ERDMAMEM, "Memory not registered for RDMA"); +#endif DECLARE_bool(log_as_json); diff --git a/src/brpc/controller.h b/src/brpc/controller.h old mode 100755 new mode 100644 diff --git a/src/brpc/details/naming_service_thread.cpp b/src/brpc/details/naming_service_thread.cpp index 13acf8ae30..28f49dab16 100644 --- a/src/brpc/details/naming_service_thread.cpp +++ b/src/brpc/details/naming_service_thread.cpp @@ -125,7 +125,8 @@ void NamingServiceThread::Actions::ResetServers( // Socket. SocketMapKey may be passed through AddWatcher. 
Make sure // to pick those Sockets with the right settings during OnAddedServers const SocketMapKey key(_added[i], _owner->_options.channel_signature); - CHECK_EQ(0, SocketMapInsert(key, &tagged_id.id, _owner->_options.ssl_ctx)); + CHECK_EQ(0, SocketMapInsert(key, &tagged_id.id, _owner->_options.ssl_ctx, + _owner->_options.use_rdma)); _added_sockets.push_back(tagged_id); } diff --git a/src/brpc/details/naming_service_thread.h b/src/brpc/details/naming_service_thread.h index 90dd004e5e..21eef77964 100644 --- a/src/brpc/details/naming_service_thread.h +++ b/src/brpc/details/naming_service_thread.h @@ -44,10 +44,12 @@ class NamingServiceWatcher { struct GetNamingServiceThreadOptions { GetNamingServiceThreadOptions() : succeed_without_server(false) - , log_succeed_without_server(true) {} + , log_succeed_without_server(true) + , use_rdma(false) {} bool succeed_without_server; bool log_succeed_without_server; + bool use_rdma; ChannelSignature channel_signature; std::shared_ptr ssl_ctx; }; diff --git a/src/brpc/errno.proto b/src/brpc/errno.proto index d1ff51cb3a..fccd8edb8d 100644 --- a/src/brpc/errno.proto +++ b/src/brpc/errno.proto @@ -49,4 +49,8 @@ enum Errno { ELIMIT = 2004; // Reached server's limit on resources ECLOSE = 2005; // Close socket initiatively EITP = 2006; // Failed Itp response + + // Errno related to RDMA (may happen at both sides) + ERDMA = 3001; // RDMA verbs error + ERDMAMEM = 3002; // Memory not registered for RDMA } diff --git a/src/brpc/esp_message.h b/src/brpc/esp_message.h index ce271d45cc..279f7b6a08 100644 --- a/src/brpc/esp_message.h +++ b/src/brpc/esp_message.h @@ -58,7 +58,7 @@ class EspMessage : public ::google::protobuf::Message { #if GOOGLE_PROTOBUF_VERSION >= 3006000 EspMessage* New(::google::protobuf::Arena* arena) const override; #endif - void CopyFrom(const ::google::protobuf::Message& from) override; + void CopyFrom(const ::google::protobuf::Message& from) PB_321_OVERRIDE; void MergeFrom(const ::google::protobuf::Message& from) 
override; void CopyFrom(const EspMessage& from); void MergeFrom(const EspMessage& from); diff --git a/src/brpc/event_dispatcher.h b/src/brpc/event_dispatcher.h index b6cae400ac..1f165cfc70 100644 --- a/src/brpc/event_dispatcher.h +++ b/src/brpc/event_dispatcher.h @@ -30,6 +30,7 @@ namespace brpc { // running in separate bthreads. class EventDispatcher { friend class Socket; +friend class rdma::RdmaEndpoint; public: EventDispatcher(); diff --git a/src/brpc/global.cpp b/src/brpc/global.cpp old mode 100755 new mode 100644 index ced8a11cbc..af8dac5c27 --- a/src/brpc/global.cpp +++ b/src/brpc/global.cpp @@ -38,6 +38,7 @@ #include "brpc/policy/remote_file_naming_service.h" #include "brpc/policy/consul_naming_service.h" #include "brpc/policy/discovery_naming_service.h" +#include "brpc/policy/nacos_naming_service.h" // Load Balancers #include "brpc/policy/round_robin_load_balancer.h" @@ -135,6 +136,7 @@ struct GlobalExtensions { RemoteFileNamingService rfns; ConsulNamingService cns; DiscoveryNamingService dcns; + NacosNamingService nns; RoundRobinLoadBalancer rr_lb; WeightedRoundRobinLoadBalancer wrr_lb; @@ -358,6 +360,7 @@ static void GlobalInitializeOrDieImpl() { NamingServiceExtension()->RegisterOrDie("remotefile", &g_ext->rfns); NamingServiceExtension()->RegisterOrDie("consul", &g_ext->cns); NamingServiceExtension()->RegisterOrDie("discovery", &g_ext->dcns); + NamingServiceExtension()->RegisterOrDie("nacos", &g_ext->nns); // Load Balancers LoadBalancerExtension()->RegisterOrDie("rr", &g_ext->rr_lb); diff --git a/src/brpc/input_messenger.cpp b/src/brpc/input_messenger.cpp index d9b1a3a9ad..f33e626ca4 100644 --- a/src/brpc/input_messenger.cpp +++ b/src/brpc/input_messenger.cpp @@ -27,6 +27,7 @@ #include "brpc/options.pb.h" // ProtocolType #include "brpc/reloadable_flags.h" // BRPC_VALIDATE_GFLAG #include "brpc/protocol.h" // ListProtocols +#include "brpc/rdma/rdma_endpoint.h" #include "brpc/input_messenger.h" @@ -58,6 +59,7 @@ DECLARE_uint64(max_body_size); const size_t 
MSG_SIZE_WINDOW = 10; // Take last so many message into stat. const size_t MIN_ONCE_READ = 4096; const size_t MAX_ONCE_READ = 524288; +const size_t PROTO_DUMMY_LEN = 4; ParseResult InputMessenger::CutInputMessage( Socket* m, size_t* index, bool read_eof) { @@ -85,6 +87,16 @@ ParseResult InputMessenger::CutInputMessage( << " bytes, the connection will be closed." " Set max_body_size to allow bigger messages"; return result; + } else { + if (m->_read_buf.size() >= 4) { + char data[PROTO_DUMMY_LEN]; + m->_read_buf.copy_to_cstr(data, PROTO_DUMMY_LEN); + if (strncmp(data, "RDMA", PROTO_DUMMY_LEN) == 0 && + m->_rdma_state == Socket::RDMA_OFF) { + // To avoid timeout when client uses RDMA but server uses TCP + return MakeParseError(PARSE_ERROR_TRY_OTHERS); + } + } } if (m->CreatedByConnect()) { @@ -177,6 +189,131 @@ static void QueueMessage(InputMessageBase* to_run_msg, } } +InputMessenger::InputMessageClosure::~InputMessageClosure() { + if (_msg) { + ProcessInputMessage(_msg); + } +} + +void InputMessenger::InputMessageClosure::reset(InputMessageBase* m) { + if (_msg) { + ProcessInputMessage(_msg); + } + _msg = m; +} + +int InputMessenger::ProcessNewMessage( + Socket* m, ssize_t bytes, bool read_eof, + const uint64_t received_us, const uint64_t base_realtime, + InputMessageClosure& last_msg) { + m->AddInputBytes(bytes); + + // Avoid this socket to be closed due to idle_timeout_s + m->_last_readtime_us.store(received_us, butil::memory_order_relaxed); + + size_t last_size = m->_read_buf.length(); + int num_bthread_created = 0; + while (1) { + size_t index = 8888; + ParseResult pr = CutInputMessage(m, &index, read_eof); + if (!pr.is_ok()) { + if (pr.error() == PARSE_ERROR_NOT_ENOUGH_DATA) { + // incomplete message, re-read. + // However, some buffer may have been consumed + // under protocols like HTTP. 
Record this size + m->_last_msg_size += (last_size - m->_read_buf.length()); + break; + } else if (pr.error() == PARSE_ERROR_TRY_OTHERS) { + LOG(WARNING) + << "Close " << *m << " due to unknown message: " + << butil::ToPrintable(m->_read_buf); + m->SetFailed(EINVAL, "Close %s due to unknown message", + m->description().c_str()); + return -1; + } else { + LOG(WARNING) << "Close " << *m << ": " << pr.error_str(); + m->SetFailed(EINVAL, "Close %s: %s", + m->description().c_str(), pr.error_str()); + return -1; + } + } + + m->AddInputMessages(1); + // Calculate average size of messages + const size_t cur_size = m->_read_buf.length(); + if (cur_size == 0) { + // _read_buf is consumed, it's good timing to return blocks + // cached internally back to TLS, otherwise the memory is not + // reused until next message arrives which is quite uncertain + // in situations that most connections are idle. + m->_read_buf.return_cached_blocks(); + } + m->_last_msg_size += (last_size - cur_size); + last_size = cur_size; + const size_t old_avg = m->_avg_msg_size; + if (old_avg != 0) { + m->_avg_msg_size = (old_avg * (MSG_SIZE_WINDOW - 1) + m->_last_msg_size) + / MSG_SIZE_WINDOW; + } else { + m->_avg_msg_size = m->_last_msg_size; + } + m->_last_msg_size = 0; + + if (pr.message() == NULL) { // the Process() step can be skipped. 
+ continue; + } + pr.message()->_received_us = received_us; + pr.message()->_base_real_us = base_realtime; + + // This unique_ptr prevents msg to be lost before transfering + // ownership to last_msg + DestroyingPtr msg(pr.message()); + QueueMessage(last_msg.release(), &num_bthread_created, + m->_keytable_pool); + if (_handlers[index].process == NULL) { + LOG(ERROR) << "process of index=" << index << " is NULL"; + continue; + } + m->ReAddress(&msg->_socket); + m->PostponeEOF(); + msg->_process = _handlers[index].process; + msg->_arg = _handlers[index].arg; + + if (_handlers[index].verify != NULL) { + int auth_error = 0; + if (0 == m->FightAuthentication(&auth_error)) { + // Get the right to authenticate + if (_handlers[index].verify(msg.get())) { + m->SetAuthentication(0); + } else { + m->SetAuthentication(ERPCAUTH); + LOG(WARNING) << "Fail to authenticate " << *m; + m->SetFailed(ERPCAUTH, "Fail to authenticate %s", + m->description().c_str()); + return -1; + } + } else { + LOG_IF(FATAL, auth_error != 0) << + "Impossible! Socket should have been " + "destroyed when authentication failed"; + } + } + if (!m->is_read_progressive()) { + // Transfer ownership to last_msg + last_msg.reset(msg.release()); + } else { + QueueMessage(msg.release(), &num_bthread_created, + m->_keytable_pool); + bthread_flush(); + num_bthread_created = 0; + } + } + if (num_bthread_created) { + bthread_flush(); + } + return 0; +} + void InputMessenger::OnNewMessages(Socket* m) { // Notes: // - If the socket has only one message, the message will be parsed and @@ -189,13 +326,12 @@ void InputMessenger::OnNewMessages(Socket* m) { // - Verify will always be called in this bthread at most once and before // any process. InputMessenger* messenger = static_cast(m->user()); - const InputMessageHandler* handlers = messenger->_handlers; int progress = Socket::PROGRESS_INIT; // Notice that all *return* no matter successful or not will run last // message, even if the socket is about to be closed. 
This should be // OK in most cases. - std::unique_ptr last_msg; + InputMessageClosure last_msg; bool read_eof = false; while (!read_eof) { const int64_t received_us = butil::cpuwide_time_us(); @@ -233,112 +369,11 @@ void InputMessenger::OnNewMessages(Socket* m) { continue; } } - - m->AddInputBytes(nr); - // Avoid this socket to be closed due to idle_timeout_s - m->_last_readtime_us.store(received_us, butil::memory_order_relaxed); - - size_t last_size = m->_read_buf.length(); - int num_bthread_created = 0; - while (1) { - size_t index = 8888; - ParseResult pr = messenger->CutInputMessage(m, &index, read_eof); - if (!pr.is_ok()) { - if (pr.error() == PARSE_ERROR_NOT_ENOUGH_DATA) { - // incomplete message, re-read. - // However, some buffer may have been consumed - // under protocols like HTTP. Record this size - m->_last_msg_size += (last_size - m->_read_buf.length()); - break; - } else if (pr.error() == PARSE_ERROR_TRY_OTHERS) { - LOG(WARNING) - << "Close " << *m << " due to unknown message: " - << butil::ToPrintable(m->_read_buf); - m->SetFailed(EINVAL, "Close %s due to unknown message", - m->description().c_str()); - return; - } else { - LOG(WARNING) << "Close " << *m << ": " << pr.error_str(); - m->SetFailed(EINVAL, "Close %s: %s", - m->description().c_str(), pr.error_str()); - return; - } - } - - m->AddInputMessages(1); - // Calculate average size of messages - const size_t cur_size = m->_read_buf.length(); - if (cur_size == 0) { - // _read_buf is consumed, it's good timing to return blocks - // cached internally back to TLS, otherwise the memory is not - // reused until next message arrives which is quite uncertain - // in situations that most connections are idle. 
- m->_read_buf.return_cached_blocks(); - } - m->_last_msg_size += (last_size - cur_size); - last_size = cur_size; - const size_t old_avg = m->_avg_msg_size; - if (old_avg != 0) { - m->_avg_msg_size = (old_avg * (MSG_SIZE_WINDOW - 1) + m->_last_msg_size) - / MSG_SIZE_WINDOW; - } else { - m->_avg_msg_size = m->_last_msg_size; - } - m->_last_msg_size = 0; - - if (pr.message() == NULL) { // the Process() step can be skipped. - continue; - } - pr.message()->_received_us = received_us; - pr.message()->_base_real_us = base_realtime; - - // This unique_ptr prevents msg to be lost before transfering - // ownership to last_msg - DestroyingPtr msg(pr.message()); - QueueMessage(last_msg.release(), &num_bthread_created, - m->_keytable_pool); - if (handlers[index].process == NULL) { - LOG(ERROR) << "process of index=" << index << " is NULL"; - continue; - } - m->ReAddress(&msg->_socket); - m->PostponeEOF(); - msg->_process = handlers[index].process; - msg->_arg = handlers[index].arg; - - if (handlers[index].verify != NULL) { - int auth_error = 0; - if (0 == m->FightAuthentication(&auth_error)) { - // Get the right to authenticate - if (handlers[index].verify(msg.get())) { - m->SetAuthentication(0); - } else { - m->SetAuthentication(ERPCAUTH); - LOG(WARNING) << "Fail to authenticate " << *m; - m->SetFailed(ERPCAUTH, "Fail to authenticate %s", - m->description().c_str()); - return; - } - } else { - LOG_IF(FATAL, auth_error != 0) << - "Impossible! 
Socket should have been " - "destroyed when authentication failed"; - } - } - if (!m->is_read_progressive()) { - // Transfer ownership to last_msg - last_msg.reset(msg.release()); - } else { - QueueMessage(msg.release(), &num_bthread_created, - m->_keytable_pool); - bthread_flush(); - num_bthread_created = 0; - } - } - if (num_bthread_created) { - bthread_flush(); - } + if (m->_rdma_state == Socket::RDMA_OFF && messenger->ProcessNewMessage( + m, nr, read_eof, received_us, base_realtime, last_msg) < 0) { + return; + } } if (read_eof) { @@ -444,7 +479,16 @@ int InputMessenger::Create(const butil::EndPoint& remote_side, int InputMessenger::Create(SocketOptions options, SocketId* id) { options.user = this; - options.on_edge_triggered_events = OnNewMessages; +#if BRPC_WITH_RDMA + if (options.use_rdma) { + options.on_edge_triggered_events = rdma::RdmaEndpoint::OnNewDataFromTcp; + options.app_connect = std::make_shared(); + } else { +#else + { +#endif + options.on_edge_triggered_events = OnNewMessages; + } return Socket::Create(options, id); } diff --git a/src/brpc/input_messenger.h b/src/brpc/input_messenger.h index f588b57343..68e7b0c78a 100644 --- a/src/brpc/input_messenger.h +++ b/src/brpc/input_messenger.h @@ -26,6 +26,9 @@ namespace brpc { +namespace rdma { +class RdmaEndpoint; +} struct InputMessageHandler { // The callback to cut a message from `source'. @@ -70,6 +73,7 @@ struct InputMessageHandler { // Process messages from connections. // `Message' corresponds to a client's request or a server's response. 
class InputMessenger : public SocketUser { +friend class rdma::RdmaEndpoint; public: explicit InputMessenger(size_t capacity = 128); ~InputMessenger(); @@ -107,10 +111,34 @@ class InputMessenger : public SocketUser { static void OnNewMessages(Socket* m); private: + class InputMessageClosure { + public: + InputMessageClosure() : _msg(NULL) { } + ~InputMessageClosure(); + + InputMessageBase* release() { + InputMessageBase* m = _msg; + _msg = NULL; + return m; + } + + void reset(InputMessageBase* m); + + private: + InputMessageBase* _msg; + }; + // Find a valid scissor from `handlers' to cut off `header' and `payload' // from m->read_buf, save index of the scissor into `index'. ParseResult CutInputMessage(Socket* m, size_t* index, bool read_eof); + // Process a new message just received in OnNewMessages + // Return value >= 0 means success + int ProcessNewMessage( + Socket* m, ssize_t bytes, bool read_eof, + const uint64_t received_us, const uint64_t base_realtime, + InputMessageClosure& last_msg); + // User-supplied scissors and handlers. 
// the index of handler is exactly the same as the protocol InputMessageHandler* _handlers; diff --git a/src/brpc/memcache.h b/src/brpc/memcache.h index a34ccd6021..c6fd2b95f4 100644 --- a/src/brpc/memcache.h +++ b/src/brpc/memcache.h @@ -94,7 +94,7 @@ class MemcacheRequest : public ::google::protobuf::Message { #if GOOGLE_PROTOBUF_VERSION >= 3006000 MemcacheRequest* New(::google::protobuf::Arena* arena) const override; #endif - void CopyFrom(const ::google::protobuf::Message& from) override; + void CopyFrom(const ::google::protobuf::Message& from) PB_321_OVERRIDE; void MergeFrom(const ::google::protobuf::Message& from) override; void CopyFrom(const MemcacheRequest& from); void MergeFrom(const MemcacheRequest& from); @@ -207,7 +207,7 @@ class MemcacheResponse : public ::google::protobuf::Message { #if GOOGLE_PROTOBUF_VERSION >= 3006000 MemcacheResponse* New(::google::protobuf::Arena* arena) const override; #endif - void CopyFrom(const ::google::protobuf::Message& from) override; + void CopyFrom(const ::google::protobuf::Message& from) PB_321_OVERRIDE; void MergeFrom(const ::google::protobuf::Message& from) override; void CopyFrom(const MemcacheResponse& from); void MergeFrom(const MemcacheResponse& from); diff --git a/src/brpc/nshead_message.h b/src/brpc/nshead_message.h index 475d8ce8d6..11cc1c60da 100644 --- a/src/brpc/nshead_message.h +++ b/src/brpc/nshead_message.h @@ -54,7 +54,7 @@ class NsheadMessage : public ::google::protobuf::Message { #if GOOGLE_PROTOBUF_VERSION >= 3006000 NsheadMessage* New(::google::protobuf::Arena* arena) const override; #endif - void CopyFrom(const ::google::protobuf::Message& from) override; + void CopyFrom(const ::google::protobuf::Message& from) PB_321_OVERRIDE; void MergeFrom(const ::google::protobuf::Message& from) override; void CopyFrom(const NsheadMessage& from); void MergeFrom(const NsheadMessage& from); diff --git a/src/brpc/pb_compat.h b/src/brpc/pb_compat.h index c9654b7517..70faeb5424 100644 --- a/src/brpc/pb_compat.h +++ 
b/src/brpc/pb_compat.h @@ -19,6 +19,12 @@ #ifndef BRPC_PB_COMPAT_H #define BRPC_PB_COMPAT_H +#if GOOGLE_PROTOBUF_VERSION < 3021000 +# define PB_321_OVERRIDE override +#else +# define PB_321_OVERRIDE +#endif + #if GOOGLE_PROTOBUF_VERSION < 3019000 # define PB_319_OVERRIDE override #else diff --git a/src/brpc/periodic_naming_service.cpp b/src/brpc/periodic_naming_service.cpp index e113624199..5e10977997 100644 --- a/src/brpc/periodic_naming_service.cpp +++ b/src/brpc/periodic_naming_service.cpp @@ -29,6 +29,10 @@ DEFINE_int32(ns_access_interval, 5, "Wait so many seconds before next access to naming service"); BRPC_VALIDATE_GFLAG(ns_access_interval, PositiveInteger); +int PeriodicNamingService::GetNamingServiceAccessIntervalMs() const { + return std::max(FLAGS_ns_access_interval, 1) * 1000; +} + int PeriodicNamingService::RunNamingService( const char* service_name, NamingServiceActions* actions) { std::vector servers; @@ -47,7 +51,7 @@ int PeriodicNamingService::RunNamingService( actions->ResetServers(servers); } - if (bthread_usleep(std::max(FLAGS_ns_access_interval, 1) * 1000000L) < 0) { + if (bthread_usleep(GetNamingServiceAccessIntervalMs() * 1000UL) < 0) { if (errno == ESTOP) { RPC_VLOG << "Quit NamingServiceThread=" << bthread_self(); return 0; diff --git a/src/brpc/periodic_naming_service.h b/src/brpc/periodic_naming_service.h index b27033f720..8216ddfdbc 100644 --- a/src/brpc/periodic_naming_service.h +++ b/src/brpc/periodic_naming_service.h @@ -29,6 +29,8 @@ class PeriodicNamingService : public NamingService { virtual int GetServers(const char *service_name, std::vector* servers) = 0; + virtual int GetNamingServiceAccessIntervalMs() const; + int RunNamingService(const char* service_name, NamingServiceActions* actions); }; diff --git a/src/brpc/policy/http_rpc_protocol.cpp b/src/brpc/policy/http_rpc_protocol.cpp index 22b79ca929..6e32598f3a 100644 --- a/src/brpc/policy/http_rpc_protocol.cpp +++ b/src/brpc/policy/http_rpc_protocol.cpp @@ -77,6 +77,9 @@ 
DEFINE_bool(pb_enum_as_number, false, DEFINE_string(request_id_header, "x-request-id", "The http header to mark a session"); +DEFINE_bool(use_http_error_code, false, "Whether set the x-bd-error-code header " + "of http response to brpc error code"); + // Read user address from the header specified by -http_header_of_user_ip static bool GetUserAddressFromHeaderImpl(const HttpHeader& headers, butil::EndPoint* user_addr) { @@ -395,7 +398,16 @@ void ProcessHttpResponse(InputMessageBase* msg) { &err, std::min((int)res_body.size(), FLAGS_http_max_error_length)); } - cntl->SetFailed(EHTTP, "%s", err.c_str()); + // If server return brpc error code by x-bd-error-code, + // set the returned error code to controller. Otherwise, + // set EHTTP to controller uniformly. + const std::string* error_code_ptr = res_header->GetHeader(common->ERROR_CODE); + int error_code = error_code_ptr ? strtol(error_code_ptr->data(), NULL, 10) : 0; + if (FLAGS_use_http_error_code && error_code != 0) { + cntl->SetFailed(error_code, "%s", err.c_str()); + } else { + cntl->SetFailed(EHTTP, "%s", err.c_str()); + } if (cntl->response() == NULL || cntl->response()->GetDescriptor()->field_count() == 0) { // A http call. Http users may need the body(containing a html, @@ -1457,7 +1469,7 @@ void ProcessHttpRequest(InputMessageBase *msg) { if (is_grpc_ct) { bool grpc_compressed = false; if (!RemoveGrpcPrefix(&req_body, &grpc_compressed)) { - cntl->SetFailed(ERESPONSE, "Invalid gRPC response"); + cntl->SetFailed(EREQUEST, "Invalid gRPC request"); return; } if (grpc_compressed) { diff --git a/src/brpc/policy/nacos_naming_service.cpp b/src/brpc/policy/nacos_naming_service.cpp new file mode 100644 index 0000000000..0431626ea5 --- /dev/null +++ b/src/brpc/policy/nacos_naming_service.cpp @@ -0,0 +1,289 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. 
See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "nacos_naming_service.h" + +#include + +#include + +#include "brpc/http_status_code.h" +#include "brpc/log.h" +#include "butil/iobuf.h" +#include "butil/logging.h" +#include "butil/third_party/rapidjson/document.h" + +namespace brpc { +namespace policy { + +DEFINE_string(nacos_address, "", + "The query string of request nacos for discovering service."); +DEFINE_string(nacos_service_discovery_path, "/nacos/v1/ns/instance/list", + "The url path for discovering service."); +DEFINE_string(nacos_service_auth_path, "/nacos/v1/auth/login", + "The url path for authentiction."); +DEFINE_int32(nacos_connect_timeout_ms, 200, + "Timeout for creating connections to nacos in milliseconds"); +DEFINE_string(nacos_username, "", "nacos username"); +DEFINE_string(nacos_password, "", "nacos password"); +DEFINE_string(nacos_load_balancer, "rr", "nacos load balancer name"); + +int NacosNamingService::Connect() { + ChannelOptions opt; + opt.protocol = PROTOCOL_HTTP; + opt.connect_timeout_ms = FLAGS_nacos_connect_timeout_ms; + const int ret = _channel.Init(FLAGS_nacos_address.c_str(), + FLAGS_nacos_load_balancer.c_str(), &opt); + if (ret != 0) { + LOG(ERROR) << "Fail to init channel to nacos at " + << FLAGS_nacos_address; + } + return ret; +} + +int 
NacosNamingService::RefreshAccessToken(const char *service_name) { + Controller cntl; + cntl.http_request().uri() = FLAGS_nacos_service_auth_path; + cntl.http_request().set_method(brpc::HttpMethod::HTTP_METHOD_POST); + cntl.http_request().set_content_type("application/x-www-form-urlencoded"); + + auto &buf = cntl.request_attachment(); + buf.append("username="); + buf.append(FLAGS_nacos_username); + buf.append("&password="); + buf.append(FLAGS_nacos_password); + + _channel.CallMethod(nullptr, &cntl, nullptr, nullptr, nullptr); + if (cntl.Failed()) { + LOG(ERROR) << "Fail to access " << FLAGS_nacos_service_auth_path << ": " + << cntl.ErrorText(); + return -1; + } + + BUTIL_RAPIDJSON_NAMESPACE::Document doc; + if (doc.Parse(cntl.response_attachment().to_string().c_str()) + .HasParseError()) { + LOG(ERROR) << "Failed to parse nacos auth response"; + return -1; + } + if (!doc.IsObject()) { + LOG(ERROR) << "The nacos's auth response for " << service_name + << " is not a json object"; + return -1; + } + + auto iter = doc.FindMember("accessToken"); + if (iter != doc.MemberEnd() && iter->value.IsString()) { + _access_token = iter->value.GetString(); + } else { + LOG(ERROR) << "The nacos's auth response for " << service_name + << " has no accessToken field"; + return -1; + } + + auto iter_ttl = doc.FindMember("tokenTtl"); + if (iter_ttl != doc.MemberEnd() && iter_ttl->value.IsInt()) { + _token_expire_time = time(NULL) + iter_ttl->value.GetInt() - 10; + } else { + _token_expire_time = 0; + } + + return 0; +} + +int NacosNamingService::GetServerNodes(const char *service_name, + bool token_changed, + std::vector *nodes) { + if (_nacos_url.empty() || token_changed) { + _nacos_url = FLAGS_nacos_service_discovery_path; + _nacos_url += "?"; + if (!_access_token.empty()) { + _nacos_url += "accessToken=" + _access_token; + _nacos_url += "&"; + } + _nacos_url += service_name; + } + + Controller cntl; + cntl.http_request().uri() = _nacos_url; + _channel.CallMethod(nullptr, &cntl, 
nullptr, nullptr, nullptr); + if (cntl.Failed()) { + LOG(ERROR) << "Fail to access " << _nacos_url << ": " + << cntl.ErrorText(); + return -1; + } + if (cntl.http_response().status_code() != HTTP_STATUS_OK) { + LOG(ERROR) << "Failed to request nacos, http status code: " + << cntl.http_response().status_code(); + return -1; + } + + BUTIL_RAPIDJSON_NAMESPACE::Document doc; + if (doc.Parse(cntl.response_attachment().to_string().c_str()) + .HasParseError()) { + LOG(ERROR) << "Failed to parse nacos response"; + return -1; + } + if (!doc.IsObject()) { + LOG(ERROR) << "The nacos's response for " << service_name + << " is not a json object"; + return -1; + } + + auto it_hosts = doc.FindMember("hosts"); + if (it_hosts == doc.MemberEnd()) { + LOG(ERROR) << "The nacos's response for " << service_name + << " has no hosts member"; + return -1; + } + auto &hosts = it_hosts->value; + if (!hosts.IsArray()) { + LOG(ERROR) << "hosts member in nacos response is not an array"; + return -1; + } + + std::set presence; + for (auto it = hosts.Begin(); it != hosts.End(); ++it) { + auto &host = *it; + if (!host.IsObject()) { + LOG(ERROR) << "host member in nacos response is not an object"; + continue; + } + + auto it_ip = host.FindMember("ip"); + if (it_ip == host.MemberEnd() || !it_ip->value.IsString()) { + LOG(ERROR) << "host in nacos response has not ip"; + continue; + } + auto &ip = it_ip->value; + + auto it_port = host.FindMember("port"); + if (it_port == host.MemberEnd() || !it_port->value.IsInt()) { + LOG(ERROR) << "host in nacos response has not port"; + continue; + } + auto &port = it_port->value; + + auto it_enabled = host.FindMember("enabled"); + if (it_enabled == host.MemberEnd() || !(it_enabled->value.IsBool()) || + !(it_enabled->value.GetBool())) { + LOG(INFO) << "nacos " << ip.GetString() << ":" << port.GetInt() + << " is not enable"; + continue; + } + + auto it_healthy = host.FindMember("healthy"); + if (it_healthy == host.MemberEnd() || !(it_healthy->value.IsBool()) || + 
!(it_healthy->value.GetBool())) { + LOG(INFO) << "nacos " << ip.GetString() << ":" << port.GetInt() + << " is not healthy"; + continue; + } + + butil::EndPoint end_point; + if (str2endpoint(ip.GetString(), port.GetUint(), &end_point) != 0) { + LOG(ERROR) << "ncos service with illegal address or port: " + << ip.GetString() << ":" << port.GetUint(); + continue; + } + + ServerNode node(end_point); + auto it_weight = host.FindMember("weight"); + if (it_weight != host.MemberEnd() && it_weight->value.IsNumber()) { + node.tag = + std::to_string(static_cast(it_weight->value.GetDouble())); + } + + presence.insert(node); + } + + nodes->reserve(presence.size()); + nodes->assign(presence.begin(), presence.end()); + + if (nodes->empty() && hosts.Size() != 0) { + LOG(ERROR) << "All service about " << service_name + << " from nacos is invalid, refuse to update servers"; + return -1; + } + + RPC_VLOG << "Got " << nodes->size() + << (nodes->size() > 1 ? " servers" : " server") << " from " + << service_name; + + auto it_cache = doc.FindMember("cacheMillis"); + if (it_cache != doc.MemberEnd() && it_cache->value.IsInt64()) { + _cache_ms = it_cache->value.GetInt64(); + } + + return 0; +} + +NacosNamingService::NacosNamingService() + : _nacos_connected(false), _cache_ms(-1), _token_expire_time(0) {} + +int NacosNamingService::GetNamingServiceAccessIntervalMs() const { + if (0 < _cache_ms) { + return _cache_ms; + } + return PeriodicNamingService::GetNamingServiceAccessIntervalMs(); +} + +int NacosNamingService::GetServers(const char *service_name, + std::vector *servers) { + if (!_nacos_connected) { + const int ret = Connect(); + if (0 == ret) { + _nacos_connected = true; + } else { + return ret; + } + } + + const bool authentiction_enabled = + !FLAGS_nacos_username.empty() && !FLAGS_nacos_password.empty(); + const bool has_invalid_access_token = + _access_token.empty() || + (0 < _token_expire_time && _token_expire_time <= time(NULL)); + bool token_changed = false; + + if 
(authentiction_enabled && has_invalid_access_token) { + const int ret = RefreshAccessToken(service_name); + if (ret == 0) { + token_changed = true; + } else { + return ret; + } + } + + servers->clear(); + return GetServerNodes(service_name, token_changed, servers); +} + +void NacosNamingService::Describe(std::ostream &os, + const DescribeOptions &) const { + os << "nacos"; + return; +} + +NamingService *NacosNamingService::New() const { + return new NacosNamingService; +} + +void NacosNamingService::Destroy() { delete this; } + +} // namespace policy +} // namespace brpc diff --git a/src/brpc/policy/nacos_naming_service.h b/src/brpc/policy/nacos_naming_service.h new file mode 100644 index 0000000000..dcd7713617 --- /dev/null +++ b/src/brpc/policy/nacos_naming_service.h @@ -0,0 +1,67 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +#ifndef BRPC_POLICY_NACOS_NAMING_SERVICE_H +#define BRPC_POLICY_NACOS_NAMING_SERVICE_H + +#include + +#include +#include + +#include "brpc/channel.h" +#include "brpc/periodic_naming_service.h" +#include "brpc/server_node.h" + +namespace brpc { +namespace policy { + +// Acquire server list from nacos +class NacosNamingService : public PeriodicNamingService { +public: + NacosNamingService(); + + int GetServers(const char* service_name, + std::vector* servers) override; + + int GetNamingServiceAccessIntervalMs() const override; + + void Describe(std::ostream& os, const DescribeOptions&) const override; + + NamingService* New() const override; + + void Destroy() override; + +private: + int Connect(); + int RefreshAccessToken(const char* service_name); + int GetServerNodes(const char* service_name, bool token_changed, + std::vector* nodes); + +private: + brpc::Channel _channel; + std::string _nacos_url; + std::string _access_token; + bool _nacos_connected; + long _cache_ms; + time_t _token_expire_time; +}; + +} // namespace policy +} // namespace brpc + +#endif // BRPC_POLICY_NACOS_NAMING_SERVICE_H diff --git a/src/brpc/policy/redis_protocol.cpp b/src/brpc/policy/redis_protocol.cpp index 67e5213307..94524e8b75 100644 --- a/src/brpc/policy/redis_protocol.cpp +++ b/src/brpc/policy/redis_protocol.cpp @@ -190,7 +190,9 @@ ParseResult ParseRedisMessage(butil::IOBuf* source, Socket* socket, wopt.ignore_eovercrowded = true; LOG_IF(WARNING, socket->Write(&sendbuf, &wopt) != 0) << "Fail to send redis reply"; - ctx->arena.clear(); + if(ctx->parser.ParsedArgsSize() == 0) { + ctx->arena.clear(); + } return MakeParseError(err); } else { // NOTE(gejun): PopPipelinedInfo() is actually more contended than what diff --git a/src/brpc/protocol.h b/src/brpc/protocol.h old mode 100755 new mode 100644 diff --git a/src/brpc/rdma/block_pool.cpp b/src/brpc/rdma/block_pool.cpp new file mode 100644 index 0000000000..54e47da457 --- /dev/null +++ b/src/brpc/rdma/block_pool.cpp @@ -0,0 +1,561 @@ 
+// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#if BRPC_WITH_RDMA + +#include +#include +#include +#include +#include "butil/fast_rand.h" +#include "butil/iobuf.h" +#include "butil/object_pool.h" +#include "butil/thread_local.h" +#include "bthread/bthread.h" +#include "brpc/rdma/block_pool.h" + + +namespace brpc { +namespace rdma { + +DEFINE_int32(rdma_memory_pool_initial_size_mb, 1024, + "Initial size of memory pool for RDMA (MB)"); +DEFINE_int32(rdma_memory_pool_increase_size_mb, 1024, + "Increased size of memory pool for RDMA (MB)"); +DEFINE_int32(rdma_memory_pool_max_regions, 4, "Max number of regions"); +DEFINE_int32(rdma_memory_pool_buckets, 4, "Number of buckets to reduce race"); +DEFINE_int32(rdma_memory_pool_tls_cache_num, 128, "Number of cached block in tls"); + +static RegisterCallback g_cb = NULL; + +// Number of bytes in 1MB +static const size_t BYTES_IN_MB = 1048576; + +static const int BLOCK_DEFAULT = 0; // 8KB +static const int BLOCK_LARGE = 1; // 64KB +static const int BLOCK_HUGE = 2; // 2MB +static const int BLOCK_SIZE_COUNT = 3; +static size_t g_block_size[BLOCK_SIZE_COUNT] = { 8192, 65536, 2 * BYTES_IN_MB }; + +struct IdleNode { + void* start; + size_t len; + IdleNode* next; +}; + +struct 
Region { + Region() { start = 0; } + uintptr_t start; + size_t size; + uint32_t block_type; + uint32_t id; // lkey +}; + +static const int32_t RDMA_MEMORY_POOL_MIN_REGIONS = 1; +static const int32_t RDMA_MEMORY_POOL_MAX_REGIONS = 16; +static Region g_regions[RDMA_MEMORY_POOL_MAX_REGIONS]; +static int g_region_num = 0; + +static const int32_t RDMA_MEMORY_POOL_MIN_SIZE = 32; // 16MB +static const int32_t RDMA_MEMORY_POOL_MAX_SIZE = 1048576; // 1TB + +static const int32_t RDMA_MEMORY_POOL_MIN_BUCKETS = 1; +static const int32_t RDMA_MEMORY_POOL_MAX_BUCKETS = 16; +static size_t g_buckets = 1; + +static bool g_dump_enable = false; +static butil::Mutex* g_dump_mutex = NULL; + +// Only for default block size +static __thread IdleNode* tls_idle_list = NULL; +static __thread size_t tls_idle_num = 0; +static __thread bool tls_inited = false; +static butil::Mutex* g_tls_info_mutex = NULL; +static size_t g_tls_info_cnt = 0; +static size_t* g_tls_info[1024]; + +// For each block size, there are some buckets of idle list to reduce race. 
+struct GlobalInfo { + std::vector idle_list[BLOCK_SIZE_COUNT]; + std::vector lock[BLOCK_SIZE_COUNT]; + std::vector idle_size[BLOCK_SIZE_COUNT]; + butil::Mutex extend_lock; +}; +static GlobalInfo* g_info = NULL; + +static inline Region* GetRegion(const void* buf) { + if (!buf) { + errno = EINVAL; + return NULL; + } + Region* r = NULL; + uintptr_t addr = (uintptr_t)buf; + for (int i = 0; i < FLAGS_rdma_memory_pool_max_regions; ++i) { + if (g_regions[i].start == 0) { + break; + } + if (addr >= g_regions[i].start && + addr < g_regions[i].start + g_regions[i].size) { + r = &g_regions[i]; + break; + } + } + return r; +} + +uint32_t GetRegionId(const void* buf) { + Region* r = GetRegion(buf); + if (!r) { + return 0; + } + return r->id; +} + +// Extend the block pool with a new region (with different region ID) +static void* ExtendBlockPool(size_t region_size, int block_type) { + if (region_size < 1) { + errno = EINVAL; + return NULL; + } + + if (g_region_num == FLAGS_rdma_memory_pool_max_regions) { + LOG(INFO) << "Memory pool reaches max regions"; + errno = ENOMEM; + return NULL; + } + + // Regularize region size + region_size = region_size * BYTES_IN_MB / g_block_size[block_type] / g_buckets; + region_size *= g_block_size[block_type] * g_buckets; + + LOG(INFO) << "Start extend rdma memory " << region_size / BYTES_IN_MB << "MB"; + + void* region_base = NULL; + if (posix_memalign(®ion_base, 4096, region_size) != 0) { + PLOG_EVERY_SECOND(ERROR) << "Memory not enough"; + return NULL; + } + + uint32_t id = g_cb(region_base, region_size); + if (id == 0) { + free(region_base); + return NULL; + } + + IdleNode* node[g_buckets]; + for (size_t i = 0; i < g_buckets; ++i) { + node[i] = butil::get_object(); + if (!node[i]) { + PLOG_EVERY_SECOND(ERROR) << "Memory not enough"; + for (size_t j = 0; j < i; ++j) { + butil::return_object(node[j]); + } + free(region_base); + return NULL; + } + } + + Region* region = &g_regions[g_region_num++]; + region->start = (uintptr_t)region_base; + 
region->size = region_size; + region->id = id; + region->block_type = block_type; + + for (size_t i = 0; i < g_buckets; ++i) { + node[i]->start = (void*)(region->start + i * (region_size / g_buckets)); + node[i]->len = region_size / g_buckets; + node[i]->next = NULL; + g_info->idle_list[block_type][i] = node[i]; + g_info->idle_size[block_type][i] += node[i]->len; + } + + return region_base; +} + +void* InitBlockPool(RegisterCallback cb) { + if (!cb) { + errno = EINVAL; + return NULL; + } + if (g_cb) { + LOG(WARNING) << "Do not initialize block pool repeatedly"; + errno = EINVAL; + return NULL; + } + g_cb = cb; + if (FLAGS_rdma_memory_pool_max_regions < RDMA_MEMORY_POOL_MIN_REGIONS || + FLAGS_rdma_memory_pool_max_regions > RDMA_MEMORY_POOL_MAX_REGIONS) { + LOG(WARNING) << "rdma_memory_pool_max_regions(" + << FLAGS_rdma_memory_pool_max_regions << ") not in [" + << RDMA_MEMORY_POOL_MIN_REGIONS << "," + << RDMA_MEMORY_POOL_MAX_REGIONS << "]!"; + errno = EINVAL; + return NULL; + } + if (FLAGS_rdma_memory_pool_initial_size_mb < RDMA_MEMORY_POOL_MIN_SIZE || + FLAGS_rdma_memory_pool_initial_size_mb > RDMA_MEMORY_POOL_MAX_SIZE) { + LOG(WARNING) << "rdma_memory_pool_initial_size_mb(" + << FLAGS_rdma_memory_pool_initial_size_mb << ") not in [" + << RDMA_MEMORY_POOL_MIN_SIZE << "," + << RDMA_MEMORY_POOL_MAX_SIZE << "]!"; + errno = EINVAL; + return NULL; + } + if (FLAGS_rdma_memory_pool_increase_size_mb < RDMA_MEMORY_POOL_MIN_SIZE || + FLAGS_rdma_memory_pool_increase_size_mb > RDMA_MEMORY_POOL_MAX_SIZE) { + LOG(WARNING) << "rdma_memory_pool_increase_size_mb(" + << FLAGS_rdma_memory_pool_increase_size_mb << ") not in [" + << RDMA_MEMORY_POOL_MIN_SIZE << "," + << RDMA_MEMORY_POOL_MAX_SIZE << "]!"; + errno = EINVAL; + return NULL; + } + if (FLAGS_rdma_memory_pool_buckets < RDMA_MEMORY_POOL_MIN_BUCKETS || + FLAGS_rdma_memory_pool_buckets > RDMA_MEMORY_POOL_MAX_BUCKETS) { + LOG(WARNING) << "rdma_memory_pool_buckets(" + << FLAGS_rdma_memory_pool_buckets << ") not in [" + << 
RDMA_MEMORY_POOL_MIN_BUCKETS << "," + << RDMA_MEMORY_POOL_MAX_BUCKETS << "]!"; + errno = EINVAL; + return NULL; + } + g_buckets = FLAGS_rdma_memory_pool_buckets; + + g_info = new (std::nothrow) GlobalInfo; + if (!g_info) { + return NULL; + } + + for (int i = 0; i < BLOCK_SIZE_COUNT; ++i) { + g_info->idle_list[i].resize(g_buckets, NULL); + if (g_info->idle_list[i].size() != g_buckets) { + return NULL; + } + g_info->lock[i].resize(g_buckets, NULL); + if (g_info->lock[i].size() != g_buckets) { + return NULL; + } + g_info->idle_size[i].resize(g_buckets, 0); + if (g_info->idle_size[i].size() != g_buckets) { + return NULL; + } + for (size_t j = 0; j < g_buckets; ++j) { + g_info->lock[i][j] = new (std::nothrow) butil::Mutex; + if (!g_info->lock[i][j]) { + return NULL; + } + } + } + + g_dump_mutex = new butil::Mutex; + g_tls_info_mutex = new butil::Mutex; + + return ExtendBlockPool(FLAGS_rdma_memory_pool_initial_size_mb, + BLOCK_DEFAULT); +} + +static void* AllocBlockFrom(int block_type) { + bool locked = false; + if (BAIDU_UNLIKELY(g_dump_enable)) { + g_dump_mutex->lock(); + locked = true; + } + void* ptr = NULL; + if (block_type == 0 && tls_idle_list != NULL){ + CHECK(tls_idle_num > 0); + IdleNode* n = tls_idle_list; + tls_idle_list = n->next; + ptr = n->start; + butil::return_object(n); + tls_idle_num--; + if (locked) { + g_dump_mutex->unlock(); + } + return ptr; + } + + uint64_t index = butil::fast_rand() % g_buckets; + BAIDU_SCOPED_LOCK(*g_info->lock[block_type][index]); + IdleNode* node = g_info->idle_list[block_type][index]; + if (!node) { + BAIDU_SCOPED_LOCK(g_info->extend_lock); + node = g_info->idle_list[block_type][index]; + if (!node) { + // There is no block left, extend a new region + if (!ExtendBlockPool(FLAGS_rdma_memory_pool_increase_size_mb, + block_type)) { + LOG_EVERY_SECOND(ERROR) << "Fail to extend new region. " + << "You can set the size of memory pool larger. 
" + << "Refer to the help message of these flags: " + << "rdma_memory_pool_initial_size_mb, " + << "rdma_memory_pool_increase_size_mb, " + << "rdma_memory_pool_max_regions."; + if (locked) { + g_dump_mutex->unlock(); + } + return NULL; + } + node = g_info->idle_list[block_type][index]; + } + } + if (node) { + ptr = node->start; + if (node->len > g_block_size[block_type]) { + node->start = (char*)node->start + g_block_size[block_type]; + node->len -= g_block_size[block_type]; + } else { + g_info->idle_list[block_type][index] = node->next; + butil::return_object(node); + } + g_info->idle_size[block_type][index] -= g_block_size[block_type]; + } else { + if (locked) { + g_dump_mutex->unlock(); + } + return NULL; + } + + // Move more blocks from global list to tls list + if (block_type == 0) { + node = g_info->idle_list[0][index]; + tls_idle_list = node; + IdleNode* last_node = NULL; + while (node) { + if (tls_idle_num > (uint32_t)FLAGS_rdma_memory_pool_tls_cache_num / 2 + || node->len > g_block_size[0]) { + break; + } + tls_idle_num++; + last_node = node; + node = node->next; + } + if (tls_idle_num == 0) { + tls_idle_list = NULL; + } else { + g_info->idle_list[0][index] = node; + } + if (last_node) { + last_node->next = NULL; + } + } + + if (locked) { + g_dump_mutex->unlock(); + } + return ptr; +} + +void* AllocBlock(size_t size) { + if (size == 0 || size > g_block_size[BLOCK_SIZE_COUNT - 1]) { + errno = EINVAL; + return NULL; + } + for (int i = 0; i < BLOCK_SIZE_COUNT; ++i) { + if (size <= g_block_size[i]) { + return AllocBlockFrom(i);; + } + } + return NULL; +} + +void RecycleAll() { + // Only block_type == 0 needs recycle + while (tls_idle_list) { + IdleNode* node = tls_idle_list; + tls_idle_list = node->next; + Region* r = GetRegion(node->start); + uint64_t index = ((uintptr_t)node->start - r->start) * g_buckets / r->size; + BAIDU_SCOPED_LOCK(*g_info->lock[0][index]); + node->next = g_info->idle_list[0][index]; + g_info->idle_list[0][index] = node; + } + 
tls_idle_num = 0; +} + +int DeallocBlock(void* buf) { + if (!buf) { + errno = EINVAL; + return -1; + } + + Region* r = GetRegion(buf); + if (!r) { + errno = ERANGE; + return -1; + } + + IdleNode* node = butil::get_object(); + if (!node) { + PLOG_EVERY_SECOND(ERROR) << "Memory not enough"; + // May lead to block leak, but do not return -1 + return 0; + } + + uint32_t block_type = r->block_type; + size_t block_size = g_block_size[block_type]; + node->start = buf; + node->len = block_size; + + bool locked = false; + if (BAIDU_UNLIKELY(g_dump_enable)) { + g_dump_mutex->lock(); + locked = true; + } + if (block_type == 0 && tls_idle_num < (uint32_t)FLAGS_rdma_memory_pool_tls_cache_num) { + if (!tls_inited) { + tls_inited = true; + butil::thread_atexit(RecycleAll); + BAIDU_SCOPED_LOCK(*g_tls_info_mutex); + if (g_tls_info_cnt < 1024) { + g_tls_info[g_tls_info_cnt++] = &tls_idle_num; + } + } + tls_idle_num++; + node->next = tls_idle_list; + tls_idle_list = node; + if (locked) { + g_dump_mutex->unlock(); + } + return 0; + } + + uint64_t index = ((uintptr_t)buf - r->start) * g_buckets / r->size; + if (block_type == 0) { + size_t len = 0; + // Recycle half the cached blocks in tls for default block size + int num = FLAGS_rdma_memory_pool_tls_cache_num / 2; + IdleNode* new_head = tls_idle_list; + IdleNode* recycle_tail = NULL; + for (int i = 0; i < num; ++i) { + recycle_tail = new_head; + len += recycle_tail->len; + new_head = new_head->next; + } + if (recycle_tail) { + BAIDU_SCOPED_LOCK(*g_info->lock[0][index]); + recycle_tail->next = node; + node->next = g_info->idle_list[0][index]; + g_info->idle_list[0][index] = tls_idle_list; + g_info->idle_size[0][index] += len; + } + tls_idle_list = new_head; + tls_idle_num -= num; + } else { + BAIDU_SCOPED_LOCK(*g_info->lock[block_type][index]); + node->next = g_info->idle_list[block_type][index]; + g_info->idle_list[block_type][index] = node; + g_info->idle_size[block_type][index] += node->len; + } + if (locked) { + 
g_dump_mutex->unlock(); + } + return 0; +} + +size_t GetBlockSize(int type) { + return g_block_size[type]; +} + +void DumpMemoryPoolInfo(std::ostream& os) { + if (!g_dump_mutex) { + return; + } + g_dump_enable = true; + usleep(1000); // wait until all the threads read new g_dump_enable + BAIDU_SCOPED_LOCK(*g_dump_mutex); + os << "********************* Memory Pool Info Dump **********************\n"; + os << "Region Info:\n"; + for (int i = 0; i < g_region_num; ++i) { + os << "\tRegion " << i << ":\n" + << "\t\tBase Addr: " << g_regions[i].start << "\n" + << "\t\tSize: " << g_regions[i].size << "\n" + << "\t\tBlock Type: " << g_regions[i].block_type << "\n" + << "\t\tId: " << g_regions[i].id << "\n"; + } + os << "Idle List Info:\n"; + for (int i = 0; i < BLOCK_SIZE_COUNT; ++i) { + os << "\tFor block size " << GetBlockSize(i) << ":\n"; + for (size_t j = 0; j < g_buckets; ++j) { + os << "\t\tBucket " << j << ": " << g_info->idle_size[i][j] << "\n"; + } + } + os << "Thread Local Cache Info:\n"; + for (size_t i = 0; i < g_tls_info_cnt; ++i) { + os << "\tThread " << i << ": " << *g_tls_info[i] * 8192 << "\n"; + } + os << "******************************************************************\n"; + g_dump_enable = false; +} + +// Just for UT +void DestroyBlockPool() { + RecycleAll(); + for (int i = 0; i < BLOCK_SIZE_COUNT; ++i) { + for (size_t j = 0; j < g_buckets; ++j) { + IdleNode* node = g_info->idle_list[i][j]; + while (node) { + IdleNode* tmp = node->next; + butil::return_object(node); + node = tmp; + } + g_info->idle_list[i][j] = NULL; + } + } + delete g_info; + g_info = NULL; + for (int i = 0; i < g_region_num; ++i) { + if (g_regions[i].start == 0) { + break; + } + free((void*)g_regions[i].start); + g_regions[i].start = 0; + } + g_region_num = 0; + g_cb = NULL; +} + +// Just for UT +int GetBlockType(void* buf) { + Region* r = GetRegion(buf); + if (!r) { + return -1; + } + return r->block_type; +} + +// Just for UT +size_t GetGlobalLen(int block_type) { + size_t len = 
0; + for (size_t i = 0; i < g_buckets; ++i) { + IdleNode* node = g_info->idle_list[block_type][i]; + while (node) { + len += node->len; + node = node->next; + } + } + return len; +} + +// Just for UT +size_t GetRegionNum() { + return g_region_num; +} + +} // namespace rdma +} // namespace brpc + +#endif // if BRPC_WITH_RDMA diff --git a/src/brpc/rdma/block_pool.h b/src/brpc/rdma/block_pool.h new file mode 100644 index 0000000000..9edfb837df --- /dev/null +++ b/src/brpc/rdma/block_pool.h @@ -0,0 +1,104 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#ifndef BRPC_RDMA_BLOCK_POOL_H +#define BRPC_RDMA_BLOCK_POOL_H + +#if BRPC_WITH_RDMA + +namespace brpc { +namespace rdma { + +// This is used as a memory pool for RDMA. The reason why we use memory +// pool is that RDMA transmission requires data in a "registered" space. +// We first get a large bulk of memory by tcmalloc (or other memory +// allocator). Then we call ibv_reg_mr to register this bulk. Every time we +// want to use a piece of memory to send/recv with RDMA, we allocate it from +// memory pool instead of tcmalloc. +// +// It is called block_pool due to the unit of memory allocation is block, +// not byte. 
That means that when the caller wants to get a piece of +// memory smaller than the block size, the pool will still return a whole +// block for it. Obviously, this mechanism may waste some memory. +// However, since in brpc the memory pool is only used by IOBuf, which +// always requires block allocation, we believe that block_pool is a better +// design than the byte-based memory pool. +// +// Because the initial size of block_pool (default: 1GB) may not be enough, we +// hope that the pool is scalable, which means that it can be enlarged when +// there is no more memory in the pool. Therefore, we introduce the concept +// of region. Every bulk of memory obtained from tcmalloc is called a region. +// And the region is the unit of RDMA registration. The caller must be able +// to get the LKey of the region from the pool, which we call the region ID. +// +// Since IOBuf supports different block sizes, the block_pool also supports +// several block sizes: 8KB(default), 64KB and 2MB. The block allocated to +// the caller is the smallest block that is not smaller than the +// requested size. For example, if the caller needs a buffer with a size of +// 9KB, block_pool will allocate a 64KB-block for it. Please remember that +// different-size blocks are in different regions. +// +// Currently, the block_pool supports 16 regions at most. If there is more than +// one region, the complexity of finding which region an address belongs to +// is O(n). Here n is the number of regions. In order to avoid race conditions +// among threads, we do not use a more efficient search data structure. +// Therefore, DO NOT rely on the scalability of block_pool too much. The +// developer should estimate the consumption of memory used for RDMA in +// advance as accurately as possible. Besides, if possible, please use +// only one block size. +// +// The block_pool is thread-safe, so the caller can call it from different +// threads. 
However, before calling allocation and deallocation, the caller +// must call initialization of the block_pool. Otherwise the behavior is +// undefined. + +typedef uint32_t (*RegisterCallback)(void*, size_t); + +// Initialize the block pool +// The argument is a callback called when the pool is enlarged with a new +// region. It should be the memory registration in brpc. However, +// in block_pool, we just abstract it into a function to get region id. +// Return the first region's address, NULL if failed and errno is set. +void* InitBlockPool(RegisterCallback cb); + +// Allocate a buf with length at least @a size (require: size>0) +// Return the address allocated, NULL if failed and errno is set. +void* AllocBlock(size_t size); + +// Deallocate the buf (require: buf!=NULL) +// Return 0 if success, -1 if failed and errno is set. +// If the given buf is not in any region, the errno is ERANGE. +int DeallocBlock(void* buf); + +// Get the region ID of the given buf +uint32_t GetRegionId(const void* buf); + +// Return the block size of the given block type +// type=0: BLOCK_DEFAULT(8KB) +// type=1: BLOCK_LARGE(64KB) +// type=2: BLOCK_HUGE(2MB) +size_t GetBlockSize(int type); + +// Dump memory pool information +void DumpMemoryPoolInfo(std::ostream& os); + +} // namespace rdma +} // namespace brpc + +#endif // if BRPC_WITH_RDMA + +#endif // BRPC_RDMA_BLOCK_POOL_H diff --git a/src/brpc/rdma/rdma_endpoint.cpp b/src/brpc/rdma/rdma_endpoint.cpp new file mode 100644 index 0000000000..299443525f --- /dev/null +++ b/src/brpc/rdma/rdma_endpoint.cpp @@ -0,0 +1,1454 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#if BRPC_WITH_RDMA + +#include +#include "butil/fd_utility.h" +#include "butil/logging.h" // CHECK, LOG +#include "butil/sys_byteorder.h" // HostToNet,NetToHost +#include "bthread/bthread.h" +#include "brpc/errno.pb.h" +#include "brpc/event_dispatcher.h" +#include "brpc/input_messenger.h" +#include "brpc/socket.h" +#include "brpc/reloadable_flags.h" +#include "brpc/rdma/block_pool.h" +#include "brpc/rdma/rdma_helper.h" +#include "brpc/rdma/rdma_endpoint.h" + + +namespace brpc { +namespace rdma { + +extern ibv_cq* (*IbvCreateCq)(ibv_context*, int, void*, ibv_comp_channel*, int); +extern int (*IbvDestroyCq)(ibv_cq*); +extern ibv_comp_channel* (*IbvCreateCompChannel)(ibv_context*); +extern int (*IbvDestroyCompChannel)(ibv_comp_channel*); +extern int (*IbvGetCqEvent)(ibv_comp_channel*, ibv_cq**, void**); +extern void (*IbvAckCqEvents)(ibv_cq*, unsigned int); +extern ibv_qp* (*IbvCreateQp)(ibv_pd*, ibv_qp_init_attr*); +extern int (*IbvModifyQp)(ibv_qp*, ibv_qp_attr*, ibv_qp_attr_mask); +extern int (*IbvQueryQp)(ibv_qp*, ibv_qp_attr*, ibv_qp_attr_mask, ibv_qp_init_attr*); +extern int (*IbvDestroyQp)(ibv_qp*); +extern bool g_skip_rdma_init; + +DEFINE_int32(rdma_sq_size, 128, "SQ size for RDMA"); +DEFINE_int32(rdma_rq_size, 128, "RQ size for RDMA"); +DEFINE_bool(rdma_recv_zerocopy, true, "Enable zerocopy for receive side"); +DEFINE_int32(rdma_zerocopy_min_size, 512, "The minimal size for receive zerocopy"); +DEFINE_string(rdma_recv_block_type, "default", "Default size type for recv WR: " + "default(8KB - 32B)/large(64KB - 32B)/huge(2MB - 32B)"); 
+DEFINE_int32(rdma_cqe_poll_once, 32, "The maximum of cqe number polled once."); +DEFINE_int32(rdma_prepared_qp_size, 128, "SQ and RQ size for prepared QP."); +DEFINE_int32(rdma_prepared_qp_cnt, 1024, "Initial count of prepared QP."); +DEFINE_bool(rdma_trace_verbose, false, "Print log message verbosely"); +BRPC_VALIDATE_GFLAG(rdma_trace_verbose, brpc::PassValidate); + +static const size_t IOBUF_BLOCK_HEADER_LEN = 32; // implementation-dependent +static const size_t IOBUF_BLOCK_DEFAULT_PAYLOAD = + butil::IOBuf::DEFAULT_BLOCK_SIZE - IOBUF_BLOCK_HEADER_LEN; + +// DO NOT change this value unless you know the safe value!!! +// This is the number of reserved WRs in SQ/RQ for pure ACK. +static const size_t RESERVED_WR_NUM = 3; + +// magic string RDMA (4B) +// message length (2B) +// hello version (2B) +// impl version (2B): 0 means TCP should be used +// block size (2B) +// sq size (2B) +// rq size (2B), LID (2B) +// GID (16B) +// QP number (4B) +static const char* MAGIC_STR = "RDMA"; +static const size_t MAGIC_STR_LEN = 4; +static const size_t HELLO_MSG_LEN_MIN = 38; +static const size_t HELLO_MSG_LEN_MAX = 4096; +static const size_t ACK_MSG_LEN = 4; +static uint16_t g_rdma_hello_msg_len = 38; // In Byte +static uint16_t g_rdma_hello_version = 1; +static uint16_t g_rdma_impl_version = 1; +static uint16_t g_rdma_recv_block_size = 0; + +static const uint32_t MAX_INLINE_DATA = 64; +static const uint8_t MAX_HOP_LIMIT = 16; +static const uint8_t TIMEOUT = 14; +static const uint8_t RETRY_CNT = 7; +static const uint16_t MIN_QP_SIZE = 16; +static const uint16_t MIN_BLOCK_SIZE = 1024; +static const uint32_t ACK_MSG_RDMA_OK = 0x1; + +static butil::Mutex* g_rdma_resource_mutex = NULL; +static RdmaResource* g_rdma_resource_list = NULL; + +struct HelloMessage { + void Serialize(void* data) const; + void Deserialize(void* data); + + uint16_t msg_len; + uint16_t hello_ver; + uint16_t impl_ver; + uint16_t block_size; + uint16_t sq_size; + uint16_t rq_size; + uint16_t lid; + ibv_gid gid; + uint32_t 
qp_num; +}; + +void HelloMessage::Serialize(void* data) const { + uint16_t* current_pos = (uint16_t*)data; + *(current_pos++) = butil::HostToNet16(msg_len); + *(current_pos++) = butil::HostToNet16(hello_ver); + *(current_pos++) = butil::HostToNet16(impl_ver); + *(current_pos++) = butil::HostToNet16(block_size); + *(current_pos++) = butil::HostToNet16(sq_size); + *(current_pos++) = butil::HostToNet16(rq_size); + *(current_pos++) = butil::HostToNet16(lid); + memcpy(current_pos, gid.raw, 16); + uint32_t* qp_num_pos = (uint32_t*)((char*)current_pos + 16); + *qp_num_pos = butil::HostToNet32(qp_num); +} + +void HelloMessage::Deserialize(void* data) { + uint16_t* current_pos = (uint16_t*)data; + msg_len = butil::NetToHost16(*current_pos++); + hello_ver = butil::NetToHost16(*current_pos++); + impl_ver = butil::NetToHost16(*current_pos++); + block_size = butil::NetToHost16(*current_pos++); + sq_size = butil::NetToHost16(*current_pos++); + rq_size = butil::NetToHost16(*current_pos++); + lid = butil::NetToHost16(*current_pos++); + memcpy(gid.raw, current_pos, 16); + qp_num = butil::NetToHost32(*(uint32_t*)((char*)current_pos + 16)); +} + +RdmaResource::RdmaResource() + : qp(NULL) + , cq(NULL) + , comp_channel(NULL) + , next(NULL) { } + +RdmaResource::~RdmaResource() { + if (qp) { + IbvDestroyQp(qp); + qp = NULL; + } + if (cq) { + IbvDestroyCq(cq); + cq = NULL; + } + if (comp_channel) { + IbvDestroyCompChannel(comp_channel); + comp_channel = NULL; + } +} + +RdmaEndpoint::RdmaEndpoint(Socket* s) + : _socket(s) + , _state(UNINIT) + , _resource(NULL) + , _cq_events(0) + , _cq_sid(INVALID_SOCKET_ID) + , _sq_size(FLAGS_rdma_sq_size) + , _rq_size(FLAGS_rdma_rq_size) + , _sbuf() + , _rbuf() + , _rbuf_data() + , _remote_recv_block_size(0) + , _accumulated_ack(0) + , _unsolicited(0) + , _unsolicited_bytes(0) + , _sq_current(0) + , _sq_unsignaled(0) + , _sq_sent(0) + , _rq_received(0) + , _local_window_capacity(0) + , _remote_window_capacity(0) + , _window_size(0) + , _new_rq_wrs(0) +{ 
+ if (_sq_size < MIN_QP_SIZE) { + _sq_size = MIN_QP_SIZE; + } + if (_rq_size < MIN_QP_SIZE) { + _rq_size = MIN_QP_SIZE; + } + _read_butex = bthread::butex_create_checked >(); +} + +RdmaEndpoint::~RdmaEndpoint() { + Reset(); + bthread::butex_destroy(_read_butex); +} + +void RdmaEndpoint::Reset() { + DeallocateResources(); + + _cq_events = 0; + _cq_sid = INVALID_SOCKET_ID; + _state = UNINIT; + _sbuf.clear(); + _rbuf.clear(); + _rbuf_data.clear(); + _accumulated_ack = 0; + _unsolicited = 0; + _sq_current = 0; + _sq_unsignaled = 0; + _local_window_capacity = 0; + _remote_window_capacity = 0; + _window_size.store(0, butil::memory_order_relaxed); + _new_rq_wrs = 0; + _sq_sent = 0; + _rq_received = 0; +} + +void RdmaConnect::StartConnect(const Socket* socket, + void (*done)(int err, void* data), + void* data) { + CHECK(socket->_rdma_ep != NULL); + SocketUniquePtr s; + if (Socket::Address(socket->id(), &s) != 0) { + return; + } + if (!IsRdmaAvailable()) { + socket->_rdma_ep->_state = RdmaEndpoint::FALLBACK_TCP; + s->_rdma_state = Socket::RDMA_OFF; + done(0, data); + return; + } + _done = done; + _data = data; + bthread_t tid; + if (bthread_start_background(&tid, &BTHREAD_ATTR_NORMAL, + RdmaEndpoint::ProcessHandshakeAtClient, socket->_rdma_ep) < 0) { + LOG(FATAL) << "Fail to start handshake bthread"; + } else { + s.release(); + } +} + +void RdmaConnect::StopConnect(Socket* socket) { } + +void RdmaConnect::Run() { + _done(errno, _data); +} + +static void TryReadOnTcpDuringRdmaEst(Socket* s) { + int progress = Socket::PROGRESS_INIT; + while (true) { + uint8_t tmp; + ssize_t nr = read(s->fd(), &tmp, 1); + if (nr < 0) { + if (errno != EAGAIN) { + const int saved_errno = errno; + PLOG(WARNING) << "Fail to read from " << s; + s->SetFailed(saved_errno, "Fail to read from %s: %s", + s->description().c_str(), berror(saved_errno)); + return; + } + if (!s->MoreReadEvents(&progress)) { + break; + } + } else if (nr == 0) { + s->SetEOF(); + return; + } else { + LOG(WARNING) << "Read 
unexpected data from " << s; + s->SetFailed(EPROTO, "Read unexpected data from %s", + s->description().c_str()); + return; + } + } +} + +void RdmaEndpoint::OnNewDataFromTcp(Socket* m) { + RdmaEndpoint* ep = m->_rdma_ep; + CHECK(ep != NULL); + + int progress = Socket::PROGRESS_INIT; + while (true) { + if (ep->_state == UNINIT) { + if (!m->CreatedByConnect()) { + if (!IsRdmaAvailable()) { + ep->_state = FALLBACK_TCP; + m->_rdma_state = Socket::RDMA_OFF; + continue; + } + bthread_t tid; + ep->_state = S_HELLO_WAIT; + SocketUniquePtr s; + m->ReAddress(&s); + if (bthread_start_background(&tid, &BTHREAD_ATTR_NORMAL, + ProcessHandshakeAtServer, ep) < 0) { + ep->_state = UNINIT; + LOG(FATAL) << "Fail to start handshake bthread"; + } else { + s.release(); + } + } else { + // The connection may be closed or reset before the client + // starts handshake. This will be handled by client handshake. + // Ignore the exception here. + } + } else if (ep->_state < ESTABLISHED) { // during handshake + ep->_read_butex->fetch_add(1, butil::memory_order_release); + bthread::butex_wake(ep->_read_butex); + } else if (ep->_state == FALLBACK_TCP){ // handshake finishes + InputMessenger::OnNewMessages(m); + return; + } else if (ep->_state == ESTABLISHED) { + TryReadOnTcpDuringRdmaEst(ep->_socket); + return; + } + if (!m->MoreReadEvents(&progress)) { + break; + } + } +} + +bool HelloNegotiationValid(HelloMessage& msg) { + if (msg.hello_ver == g_rdma_hello_version && + msg.impl_ver == g_rdma_impl_version && + msg.block_size >= MIN_BLOCK_SIZE && + msg.sq_size >= MIN_QP_SIZE && + msg.rq_size >= MIN_QP_SIZE) { + // This can be modified for future compatibility + return true; + } + return false; +} + +static const int WAIT_TIMEOUT_MS = 50; + +int RdmaEndpoint::ReadFromFd(void* data, size_t len) { + CHECK(data != NULL); + int nr = 0; + size_t received = 0; + do { + const int expected_val = _read_butex->load(butil::memory_order_acquire); + const timespec duetime = 
butil::milliseconds_from_now(WAIT_TIMEOUT_MS); + nr = read(_socket->fd(), (uint8_t*)data + received, len - received); + if (nr < 0) { + if (errno == EAGAIN) { + if (bthread::butex_wait(_read_butex, expected_val, &duetime) < 0) { + if (errno != EWOULDBLOCK && errno != ETIMEDOUT) { + return -1; + } + } + } else { + return -1; + } + } else if (nr == 0) { // Got EOF + errno = EEOF; + return -1; + } else { + received += nr; + } + } while (received < len); + return 0; +} + +int RdmaEndpoint::WriteToFd(void* data, size_t len) { + CHECK(data != NULL); + int nw = 0; + size_t written = 0; + do { + const timespec duetime = butil::milliseconds_from_now(WAIT_TIMEOUT_MS); + nw = write(_socket->fd(), (uint8_t*)data + written, len - written); + if (nw < 0) { + if (errno == EAGAIN) { + if (_socket->WaitEpollOut(_socket->fd(), true, &duetime) < 0) { + if (errno != ETIMEDOUT) { + return -1; + } + } + } else { + return -1; + } + } else { + written += nw; + } + } while (written < len); + return 0; +} + +inline void RdmaEndpoint::TryReadOnTcp() { + if (_socket->_nevent.fetch_add(1, butil::memory_order_acq_rel) == 0) { + if (_state == FALLBACK_TCP) { + InputMessenger::OnNewMessages(_socket); + } else if (_state == ESTABLISHED) { + TryReadOnTcpDuringRdmaEst(_socket); + } + } +} + +void* RdmaEndpoint::ProcessHandshakeAtClient(void* arg) { + RdmaEndpoint* ep = static_cast(arg); + SocketUniquePtr s(ep->_socket); + RdmaConnect::RunGuard rg((RdmaConnect*)s->_app_connect.get()); + + LOG_IF(INFO, FLAGS_rdma_trace_verbose) + << "Start handshake on " << s->_local_side; + + uint8_t data[g_rdma_hello_msg_len]; + + // First initialize CQ and QP resources + ep->_state = C_ALLOC_QPCQ; + if (ep->AllocateResources() < 0) { + LOG(WARNING) << "Fallback to tcp:" << s->description(); + s->_rdma_state = Socket::RDMA_OFF; + ep->_state = FALLBACK_TCP; + return NULL; + } + + // Send hello message to server + ep->_state = C_HELLO_SEND; + HelloMessage local_msg; + local_msg.msg_len = g_rdma_hello_msg_len; + 
local_msg.hello_ver = g_rdma_hello_version; + local_msg.impl_ver = g_rdma_impl_version; + local_msg.block_size = g_rdma_recv_block_size; + local_msg.sq_size = ep->_sq_size; + local_msg.rq_size = ep->_rq_size; + local_msg.lid = GetRdmaLid(); + local_msg.gid = GetRdmaGid(); + if (BAIDU_LIKELY(ep->_resource)) { + local_msg.qp_num = ep->_resource->qp->qp_num; + } else { + // Only happens in UT + local_msg.qp_num = 0; + } + memcpy(data, MAGIC_STR, 4); + local_msg.Serialize((char*)data + 4); + if (ep->WriteToFd(data, g_rdma_hello_msg_len) < 0) { + const int saved_errno = errno; + PLOG(WARNING) << "Fail to send hello message to server:" << s->description(); + s->SetFailed(saved_errno, "Fail to complete rdma handshake from %s: %s", + s->description().c_str(), berror(saved_errno)); + ep->_state = FAILED; + return NULL; + } + + // Check magic str + ep->_state = C_HELLO_WAIT; + if (ep->ReadFromFd(data, MAGIC_STR_LEN) < 0) { + const int saved_errno = errno; + PLOG(WARNING) << "Fail to get hello message from server:" << s->description(); + s->SetFailed(saved_errno, "Fail to complete rdma handshake from %s: %s", + s->description().c_str(), berror(saved_errno)); + ep->_state = FAILED; + return NULL; + } + if (memcmp(data, MAGIC_STR, MAGIC_STR_LEN) != 0) { + LOG(WARNING) << "Read unexpected data during handshake:" << s->description(); + s->SetFailed(EPROTO, "Fail to complete rdma handshake from %s: %s", + s->description().c_str(), berror(EPROTO)); + ep->_state = FAILED; + return NULL; + } + + // Read hello message from server + if (ep->ReadFromFd(data, HELLO_MSG_LEN_MIN - MAGIC_STR_LEN) < 0) { + const int saved_errno = errno; + PLOG(WARNING) << "Fail to get Hello Message from server:" << s->description(); + s->SetFailed(saved_errno, "Fail to complete rdma handshake from %s: %s", + s->description().c_str(), berror(saved_errno)); + ep->_state = FAILED; + return NULL; + } + HelloMessage remote_msg; + remote_msg.Deserialize(data); + if (remote_msg.msg_len < HELLO_MSG_LEN_MIN) { + 
LOG(WARNING) << "Fail to parse Hello Message length from server:" + << s->description(); + s->SetFailed(EPROTO, "Fail to complete rdma handshake from %s: %s", + s->description().c_str(), berror(EPROTO)); + ep->_state = FAILED; + return NULL; + } + + if (remote_msg.msg_len > HELLO_MSG_LEN_MIN) { + // TODO: Read Hello Message customized data + // Just for future use, should not happen now + } + + if (!HelloNegotiationValid(remote_msg)) { + LOG(WARNING) << "Fail to negotiate with server, fallback to tcp:" + << s->description(); + s->_rdma_state = Socket::RDMA_OFF; + } else { + ep->_remote_recv_block_size = remote_msg.block_size; + ep->_local_window_capacity = + std::min(ep->_sq_size, remote_msg.rq_size) - RESERVED_WR_NUM; + ep->_remote_window_capacity = + std::min(ep->_rq_size, remote_msg.sq_size) - RESERVED_WR_NUM, + ep->_window_size.store(ep->_local_window_capacity, butil::memory_order_relaxed); + + ep->_state = C_BRINGUP_QP; + if (ep->BringUpQp(remote_msg.lid, remote_msg.gid, remote_msg.qp_num) < 0) { + LOG(WARNING) << "Fail to bringup QP, fallback to tcp:" << s->description(); + s->_rdma_state = Socket::RDMA_OFF; + } else { + s->_rdma_state = Socket::RDMA_ON; + } + } + + // Send ACK message to server + ep->_state = C_ACK_SEND; + uint32_t flags = 0; + if (s->_rdma_state != Socket::RDMA_OFF) { + flags |= ACK_MSG_RDMA_OK; + } + *(uint32_t*)data = butil::HostToNet32(flags); + if (ep->WriteToFd(data, ACK_MSG_LEN) < 0) { + const int saved_errno = errno; + PLOG(WARNING) << "Fail to send Ack Message to server:" << s->description(); + s->SetFailed(saved_errno, "Fail to complete rdma handshake from %s: %s", + s->description().c_str(), berror(saved_errno)); + ep->_state = FAILED; + return NULL; + } + + if (s->_rdma_state == Socket::RDMA_ON) { + ep->_state = ESTABLISHED; + LOG_IF(INFO, FLAGS_rdma_trace_verbose) + << "Handshake ends (use rdma) on " << s->description(); + } else { + ep->_state = FALLBACK_TCP; + LOG_IF(INFO, FLAGS_rdma_trace_verbose) + << "Handshake ends (use 
tcp) on " << s->description(); + } + + errno = 0; + + return NULL; +} + +void* RdmaEndpoint::ProcessHandshakeAtServer(void* arg) { + RdmaEndpoint* ep = static_cast(arg); + SocketUniquePtr s(ep->_socket); + + LOG_IF(INFO, FLAGS_rdma_trace_verbose) + << "Start handshake on " << s->description(); + + uint8_t data[g_rdma_hello_msg_len]; + + ep->_state = S_HELLO_WAIT; + if (ep->ReadFromFd(data, MAGIC_STR_LEN) < 0) { + const int saved_errno = errno; + PLOG(WARNING) << "Fail to read Hello Message from client:" << s->description() << " " << s->_remote_side; + s->SetFailed(saved_errno, "Fail to complete rdma handshake from %s: %s", + s->description().c_str(), berror(saved_errno)); + ep->_state = FAILED; + return NULL; + } + + if (memcmp(data, MAGIC_STR, MAGIC_STR_LEN) != 0) { + LOG_IF(INFO, FLAGS_rdma_trace_verbose) << "It seems that the " + << "client does not use RDMA, fallback to TCP:" + << s->description(); + // we need to copy data read back to _socket->_read_buf + s->_read_buf.append(data, MAGIC_STR_LEN); + ep->_state = FALLBACK_TCP; + s->_rdma_state = Socket::RDMA_OFF; + ep->TryReadOnTcp(); + return NULL; + } + + if (ep->ReadFromFd(data, g_rdma_hello_msg_len - MAGIC_STR_LEN) < 0) { + const int saved_errno = errno; + PLOG(WARNING) << "Fail to read Hello Message from client:" << s->description(); + s->SetFailed(saved_errno, "Fail to complete rdma handshake from %s: %s", + s->description().c_str(), berror(saved_errno)); + ep->_state = FAILED; + return NULL; + } + + HelloMessage remote_msg; + remote_msg.Deserialize(data); + if (remote_msg.msg_len < HELLO_MSG_LEN_MIN) { + LOG(WARNING) << "Fail to parse Hello Message length from client:" + << s->description(); + s->SetFailed(EPROTO, "Fail to complete rdma handshake from %s: %s", + s->description().c_str(), berror(EPROTO)); + ep->_state = FAILED; + return NULL; + } + if (remote_msg.msg_len > HELLO_MSG_LEN_MIN) { + // TODO: Read Hello Message customized header + // Just for future use, should not happen now + } + + if 
(!HelloNegotiationValid(remote_msg)) { + LOG(WARNING) << "Fail to negotiate with client, fallback to tcp:" + << s->description(); + s->_rdma_state = Socket::RDMA_OFF; + } else { + ep->_remote_recv_block_size = remote_msg.block_size; + ep->_local_window_capacity = + std::min(ep->_sq_size, remote_msg.rq_size) - RESERVED_WR_NUM; + ep->_remote_window_capacity = + std::min(ep->_rq_size, remote_msg.sq_size) - RESERVED_WR_NUM, + ep->_window_size.store(ep->_local_window_capacity, butil::memory_order_relaxed); + + ep->_state = S_ALLOC_QPCQ; + if (ep->AllocateResources() < 0) { + LOG(WARNING) << "Fail to allocate rdma resources, fallback to tcp:" + << s->description(); + s->_rdma_state = Socket::RDMA_OFF; + } else { + ep->_state = S_BRINGUP_QP; + if (ep->BringUpQp(remote_msg.lid, remote_msg.gid, remote_msg.qp_num) < 0) { + LOG(WARNING) << "Fail to bringup QP, fallback to tcp:" + << s->description(); + s->_rdma_state = Socket::RDMA_OFF; + } + } + } + + // Send hello message to client + ep->_state = S_HELLO_SEND; + HelloMessage local_msg; + local_msg.msg_len = g_rdma_hello_msg_len; + if (s->_rdma_state == Socket::RDMA_OFF) { + local_msg.impl_ver = 0; + local_msg.hello_ver = 0; + } else { + local_msg.lid = GetRdmaLid(); + local_msg.gid = GetRdmaGid(); + local_msg.block_size = g_rdma_recv_block_size; + local_msg.sq_size = ep->_sq_size; + local_msg.rq_size = ep->_rq_size; + local_msg.hello_ver = g_rdma_hello_version; + local_msg.impl_ver = g_rdma_impl_version; + if (BAIDU_LIKELY(ep->_resource)) { + local_msg.qp_num = ep->_resource->qp->qp_num; + } else { + // Only happens in UT + local_msg.qp_num = 0; + } + } + memcpy(data, MAGIC_STR, 4); + local_msg.Serialize((char*)data + 4); + if (ep->WriteToFd(data, g_rdma_hello_msg_len) < 0) { + const int saved_errno = errno; + PLOG(WARNING) << "Fail to send Hello Message to client:" << s->description(); + s->SetFailed(saved_errno, "Fail to complete rdma handshake from %s: %s", + s->description().c_str(), berror(saved_errno)); + ep->_state = 
FAILED; + return NULL; + } + + // Recv ACK Message + ep->_state = S_ACK_WAIT; + if (ep->ReadFromFd(data, ACK_MSG_LEN) < 0) { + const int saved_errno = errno; + PLOG(WARNING) << "Fail to read ack message from client:" << s->description(); + s->SetFailed(saved_errno, "Fail to complete rdma handshake from %s: %s", + s->description().c_str(), berror(saved_errno)); + ep->_state = FAILED; + return NULL; + } + + // Check RDMA enable flag + uint32_t flags = butil::NetToHost32(*(uint32_t*)data); + if (flags & ACK_MSG_RDMA_OK) { + if (s->_rdma_state == Socket::RDMA_OFF) { + LOG(WARNING) << "Fail to parse Hello Message length from client:" + << s->description(); + s->SetFailed(EPROTO, "Fail to complete rdma handshake from %s: %s", + s->description().c_str(), berror(EPROTO)); + ep->_state = FAILED; + return NULL; + } else { + s->_rdma_state = Socket::RDMA_ON; + ep->_state = ESTABLISHED; + LOG_IF(INFO, FLAGS_rdma_trace_verbose) + << "Handshake ends (use rdma) on " << s->description(); + } + } else { + s->_rdma_state = Socket::RDMA_OFF; + ep->_state = FALLBACK_TCP; + LOG_IF(INFO, FLAGS_rdma_trace_verbose) + << "Handshake ends (use tcp) on " << s->description(); + } + + ep->TryReadOnTcp(); + + return NULL; +} + +bool RdmaEndpoint::IsWritable() const { + if (BAIDU_UNLIKELY(g_skip_rdma_init)) { + // Just for UT + return false; + } + + return _window_size.load(butil::memory_order_relaxed) > 0; +} + +// RdmaIOBuf inherits from IOBuf to provide a new function. +// The reason is that we need to use some protected member function of IOBuf. +class RdmaIOBuf : public butil::IOBuf { +friend class RdmaEndpoint; +private: + // Cut the current IOBuf to ibv_sge list and `to' for at most first max_sge + // blocks or first max_len bytes. 
+ // Return: the bytes included in the sglist, or -1 if failed + ssize_t cut_into_sglist_and_iobuf(ibv_sge* sglist, size_t* sge_index, + butil::IOBuf* to, size_t max_sge, size_t max_len) { + size_t len = 0; + while (*sge_index < max_sge) { + if (len == max_len || _ref_num() == 0) { + break; + } + butil::IOBuf::BlockRef const& r = _ref_at(0); + CHECK(r.length > 0); + const void* start = fetch1(); + uint32_t lkey = GetLKey((char*)start - r.offset); + if (lkey == 0) { + LOG(WARNING) << "Memory not registered for rdma. " + << "Is this iobuf allocated before calling " + << "GlobalRdmaInitializeOrDie? Or just forget to " + << "call RegisterMemoryForRdma for your own buffer?"; + errno = ERDMAMEM; + return -1; + } + size_t i = *sge_index; + if (len + r.length > max_len) { + // Split the block to comply with size for receiving + sglist[i].length = max_len - len; + len = max_len; + } else { + sglist[i].length = r.length; + len += r.length; + } + sglist[i].addr = (uint64_t)start; + sglist[i].lkey = lkey; + cutn(to, sglist[i].length); + (*sge_index)++; + } + return len; + } +}; + +// Note this function is coupled with the implementation of IOBuf +ssize_t RdmaEndpoint::CutFromIOBufList(butil::IOBuf** from, size_t ndata) { + if (BAIDU_UNLIKELY(g_skip_rdma_init)) { + // Just for UT + errno = EAGAIN; + return -1; + } + + CHECK(from != NULL); + CHECK(ndata > 0); + + size_t total_len = 0; + size_t current = 0; + uint32_t window = 0; + ibv_send_wr wr; + int max_sge = GetRdmaMaxSge(); + ibv_sge sglist[max_sge]; + while (current < ndata) { + window = _window_size.load(butil::memory_order_relaxed); + if (window == 0) { + if (total_len > 0) { + break; + } else { + errno = EAGAIN; + return -1; + } + } + butil::IOBuf* to = &_sbuf[_sq_current]; + size_t this_len = 0; + + memset(&wr, 0, sizeof(wr)); + wr.sg_list = sglist; + wr.opcode = IBV_WR_SEND_WITH_IMM; + + RdmaIOBuf* data = (RdmaIOBuf*)from[current]; + size_t sge_index = 0; + while (sge_index < (uint32_t)max_sge && + this_len < 
_remote_recv_block_size) { + if (data->size() == 0) { + // The current IOBuf is empty, find next one + ++current; + if (current == ndata) { + break; + } + data = (RdmaIOBuf*)from[current]; + continue; + } + + ssize_t len = data->cut_into_sglist_and_iobuf( + sglist, &sge_index, to, max_sge, + _remote_recv_block_size - this_len); + if (len < 0) { + return -1; + } + CHECK(len > 0); + this_len += len; + total_len += len; + } + if (this_len == 0) { + continue; + } + + wr.num_sge = sge_index; + + uint32_t imm = _new_rq_wrs.exchange(0, butil::memory_order_relaxed); + wr.imm_data = butil::HostToNet32(imm); + // Avoid too much recv completion event to reduce the cpu overhead + bool solicited = false; + if (window == 1 || current + 1 >= ndata) { + // Only last message in the write queue or last message in the + // current window will be flagged as solicited. + solicited = true; + } else { + if (_unsolicited > _local_window_capacity / 4) { + // Make sure the recv side can be signaled to return ack + solicited = true; + } else if (_accumulated_ack > _remote_window_capacity / 4) { + // Make sure the recv side can be signaled to handle ack + solicited = true; + } else if (_unsolicited_bytes > 1048576) { + // Make sure the recv side can be signaled when it receives enough data + solicited = true; + } else { + ++_unsolicited; + _unsolicited_bytes += this_len; + _accumulated_ack += imm; + } + } + if (solicited) { + wr.send_flags |= IBV_SEND_SOLICITED; + _unsolicited = 0; + _unsolicited_bytes = 0; + _accumulated_ack = 0; + } + + // Avoid too much send completion event to reduce the CPU overhead + ++_sq_unsignaled; + if (_sq_unsignaled >= _local_window_capacity / 4) { + // Refer to: + // http::www.rdmamojo.com/2014/06/30/working-unsignaled-completions/ + wr.send_flags |= IBV_SEND_SIGNALED; + _sq_unsignaled = 0; + } + + ibv_send_wr* bad = NULL; + if (ibv_post_send(_resource->qp, &wr, &bad) < 0) { + // We use other way to guarantee the Send Queue is not full. 
+ // So we just consider this error as an unrecoverable error. + PLOG(WARNING) << "Fail to ibv_post_send"; + return -1; + } + + ++_sq_current; + if (_sq_current == _sq_size - RESERVED_WR_NUM) { + _sq_current = 0; + } + + // Update _window_size. Note that _window_size will never be negative. + // Because there is at most one thread can enter this function for each + // Socket, and the other thread of HandleCompletion can only add this + // counter. + _window_size.fetch_sub(1, butil::memory_order_relaxed); + } + + return total_len; +} + +int RdmaEndpoint::SendAck(int num) { + if (_new_rq_wrs.fetch_add(num, butil::memory_order_relaxed) > _remote_window_capacity / 2) { + return SendImm(_new_rq_wrs.exchange(0, butil::memory_order_relaxed)); + } + return 0; +} + +int RdmaEndpoint::SendImm(uint32_t imm) { + if (imm == 0) { + return 0; + } + + ibv_send_wr wr; + memset(&wr, 0, sizeof(wr)); + wr.opcode = IBV_WR_SEND_WITH_IMM; + wr.imm_data = butil::HostToNet32(imm); + wr.send_flags |= IBV_SEND_SOLICITED; + wr.send_flags |= IBV_SEND_SIGNALED; + + ibv_send_wr* bad = NULL; + if (ibv_post_send(_resource->qp, &wr, &bad) < 0) { + // We use other way to guarantee the Send Queue is not full. + // So we just consider this error as an unrecoverable error. 
+ PLOG(WARNING) << "Fail to ibv_post_send"; + return -1; + } + return 0; +} + +ssize_t RdmaEndpoint::HandleCompletion(ibv_wc& wc) { + bool zerocopy = FLAGS_rdma_recv_zerocopy; + switch (wc.opcode) { + case IBV_WC_SEND: { // send completion + // Do nothing + break; + } + case IBV_WC_RECV: { // recv completion + // Please note that only the first wc.byte_len bytes is valid + if (wc.byte_len > 0) { + if (wc.byte_len < (uint32_t)FLAGS_rdma_zerocopy_min_size) { + zerocopy = false; + } + CHECK(_state != FALLBACK_TCP); + if (zerocopy) { + butil::IOBuf tmp; + _rbuf[_rq_received].cutn(&tmp, wc.byte_len); + _socket->_read_buf.append(tmp); + } else { + // Copy data when the receive data is really small + _socket->_read_buf.append(_rbuf_data[_rq_received], wc.byte_len); + } + } + if (wc.imm_data > 0) { + // Clear sbuf here because we ignore event wakeup for send completions + uint32_t acks = butil::NetToHost32(wc.imm_data); + uint32_t num = acks; + while (num > 0) { + _sbuf[_sq_sent++].clear(); + if (_sq_sent == _sq_size - RESERVED_WR_NUM) { + _sq_sent = 0; + } + --num; + } + butil::subtle::MemoryBarrier(); + + // Update window + uint32_t wnd_thresh = _local_window_capacity / 8; + if (_window_size.fetch_add(acks, butil::memory_order_relaxed) >= wnd_thresh + || acks >= wnd_thresh) { + // Do not wake up writing thread right after _window_size > 0. + // Otherwise the writing thread may switch to background too quickly. + _socket->WakeAsEpollOut(); + } + } + // We must re-post recv WR + if (PostRecv(1, zerocopy) < 0) { + return -1; + } + if (wc.byte_len > 0) { + SendAck(1); + } + return wc.byte_len; + } + default: + // Some driver bugs may lead to unexpected completion opcode. + // If this happens, please update your driver. + CHECK(false) << "This should not happen. 
Got a completion with opcode=" + << wc.opcode; + return -1; + } + return 0; +} + +int RdmaEndpoint::DoPostRecv(void* block, size_t block_size) { + ibv_recv_wr wr; + memset(&wr, 0, sizeof(wr)); + ibv_sge sge; + sge.addr = (uint64_t)block; + sge.length = block_size; + sge.lkey = GetLKey((char*)block - IOBUF_BLOCK_HEADER_LEN); + wr.num_sge = 1; + wr.sg_list = &sge; + + ibv_recv_wr* bad = NULL; + if (ibv_post_recv(_resource->qp, &wr, &bad) < 0) { + PLOG(WARNING) << "Fail to ibv_post_recv"; + return -1; + } + return 0; +} + +int RdmaEndpoint::PostRecv(uint32_t num, bool zerocopy) { + // We do the post repeatedly from the _rbuf[_rq_received]. + while (num > 0) { + if (zerocopy) { + _rbuf[_rq_received].clear(); + butil::IOBufAsZeroCopyOutputStream os(&_rbuf[_rq_received], + g_rdma_recv_block_size + IOBUF_BLOCK_HEADER_LEN); + int size = 0; + if (!os.Next(&_rbuf_data[_rq_received], &size)) { + // Memory is not enough for preparing a block + PLOG(WARNING) << "Fail to allocate rbuf"; + return -1; + } else { + CHECK(size == g_rdma_recv_block_size) << size; + } + } + if (DoPostRecv(_rbuf_data[_rq_received], g_rdma_recv_block_size) < 0) { + _rbuf[_rq_received].clear(); + return -1; + } + --num; + ++_rq_received; + if (_rq_received == _rq_size) { + _rq_received = 0; + } + }; + return 0; +} + +static RdmaResource* AllocateQpCq() { + RdmaResource* res = new (std::nothrow) RdmaResource; + if (!res) { + return NULL; + } + + res->comp_channel = IbvCreateCompChannel(GetRdmaContext()); + if (!res->comp_channel) { + PLOG(WARNING) << "Fail to create comp channel for CQ"; + delete res; + return NULL; + } + + butil::make_close_on_exec(res->comp_channel->fd); + if (butil::make_non_blocking(res->comp_channel->fd) < 0) { + PLOG(WARNING) << "Fail to set comp channel nonblocking"; + delete res; + return NULL; + } + + res->cq = IbvCreateCq(GetRdmaContext(), 2 * FLAGS_rdma_prepared_qp_size, + NULL, res->comp_channel, GetRdmaCompVector()); + if (!res->cq) { + PLOG(WARNING) << "Fail to create CQ"; + 
delete res; + return NULL; + } + + ibv_qp_init_attr attr; + memset(&attr, 0, sizeof(attr)); + attr.send_cq = res->cq; + attr.recv_cq = res->cq; + attr.cap.max_send_wr = FLAGS_rdma_prepared_qp_size; + attr.cap.max_recv_wr = FLAGS_rdma_prepared_qp_size; + attr.cap.max_send_sge = GetRdmaMaxSge(); + attr.cap.max_recv_sge = 1; + attr.qp_type = IBV_QPT_RC; + res->qp = IbvCreateQp(GetRdmaPd(), &attr); + if (!res->qp) { + PLOG(WARNING) << "Fail to create QP"; + delete res; + return NULL; + } + + return res; +} + +int RdmaEndpoint::AllocateResources() { + if (BAIDU_UNLIKELY(g_skip_rdma_init)) { + // For UT + return 0; + } + + CHECK(_resource == NULL); + + if (_sq_size <= FLAGS_rdma_prepared_qp_size && + _rq_size <= FLAGS_rdma_prepared_qp_size) { + BAIDU_SCOPED_LOCK(*g_rdma_resource_mutex); + if (g_rdma_resource_list) { + _resource = g_rdma_resource_list; + g_rdma_resource_list = g_rdma_resource_list->next; + } + } + if (!_resource) { + _resource = AllocateQpCq(); + } else { + _resource->next = NULL; + } + if (!_resource) { + return -1; + } + + SocketOptions options; + options.user = this; + options.keytable_pool = _socket->_keytable_pool; + options.fd = _resource->comp_channel->fd; + options.on_edge_triggered_events = PollCq; + if (Socket::Create(options, &_cq_sid) < 0) { + PLOG(WARNING) << "Fail to create socket for cq"; + return -1; + } + + if (ibv_req_notify_cq(_resource->cq, 1) < 0) { + PLOG(WARNING) << "Fail to arm CQ comp channel"; + return -1; + } + + _sbuf.resize(_sq_size - RESERVED_WR_NUM); + if (_sbuf.size() != _sq_size - RESERVED_WR_NUM) { + return -1; + } + _rbuf.resize(_rq_size); + if (_rbuf.size() != _rq_size) { + return -1; + } + _rbuf_data.resize(_rq_size, NULL); + if (_rbuf_data.size() != _rq_size) { + return -1; + } + + return 0; +} + +int RdmaEndpoint::BringUpQp(uint16_t lid, ibv_gid gid, uint32_t qp_num) { + if (BAIDU_UNLIKELY(g_skip_rdma_init)) { + // For UT + return 0; + } + + ibv_qp_attr attr; + + attr.qp_state = IBV_QPS_INIT; + attr.pkey_index = 0; 
// TODO: support more pkey use in future + attr.port_num = GetRdmaPortNum(); + attr.qp_access_flags = IBV_ACCESS_REMOTE_WRITE; + if (IbvModifyQp(_resource->qp, &attr, (ibv_qp_attr_mask)( + IBV_QP_STATE | + IBV_QP_PKEY_INDEX | + IBV_QP_PORT | + IBV_QP_ACCESS_FLAGS)) < 0) { + PLOG(WARNING) << "Fail to modify QP from RESET to INIT"; + return -1; + } + + if (PostRecv(_rq_size, true) < 0) { + PLOG(WARNING) << "Fail to post recv wr"; + return -1; + } + + attr.qp_state = IBV_QPS_RTR; + attr.path_mtu = IBV_MTU_1024; // TODO: support more mtu in future + attr.ah_attr.grh.dgid = gid; + attr.ah_attr.grh.flow_label = 0; + attr.ah_attr.grh.sgid_index = GetRdmaGidIndex(); + attr.ah_attr.grh.hop_limit = MAX_HOP_LIMIT; + attr.ah_attr.grh.traffic_class = 0; + attr.ah_attr.dlid = lid; + attr.ah_attr.sl = 0; + attr.ah_attr.src_path_bits = 0; + attr.ah_attr.static_rate = 0; + attr.ah_attr.is_global = 1; + attr.ah_attr.port_num = GetRdmaPortNum(); + attr.dest_qp_num = qp_num; + attr.rq_psn = 0; + attr.max_dest_rd_atomic = 0; + attr.min_rnr_timer = 0; // We do not allow rnr error + if (IbvModifyQp(_resource->qp, &attr, (ibv_qp_attr_mask)( + IBV_QP_STATE | + IBV_QP_PATH_MTU | + IBV_QP_MIN_RNR_TIMER | + IBV_QP_AV | + IBV_QP_MAX_DEST_RD_ATOMIC | + IBV_QP_DEST_QPN | + IBV_QP_RQ_PSN)) < 0) { + PLOG(WARNING) << "Fail to modify QP from INIT to RTR"; + return -1; + } + + attr.qp_state = IBV_QPS_RTS; + attr.timeout = TIMEOUT; + attr.retry_cnt = RETRY_CNT; + attr.rnr_retry = 0; // We do not allow rnr error + attr.sq_psn = 0; + attr.max_rd_atomic = 0; + if (IbvModifyQp(_resource->qp, &attr, (ibv_qp_attr_mask)( + IBV_QP_STATE | + IBV_QP_RNR_RETRY | + IBV_QP_RETRY_CNT | + IBV_QP_TIMEOUT | + IBV_QP_SQ_PSN | + IBV_QP_MAX_QP_RD_ATOMIC)) < 0) { + PLOG(WARNING) << "Fail to modify QP from RTR to RTS"; + return -1; + } + + return 0; +} + +void RdmaEndpoint::DeallocateResources() { + if (!_resource) { + return; + } + bool move_to_rdma_resource_list = false; + if (_sq_size <= FLAGS_rdma_prepared_qp_size && + 
_rq_size <= FLAGS_rdma_prepared_qp_size) { + ibv_qp_attr attr; + attr.qp_state = IBV_QPS_RESET; + if (IbvModifyQp(_resource->qp, &attr, IBV_QP_STATE) == 0) { + move_to_rdma_resource_list = true; + } + } + int fd = _resource->comp_channel->fd; + if (!move_to_rdma_resource_list) { + if (_resource->qp) { + if (IbvDestroyQp(_resource->qp) < 0) { + PLOG(WARNING) << "Fail to destroy QP"; + } + } + if (_resource->cq) { + IbvAckCqEvents(_resource->cq, _cq_events); + if (IbvDestroyCq(_resource->cq) < 0) { + PLOG(WARNING) << "Fail to destroy CQ"; + } + } + if (_resource->comp_channel) { + // destroy comp_channel will destroy this fd + // so that we should remove it from epoll fd first + GetGlobalEventDispatcher(fd).RemoveConsumer(fd); + fd = -1; + if (IbvDestroyCompChannel(_resource->comp_channel) < 0) { + PLOG(WARNING) << "Fail to destroy CQ channel"; + } + } + delete _resource; + } + + SocketUniquePtr s; + if (_cq_sid != INVALID_SOCKET_ID) { + if (Socket::Address(_cq_sid, &s) == 0) { + s->_user = NULL; // do not release user (this RdmaEndpoint) + if (fd >= 0) { + GetGlobalEventDispatcher(fd).RemoveConsumer(fd); + } + s->_fd = -1; // already remove fd from epoll fd + s->SetFailed(); + } + _cq_sid = INVALID_SOCKET_ID; + } + + if (!move_to_rdma_resource_list) { + if (_resource->cq) { + IbvAckCqEvents(_resource->cq, _cq_events); + } + BAIDU_SCOPED_LOCK(*g_rdma_resource_mutex); + _resource->next = g_rdma_resource_list; + g_rdma_resource_list = _resource; + } + + _resource = NULL; +} + +static const int MAX_CQ_EVENTS = 128; + +int RdmaEndpoint::GetAndAckEvents() { + int events = 0; void* context = NULL; + while (1) { + if (IbvGetCqEvent(_resource->comp_channel, &_resource->cq, &context) < 0) { + if (errno != EAGAIN) { + return -1; + } + break; + } + ++events; + } + if (events == 0) { + return 0; + } + _cq_events += events; + if (_cq_events >= MAX_CQ_EVENTS) { + IbvAckCqEvents(_resource->cq, _cq_events); + _cq_events = 0; + } + return 0; +} + +void RdmaEndpoint::PollCq(Socket* m) 
{ + RdmaEndpoint* ep = static_cast(m->user()); + if (!ep) { + return; + } + + SocketUniquePtr s; + if (Socket::Address(ep->_socket->id(), &s) < 0) { + return; + } + CHECK(ep == s->_rdma_ep); + + if (ep->GetAndAckEvents() < 0) { + const int saved_errno = errno; + PLOG(ERROR) << "Fail to get cq event: " << s->description(); + s->SetFailed(saved_errno, "Fail to get cq event from %s: %s", + s->description().c_str(), berror(saved_errno)); + return; + } + + int progress = Socket::PROGRESS_INIT; + bool notified = false; + InputMessenger::InputMessageClosure last_msg; + ibv_wc wc[FLAGS_rdma_cqe_poll_once]; + while (true) { + int cnt = ibv_poll_cq(ep->_resource->cq, FLAGS_rdma_cqe_poll_once, wc); + if (cnt < 0) { + const int saved_errno = errno; + PLOG(WARNING) << "Fail to poll cq: " << s->description(); + s->SetFailed(saved_errno, "Fail to poll cq from %s: %s", + s->description().c_str(), berror(saved_errno)); + return; + } + if (cnt == 0) { + if (!notified) { + // Since RDMA only provides one shot event, we have to call the + // notify function every time. Because there is a possibility + // that the event arrives after the poll but before the notify, + // we should re-poll the CQ once after the notify to check if + // there is an available CQE. 
+ if (ibv_req_notify_cq(ep->_resource->cq, 1) < 0) { + const int saved_errno = errno; + PLOG(WARNING) << "Fail to arm CQ comp channel: " << s->description(); + s->SetFailed(saved_errno, "Fail to arm cq channel from %s: %s", + s->description().c_str(), berror(saved_errno)); + return; + } + notified = true; + continue; + } + if (!m->MoreReadEvents(&progress)) { + break; + } + if (ep->GetAndAckEvents() < 0) { + s->SetFailed(errno, "Fail to ack CQ event on %s", + s->description().c_str()); + return; + } + notified = false; + continue; + } + notified = false; + + for (int i = 0; i < cnt; ++i) { + if (s->Failed()) { + continue; + } + + if (wc[i].status != IBV_WC_SUCCESS) { + PLOG(WARNING) << "Fail to handle RDMA completion, error status(" + << wc[i].status << "): " << s->description(); + s->SetFailed(ERDMA, "RDMA completion error(%d) from %s: %s", + wc[i].status, s->description().c_str(), berror(ERDMA)); + continue; + } + + ssize_t nr = ep->HandleCompletion(wc[i]); + if (nr < 0) { + const int saved_errno = errno; + PLOG(WARNING) << "Fail to handle RDMA completion: " << s->description(); + s->SetFailed(saved_errno, "Fail to handle rdma completion from %s: %s", + s->description().c_str(), berror(saved_errno)); + } else if (nr > 0) { + const int64_t received_us = butil::cpuwide_time_us(); + const int64_t base_realtime = butil::gettimeofday_us() - received_us; + InputMessenger* messenger = static_cast(s->user()); + if (messenger->ProcessNewMessage( + s.get(), nr, false, received_us, base_realtime, last_msg) < 0) { + return; + } + } + } + } +} + +std::string RdmaEndpoint::GetStateStr() const { + switch (_state) { + case UNINIT: return "UNINIT"; + case C_ALLOC_QPCQ: return "C_ALLOC_QPCQ"; + case C_HELLO_SEND: return "C_HELLO_SEND"; + case C_HELLO_WAIT: return "C_HELLO_WAIT"; + case C_BRINGUP_QP: return "C_BRINGUP_QP"; + case C_ACK_SEND: return "C_ACK_SEND"; + case S_HELLO_WAIT: return "S_HELLO_WAIT"; + case S_ALLOC_QPCQ: return "S_ALLOC_QPCQ"; + case S_BRINGUP_QP: return 
"S_BRINGUP_QP"; + case S_HELLO_SEND: return "S_HELLO_SEND"; + case S_ACK_WAIT: return "S_ACK_WAIT"; + case ESTABLISHED: return "ESTABLISHED"; + case FALLBACK_TCP: return "FALLBACK_TCP"; + case FAILED: return "FAILED"; + default: return "UNKNOWN"; + } +} + +void RdmaEndpoint::DebugInfo(std::ostream& os) const { + os << "\nrdma_state=ON" + << "\nhandshake_state=" << GetStateStr() + << "\nrdma_window_size=" << _window_size.load(butil::memory_order_relaxed) + << "\nrdma_local_window_capacity=" << _local_window_capacity + << "\nrdma_remote_window_capacity=" << _remote_window_capacity + << "\nrdma_sbuf_head=" << _sq_current + << "\nrdma_sbuf_tail=" << _sq_sent + << "\nrdma_rbuf_head=" << _rq_received + << "\nrdma_unacked_rq_wr=" << _new_rq_wrs + << "\nrdma_received_ack=" << _accumulated_ack + << "\nrdma_unsolicited_sent=" << _unsolicited + << "\nrdma_unsignaled_sq_wr=" << _sq_unsignaled + << "\n"; +} + +int RdmaEndpoint::GlobalInitialize() { + if (FLAGS_rdma_recv_block_type == "default") { + g_rdma_recv_block_size = GetBlockSize(0) - IOBUF_BLOCK_HEADER_LEN; + } else if (FLAGS_rdma_recv_block_type == "large") { + g_rdma_recv_block_size = GetBlockSize(1) - IOBUF_BLOCK_HEADER_LEN; + } else if (FLAGS_rdma_recv_block_type == "huge") { + g_rdma_recv_block_size = GetBlockSize(2) - IOBUF_BLOCK_HEADER_LEN; + } else { + errno = EINVAL; + return -1; + } + + g_rdma_resource_mutex = new butil::Mutex; + for (int i = 0; i < FLAGS_rdma_prepared_qp_cnt; ++i) { + RdmaResource* res = AllocateQpCq(); + if (!res) { + return -1; + } + res->next = g_rdma_resource_list; + g_rdma_resource_list = res; + } + + return 0; +} + +void RdmaEndpoint::GlobalRelease() { + if (g_rdma_resource_mutex) { + BAIDU_SCOPED_LOCK(*g_rdma_resource_mutex); + while (g_rdma_resource_list) { + RdmaResource* res = g_rdma_resource_list; + g_rdma_resource_list = g_rdma_resource_list->next; + delete res; + } + } +} + +} // namespace rdma +} // namespace brpc + +#endif // if BRPC_WITH_RDMA diff --git 
a/src/brpc/rdma/rdma_endpoint.h b/src/brpc/rdma/rdma_endpoint.h new file mode 100644 index 0000000000..663595a635 --- /dev/null +++ b/src/brpc/rdma/rdma_endpoint.h @@ -0,0 +1,258 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#ifndef BRPC_RDMA_ENDPOINT_H +#define BRPC_RDMA_ENDPOINT_H + +#if BRPC_WITH_RDMA +#include +#include +#include +#include +#include +#include "butil/atomicops.h" +#include "butil/iobuf.h" +#include "butil/macros.h" +#include "brpc/socket.h" + + +namespace brpc { +class Socket; +namespace rdma { + +class RdmaConnect : public AppConnect { +public: + void StartConnect(const Socket* socket, + void (*done)(int err, void* data), void* data) override; + void StopConnect(Socket*) override; + struct RunGuard { + RunGuard(RdmaConnect* rc) { this_rc = rc; } + ~RunGuard() { if (this_rc) this_rc->Run(); } + RdmaConnect* this_rc; + }; + +private: + void Run(); + void (*_done)(int, void*); + void* _data; +}; + +struct RdmaResource { + ibv_qp* qp; + ibv_cq* cq; + ibv_comp_channel* comp_channel; + RdmaResource* next; + RdmaResource(); + ~RdmaResource(); +}; + +class BAIDU_CACHELINE_ALIGNMENT RdmaEndpoint : public SocketUser { +friend class RdmaConnect; +friend class brpc::Socket; +public: + 
RdmaEndpoint(Socket* s); + ~RdmaEndpoint(); + + // Global initialization + // Return 0 if success, -1 if failed and errno set + static int GlobalInitialize(); + + static void GlobalRelease(); + + // Reset the endpoint (for next use) + void Reset(); + + // Cut data from the given IOBuf list and use RDMA to send + // Return bytes cut if success, -1 if failed and errno set + ssize_t CutFromIOBufList(butil::IOBuf** data, size_t ndata); + + // Whether the endpoint can send more data + bool IsWritable() const; + + // For debug + void DebugInfo(std::ostream& os) const; + + // Callback when there is new epollin event on TCP fd + static void OnNewDataFromTcp(Socket* m); + +private: + enum State { + UNINIT = 0x0, + C_ALLOC_QPCQ = 0x1, + C_HELLO_SEND = 0x2, + C_HELLO_WAIT = 0x3, + C_BRINGUP_QP = 0x4, + C_ACK_SEND = 0x5, + S_HELLO_WAIT = 0x11, + S_ALLOC_QPCQ = 0x12, + S_BRINGUP_QP = 0x13, + S_HELLO_SEND = 0x14, + S_ACK_WAIT = 0x15, + ESTABLISHED = 0x100, + FALLBACK_TCP = 0x200, + FAILED = 0x300 + }; + + // Process handshake at the client + static void* ProcessHandshakeAtClient(void* arg); + + // Process handshake at the server + static void* ProcessHandshakeAtServer(void* arg); + + // Allocate resources + // Return 0 if success, -1 if failed and errno set + int AllocateResources(); + + // Release resources + void DeallocateResources(); + + // Send Imm data to the remote side + // Arguments: + // imm: imm data in the WR + // Return: + // 0: success + // -1: failed, errno set + int SendImm(uint32_t imm); + + // Try to send pure ACK to the remote side + // Arguments: + // num: the number of rq entry received + // Return: + // 0: success + // -1: failed, errno set + int SendAck(int num); + + // Handle CQE + // If wc is not RDMA RECV event: + // return 0 if success, -1 if failed and errno set + // If wc is RDMA RECV event: + // return bytes appended if success, -1 if failed and errno set + ssize_t HandleCompletion(ibv_wc& wc); + + // Post a given number of WRs to Recv Queue + // If 
zerocopy is true, reallocate block. + // Return 0 if success, -1 if failed and errno set + int PostRecv(uint32_t num, bool zerocopy); + + // Post a WR pointing to the block to the local Recv Queue + // Arguments: + // block: the addr to receive data (ibv_sge.addr) + // block_size: the maximum length can be received (ibv_sge.length) + // Return: + // 0: success + // -1: failed, errno set + int DoPostRecv(void* block, size_t block_size); + + // Read at most len bytes from fd in _socket to data + // wait for _read_butex if encounter EAGAIN + // return -1 if encounter other errno (including EOF) + int ReadFromFd(void* data, size_t len); + + + // Write at most len bytes from data to fd in _socket + // wait for _epollout_butex if encounter EAGAIN + // return -1 if encounter other errno + int WriteToFd(void* data, size_t len); + + // Bringup the QP from RESET state to RTS state + // Arguments: + // lid: remote LID + // gid: remote GID + // qp_num: remote QP number + // Return: + // 0: success + // -1: failed, errno set + int BringUpQp(uint16_t lid, ibv_gid gid, uint32_t qp_num); + + // Get event from comp channel and ack the events + int GetAndAckEvents(); + + // Poll CQ and get the work completion + static void PollCq(Socket* m); + + // Get the description of current handshake state + std::string GetStateStr() const; + + // Try to read data on TCP fd in _socket + inline void TryReadOnTcp(); + + // Not owner + Socket* _socket; + + // State of Handshake + State _state; + + // rdma resource + RdmaResource* _resource; + + // the number of events requiring ack + int _cq_events; + + // the SocketId which wrap the comp channel of CQ + SocketId _cq_sid; + + // Capacity of local Send Queue and local Recv Queue + uint16_t _sq_size; + uint16_t _rq_size; + + // Act as sendbuf and recvbuf, but requires no memcpy + std::vector _sbuf; + std::vector _rbuf; + // Data address of _rbuf + std::vector _rbuf_data; + // Remote block size for receiving + uint16_t _remote_recv_block_size; + + // 
The number of new recv WRs acked to the remote side + uint16_t _accumulated_ack; + // The number of WRs sent without solicited flag + uint16_t _unsolicited; + // The bytes sent without solicited flag + uint32_t _unsolicited_bytes; + // The current index should be used for sending + uint16_t _sq_current; + // The number of send WRs not signaled + uint16_t _sq_unsignaled; + // The just completed send WR's index + uint16_t _sq_sent; + // The just completed recv WR's index + uint16_t _rq_received; + // The capacity of local window: min(local SQ, remote RQ) + uint16_t _local_window_capacity; + // The capacity of remote window: min(local RQ, remote SQ) + uint16_t _remote_window_capacity; + // The number of WRs we can post to the local Send Queue + butil::atomic _window_size; + // The number of new WRs posted in the local Recv Queue + butil::atomic _new_rq_wrs; + + // butex for inform read events on TCP fd during handshake + butil::atomic *_read_butex; + + DISALLOW_COPY_AND_ASSIGN(RdmaEndpoint); +}; + +} // namespace rdma +} // namespace brpc + +#else // if BRPC_WITH_RDMA + +class RdmaEndpoint { }; + +#endif // ifdef USE_RD // dlopen +#include +#include +#include +#include +#include "butil/containers/flat_map.h" // butil::FlatMap +#include "butil/fd_guard.h" +#include "butil/fd_utility.h" // butil::make_non_blocking +#include "butil/logging.h" +#include "brpc/socket.h" +#include "brpc/rdma/block_pool.h" +#include "brpc/rdma/rdma_endpoint.h" +#include "brpc/rdma/rdma_helper.h" + + +namespace butil { +namespace iobuf { +// declared in iobuf.cpp +extern void* (*blockmem_allocate)(size_t); +extern void (*blockmem_deallocate)(void*); +} +} + +namespace brpc { +namespace rdma { + +void* g_handle_ibverbs = NULL; +bool g_skip_rdma_init = false; + +ibv_device** (*IbvGetDeviceList)(int*) = NULL; +void (*IbvFreeDeviceList)(ibv_device**) = NULL; +ibv_context* (*IbvOpenDevice)(ibv_device*) = NULL; +int (*IbvCloseDevice)(ibv_context*) = NULL; +const char* 
(*IbvGetDeviceName)(ibv_device*) = NULL; +int (*IbvForkInit)(void) = NULL; +int (*IbvQueryDevice)(ibv_context*, ibv_device_attr*) = NULL; +int (*IbvQueryPort)(ibv_context*, uint8_t, ibv_port_attr*) = NULL; +int (*IbvQueryGid)(ibv_context*, uint8_t, int, ibv_gid*) = NULL; +ibv_pd* (*IbvAllocPd)(ibv_context*) = NULL; +int (*IbvDeallocPd)(ibv_pd*) = NULL; +ibv_cq* (*IbvCreateCq)(ibv_context*, int, void*, ibv_comp_channel*, int) = NULL; +int (*IbvDestroyCq)(ibv_cq*) = NULL; +ibv_qp* (*IbvCreateQp)(ibv_pd*, ibv_qp_init_attr*) = NULL; +int (*IbvModifyQp)(ibv_qp*, ibv_qp_attr*, ibv_qp_attr_mask) = NULL; +int (*IbvQueryQp)(ibv_qp*, ibv_qp_attr*, ibv_qp_attr_mask, ibv_qp_init_attr*) = NULL; +int (*IbvDestroyQp)(ibv_qp*) = NULL; +ibv_comp_channel* (*IbvCreateCompChannel)(ibv_context*) = NULL; +int (*IbvDestroyCompChannel)(ibv_comp_channel*) = NULL; +ibv_mr* (*IbvRegMr)(ibv_pd*, void*, size_t, ibv_access_flags) = NULL; +int (*IbvDeregMr)(ibv_mr*) = NULL; +int (*IbvGetCqEvent)(ibv_comp_channel*, ibv_cq**, void**) = NULL; +void (*IbvAckCqEvents)(ibv_cq*, unsigned int) = NULL; +int (*IbvGetAsyncEvent)(ibv_context*, ibv_async_event*) = NULL; +void (*IbvAckAsyncEvent)(ibv_async_event*) = NULL; +const char* (*IbvEventTypeStr)(ibv_event_type) = NULL; + +// NOTE: +// ibv_post_send, ibv_post_recv, ibv_poll_cq, ibv_req_notify_cq are all inline function +// defined in infiniband/verbs.h. + +static int g_gid_tbl_len = 0; +static uint8_t g_gid_index = 0; +static ibv_gid g_gid; +static uint16_t g_lid; +static int g_max_sge = 0; +static uint8_t g_port_num = 1; + +static int g_comp_vector_index = 0; + +butil::atomic g_rdma_available(false); + +DEFINE_int32(rdma_max_sge, 0, "Max SGE num in a WR"); +DEFINE_string(rdma_device, "", "The name of the HCA device used " + "(Empty means using the first active device)"); +DEFINE_int32(rdma_port, 1, "The port number to use. For RoCE, it is always 1."); +DEFINE_int32(rdma_gid_index, -1, "The GID index to use. 
-1 means using the last one."); + +static const size_t SYSFS_SIZE = 4096; +static ibv_device** g_devices = NULL; +static ibv_context* g_context = NULL; +static SocketId g_async_socket; +static ibv_pd* g_pd = NULL; +static std::vector* g_mrs = NULL; + +// Store the original IOBuf memalloc and memdealloc functions +static void* (*g_mem_alloc)(size_t) = NULL; +static void (*g_mem_dealloc)(void*) = NULL; + +butil::Mutex* g_addr_map_lock; +typedef butil::FlatMap AddrMap; +static AddrMap* g_addr_map = NULL; // for mr not in memory pool + +static void GlobalRelease() { + g_rdma_available.store(false, butil::memory_order_release); + usleep(100000); // to avoid unload library too early + + // We do not set `g_async_socket' to failed explicitly to avoid + // close async_fd twice. + + if (g_addr_map_lock) { + BAIDU_SCOPED_LOCK(*g_addr_map_lock); + if (g_addr_map) { + for (AddrMap::iterator it = g_addr_map->begin(); + it != g_addr_map->end(); ++it) { + IbvDeregMr(it->second); + } + delete g_addr_map; + g_addr_map = NULL; // must set it to NULL + } + } + delete g_addr_map_lock; + + RdmaEndpoint::GlobalRelease(); + + if (g_mrs) { + for (size_t i = 0; i < g_mrs->size(); ++i) { + IbvDeregMr((*g_mrs)[i]); + } + delete g_mrs; + g_mrs = NULL; + } + + if (g_pd) { + IbvDeallocPd(g_pd); + g_pd = NULL; + } + + if (g_context) { + IbvCloseDevice(g_context); + g_context = NULL; + } + + if (g_devices) { + IbvFreeDeviceList(g_devices); + g_devices = NULL; + } +} + +uint32_t RdmaRegisterMemory(void* buf, size_t size) { + // Register the memory as callback in block_pool + // The thread-safety should be guaranteed by the caller + ibv_mr* mr = IbvRegMr(g_pd, buf, size, IBV_ACCESS_LOCAL_WRITE); + if (!mr) { + PLOG(ERROR) << "Fail to register memory"; + return 0; + } + g_mrs->push_back(mr); + return mr->lkey; +} + +static void* BlockAllocate(size_t len) { + if (len == 0) { + errno = EINVAL; + return NULL; + } + void* ptr = AllocBlock(len); + if (!ptr) { + LOG(ERROR) << "Fail to get block from 
memory pool"; + } + + return ptr; +} + +void BlockDeallocate(void* buf) { + if (!buf) { + errno = EINVAL; + return; + } + DeallocBlock(buf); +} + +static void FindRdmaLid() { + ibv_port_attr attr; + if (IbvQueryPort(g_context, g_port_num, &attr) < 0) { + return; + } + g_lid = attr.lid; + LOG(INFO) << "RDMA LID changes to: " << g_lid; + return; +} + +static bool FindRdmaGid(ibv_context* context) { + bool found = false; + for (int i = 0; i < g_gid_tbl_len; ++i) { + ibv_gid gid; + if (IbvQueryGid(context, g_port_num, i, &gid) < 0) { + continue; + } + if (gid.global.interface_id == 0) { + continue; + } + if (FLAGS_rdma_gid_index == i) { + g_gid = gid; + g_gid_index = i; + return true; + } + // For infiniband, there is only one GID for each port. + // For RoCE, there are 2 GIDs for each MAC and 2 GIDs for each IP. + // Generally, the last GID is a RoCEv2-type GID generated by IP. + g_gid = gid; + g_gid_index = i; + found = true; + } + if (FLAGS_rdma_gid_index >= 0) { + if (g_gid_index != FLAGS_rdma_gid_index) { + found = false; + } + } + return found; +} + +static void OnRdmaAsyncEvent(Socket* m) { + int progress = Socket::PROGRESS_INIT; + do { + ibv_async_event event; + if (IbvGetAsyncEvent(g_context, &event) < 0) { + break; + } + LOG(WARNING) << "rdma async event: " << IbvEventTypeStr(event.event_type); + switch (event.event_type) { + case IBV_EVENT_QP_REQ_ERR: + case IBV_EVENT_QP_ACCESS_ERR: + case IBV_EVENT_QP_FATAL: { + SocketId sid = (SocketId)event.element.qp->qp_context; + SocketUniquePtr s; + if (Socket::Address(sid, &s) == 0) { + s->SetFailed(ERDMA, "QP fatal error"); + LOG(WARNING) << "Receive a QP fatal error on " << s->description(); + } + // NOTE: + // We must ack the async event here, before `s' is recycled. + // Otherwise there will be an deadlock. 
+ // Please check the use of ibv_ack_async_event at: + // http://www.rdmamojo.com/2012/08/16/ibv_ack_async_event/ + IbvAckAsyncEvent(&event); + break; + } + case IBV_EVENT_CQ_ERR: { + LOG(WARNING) << "CQ overruns, the connection will be stopped."; + IbvAckAsyncEvent(&event); + break; + } + case IBV_EVENT_COMM_EST: + case IBV_EVENT_SQ_DRAINED: + case IBV_EVENT_QP_LAST_WQE_REACHED: { + // just ignore the event + IbvAckAsyncEvent(&event); + break; + } + case IBV_EVENT_SRQ_ERR: + case IBV_EVENT_SRQ_LIMIT_REACHED: { + // SRQ not used, should not happen + IbvAckAsyncEvent(&event); + break; + } + case IBV_EVENT_LID_CHANGE: { + FindRdmaLid(); + IbvAckAsyncEvent(&event); + break; + } + case IBV_EVENT_PATH_MIG: + case IBV_EVENT_PATH_MIG_ERR: + case IBV_EVENT_PKEY_CHANGE: + case IBV_EVENT_SM_CHANGE: + case IBV_EVENT_CLIENT_REREGISTER: { + // for IB only, we haven't test these events carefully + IbvAckAsyncEvent(&event); + break; + } + case IBV_EVENT_PORT_ACTIVE: + case IBV_EVENT_PORT_ERR: { + // Port up/down will lead these two events. + // The port error is recoverable. 
+ IbvAckAsyncEvent(&event); + break; + } + case IBV_EVENT_GID_CHANGE: { + FindRdmaGid(g_context); + IbvAckAsyncEvent(&event); + break; + } + case IBV_EVENT_DEVICE_FATAL: { + // because the memory resources are related to rdma device + // we view this error unrecoverable + GlobalDisableRdma(); + IbvAckAsyncEvent(&event); + break; + } + default: + // should not hannen + IbvAckAsyncEvent(&event); + break; + } + if (!m->MoreReadEvents(&progress)) { + break; + } + } while (true); +} + +#define LoadSymbol(handle, func, symbol) \ + *(void**)(&func) = dlsym(handle, symbol); \ + if (!func) { \ + LOG(ERROR) << "Fail to find symbol: " << symbol; \ + return -1; \ + } + +static int ReadRdmaDynamicLib() { + g_handle_ibverbs = dlopen("libibverbs.so", RTLD_LAZY); + if (!g_handle_ibverbs) { + LOG(ERROR) << "Fail to load libibverbs.so due to " << dlerror(); + return -1; + } + + LoadSymbol(g_handle_ibverbs, IbvGetDeviceList, "ibv_get_device_list"); + LoadSymbol(g_handle_ibverbs, IbvFreeDeviceList, "ibv_free_device_list"); + LoadSymbol(g_handle_ibverbs, IbvOpenDevice, "ibv_open_device"); + LoadSymbol(g_handle_ibverbs, IbvCloseDevice, "ibv_close_device"); + LoadSymbol(g_handle_ibverbs, IbvGetDeviceName, "ibv_get_device_name"); + LoadSymbol(g_handle_ibverbs, IbvForkInit, "ibv_fork_init"); + LoadSymbol(g_handle_ibverbs, IbvQueryDevice, "ibv_query_device"); + LoadSymbol(g_handle_ibverbs, IbvQueryPort, "ibv_query_port"); + LoadSymbol(g_handle_ibverbs, IbvQueryGid, "ibv_query_gid"); + LoadSymbol(g_handle_ibverbs, IbvAllocPd, "ibv_alloc_pd"); + LoadSymbol(g_handle_ibverbs, IbvDeallocPd, "ibv_dealloc_pd"); + LoadSymbol(g_handle_ibverbs, IbvCreateCq, "ibv_create_cq"); + LoadSymbol(g_handle_ibverbs, IbvDestroyCq, "ibv_destroy_cq"); + LoadSymbol(g_handle_ibverbs, IbvCreateQp, "ibv_create_qp"); + LoadSymbol(g_handle_ibverbs, IbvModifyQp, "ibv_modify_qp"); + LoadSymbol(g_handle_ibverbs, IbvQueryQp, "ibv_query_qp"); + LoadSymbol(g_handle_ibverbs, IbvDestroyQp, "ibv_destroy_qp"); + 
LoadSymbol(g_handle_ibverbs, IbvCreateCompChannel, "ibv_create_comp_channel"); + LoadSymbol(g_handle_ibverbs, IbvDestroyCompChannel, "ibv_destroy_comp_channel"); + LoadSymbol(g_handle_ibverbs, IbvRegMr, "ibv_reg_mr"); + LoadSymbol(g_handle_ibverbs, IbvDeregMr, "ibv_dereg_mr"); + LoadSymbol(g_handle_ibverbs, IbvGetCqEvent, "ibv_get_cq_event"); + LoadSymbol(g_handle_ibverbs, IbvAckCqEvents, "ibv_ack_cq_events"); + LoadSymbol(g_handle_ibverbs, IbvGetAsyncEvent, "ibv_get_async_event"); + LoadSymbol(g_handle_ibverbs, IbvAckAsyncEvent, "ibv_ack_async_event"); + LoadSymbol(g_handle_ibverbs, IbvEventTypeStr, "ibv_event_type_str"); + + return 0; +} + +static inline void ExitWithError() { + GlobalRelease(); + exit(1); +} + +static void GlobalRdmaInitializeOrDieImpl() { + if (BAIDU_UNLIKELY(g_skip_rdma_init)) { + // Just for UT + return; + } + + if (ReadRdmaDynamicLib() < 0) { + LOG(ERROR) << "Fail to load rdma dynamic lib"; + ExitWithError(); + } + + // ibv_fork_init is very important. If we don't call this API, + // we may get some very, very strange problems if the program + // calls fork(). 
+ if (IbvForkInit()) { + PLOG(ERROR) << "Fail to ibv_fork_init"; + ExitWithError(); + } + + int num = 0; + g_devices = IbvGetDeviceList(&num); + if (num == 0) { + LOG(ERROR) << "Fail to find rdma device"; + ExitWithError(); + } + + // Find the first active port + int available_devices = 0; + g_port_num = FLAGS_rdma_port; + for (int i = 0; i < num; ++i) { + ibv_context* context = IbvOpenDevice(g_devices[i]); + if (!context) { + PLOG(ERROR) << "Fail to open rdma device " << IbvGetDeviceName(g_devices[i]); + ExitWithError(); + } + ibv_port_attr attr; + if (IbvQueryPort(context, g_port_num, &attr) < 0) { + PLOG(WARNING) << "Fail to query port " << g_port_num + << " on " << IbvGetDeviceName(g_devices[i]); + if (FLAGS_rdma_device.size() > 0) { + ExitWithError(); + } + IbvCloseDevice(context); + continue; + } + if (attr.state != IBV_PORT_ACTIVE) { + IbvCloseDevice(context); + continue; + } + if (FLAGS_rdma_device.size() > 0) { + if (strcmp(context->device->name, FLAGS_rdma_device.c_str()) == 0) { + ++available_devices; + g_context = context; + g_gid_tbl_len = attr.gid_tbl_len; + g_lid = attr.lid; + break; + } + } else { + g_context = context; + g_gid_tbl_len = attr.gid_tbl_len; + g_lid = attr.lid; + ++available_devices; + } + } + if (!g_context) { + LOG(ERROR) << "Fail to find available RDMA device " << FLAGS_rdma_device; + ExitWithError(); + } + if (available_devices > 1 && FLAGS_rdma_device.size() == 0) { + LOG(INFO) << "This server has more than one available RDMA device. Only " + << "the first one (" << g_context->device->name + << ") will be used. 
If you want to use other device, please " + << "specify it with --rdma_device."; + } else { + LOG(INFO) << "RDMA device: " << g_context->device->name; + } + LOG(INFO) << "RDMA LID: " << g_lid; + if (!FindRdmaGid(g_context)) { + LOG(ERROR) << "Fail to find available RDMA GID"; + ExitWithError(); + } else { + LOG(INFO) << "RDMA GID Index: " << (int)g_gid_index; + } + IbvCreateCompChannel(g_context); + + // Create protection domain + g_pd = IbvAllocPd(g_context); + if (!g_pd) { + PLOG(ERROR) << "Fail to allocate protection domain"; + ExitWithError(); + } + + g_mrs = new (std::nothrow) std::vector; + if (!g_mrs) { + PLOG(ERROR) << "Fail to allocate a RDMA MR list"; + ExitWithError(); + } + + ibv_device_attr attr; + if (IbvQueryDevice(g_context, &attr) < 0) { + PLOG(ERROR) << "Fail to get the device information"; + ExitWithError(); + } + // Too large sge consumes too much memory for QP + if (FLAGS_rdma_max_sge > 0) { + g_max_sge = attr.max_sge < FLAGS_rdma_max_sge ? + attr.max_sge : FLAGS_rdma_max_sge; + } else { + g_max_sge = attr.max_sge; + } + + // Initialize RDMA memory pool (block_pool) + if (!InitBlockPool(RdmaRegisterMemory)) { + PLOG(ERROR) << "Fail to initialize RDMA memory pool"; + ExitWithError(); + } + + if (RdmaEndpoint::GlobalInitialize() < 0) { + LOG(ERROR) << "rdma_recv_block_type incorrect " + << "(valid value: default/large/huge)"; + ExitWithError(); + } + + g_addr_map_lock = new (std::nothrow) butil::Mutex; + if (!g_addr_map_lock) { + PLOG(WARNING) << "Fail to construct g_addr_map_lock"; + ExitWithError(); + } + + g_addr_map = new (std::nothrow) AddrMap; + if (!g_addr_map) { + PLOG(WARNING) << "Fail to construct g_addr_map"; + ExitWithError(); + } + + if (g_addr_map->init(65536) < 0) { + PLOG(WARNING) << "Fail to initialize g_addr_map"; + ExitWithError(); + } + + SocketOptions opt; + opt.fd = g_context->async_fd; + butil::make_close_on_exec(opt.fd); + if (butil::make_non_blocking(opt.fd) < 0) { + PLOG(WARNING) << "Fail to set async_fd to nonblocking"; 
+ ExitWithError(); + } + opt.on_edge_triggered_events = OnRdmaAsyncEvent; + if (Socket::Create(opt, &g_async_socket) < 0) { + LOG(WARNING) << "Fail to create socket to get async event of RDMA"; + ExitWithError(); + } + + atexit(GlobalRelease); + + g_mem_alloc = butil::iobuf::blockmem_allocate; + g_mem_dealloc = butil::iobuf::blockmem_deallocate; + butil::iobuf::blockmem_allocate = BlockAllocate; + butil::iobuf::blockmem_deallocate = BlockDeallocate; + g_rdma_available.store(true, butil::memory_order_relaxed); +} + +static pthread_once_t initialize_rdma_once = PTHREAD_ONCE_INIT; + +void GlobalRdmaInitializeOrDie() { + if (pthread_once(&initialize_rdma_once, + GlobalRdmaInitializeOrDieImpl) != 0) { + LOG(FATAL) << "Fail to pthread_once GlobalRdmaInitializeOrDie"; + exit(1); + } +} + +int RegisterMemoryForRdma(void* buf, size_t len) { + ibv_mr* mr = IbvRegMr(g_pd, buf, len, IBV_ACCESS_LOCAL_WRITE); + if (!mr) { + return -1; + } + BAIDU_SCOPED_LOCK(*g_addr_map_lock); + if (!g_addr_map->insert(buf, mr)) { + IbvDeregMr(mr); + return -1; + } + return 0; +} + +void DeregisterMemoryForRdma(void* buf) { + BAIDU_SCOPED_LOCK(*g_addr_map_lock); + ibv_mr** mr = g_addr_map->seek(buf); + if (mr && *mr) { + IbvDeregMr(*mr); + g_addr_map->erase(buf); + } +} + +int GetRdmaMaxSge() { + return g_max_sge; +} + +int GetRdmaCompVector() { + if (!g_context) { + return 0; + } + // g_comp_vector_index is not an atomic variable. If more than + // one CQ is created at the same time, some CQs will share the + // same index. However, this vector is only used to assign a + // event queue for the CQ. Sharing the same event queue is not + // a problem. 
+ return (g_comp_vector_index++) % g_context->num_comp_vectors; +} + +ibv_context* GetRdmaContext() { + return g_context; +} + +ibv_pd* GetRdmaPd() { + return g_pd; +} + +uint32_t GetLKey(const void* buf) { + uint32_t lkey = GetRegionId(buf); + if (lkey == 0) { + BAIDU_SCOPED_LOCK(*g_addr_map_lock); + ibv_mr** mr = g_addr_map->seek(buf); + if (mr && *mr) { + lkey = (*mr)->lkey; + } + } + return lkey; +} + +ibv_gid GetRdmaGid() { + return g_gid; +} + +uint16_t GetRdmaLid() { + return g_lid; +} + +uint8_t GetRdmaGidIndex() { + return g_gid_index; +} + +uint8_t GetRdmaPortNum() { + return g_port_num; +} + +bool IsRdmaAvailable() { + return g_rdma_available.load(butil::memory_order_acquire); +} + +void GlobalDisableRdma() { + if (g_rdma_available.exchange(false, butil::memory_order_acquire)) { + LOG(FATAL) << "RDMA is disabled due to some unrecoverable problem"; + } +} + +bool SupportedByRdma(std::string protocol) { + if (protocol.compare("baidu_std") == 0) { + // Since rdma is used for high performance scenario, + // we consider baidu_std for the only protocol to support. + return true; + } + return false; +} + +} // namespace rdma +} // namespace brpc + +#endif // if BRPC_WITH_RDMA \ No newline at end of file diff --git a/src/brpc/rdma/rdma_helper.h b/src/brpc/rdma/rdma_helper.h new file mode 100644 index 0000000000..9d60a066e3 --- /dev/null +++ b/src/brpc/rdma/rdma_helper.h @@ -0,0 +1,84 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#ifndef BRPC_RDMA_HELPER_H +#define BRPC_RDMA_HELPER_H + +#if BRPC_WITH_RDMA + +#include + + +namespace brpc { +namespace rdma { + +// Initialize RDMA environment +// Exit if failed +void GlobalRdmaInitializeOrDie(); + +// Register the given memory +// Return 0 if success, -1 if failed and errno set +int RegisterMemoryForRdma(void* buf, size_t len); + +// Deregister the given memory +void DeregisterMemoryForRdma(void* buf); + +// Get global RDMA context +ibv_context* GetRdmaContext(); + +// Get global RDMA protection domain +ibv_pd* GetRdmaPd(); + +// Return lkey of the given address +uint32_t GetLKey(const void* buf); + +// Return GID Index +uint8_t GetRdmaGidIndex(); + +// Return Global GID +ibv_gid GetRdmaGid(); + +// Return Global LID +uint16_t GetRdmaLid(); + +// Return suggested comp vector for CQ +int GetRdmaCompVector(); + +// Return current port number used +uint8_t GetRdmaPortNum(); + +// Get max_sge supported by the device +int GetRdmaMaxSge(); + +// Get suggested comp_vector for a new CQ +int GetCompVector(); + +// If the RDMA environment is available +bool IsRdmaAvailable(); + +// Disable RDMA in the remaining lifetime of the process +void GlobalDisableRdma(); + +// If the given protocol supported by RDMA +bool SupportedByRdma(std::string protocol); + +} // namespace rdma +} // namespace brpc + +#endif // if BRPC_WITH_RDMA + +#endif // BRPC_RDMA_HELPER_H diff --git a/src/brpc/redis.h b/src/brpc/redis.h index 035cf70a0d..d02e894121 100644 --- a/src/brpc/redis.h +++ b/src/brpc/redis.h @@ -112,7 +112,7 @@ class RedisRequest : public 
::google::protobuf::Message { #if GOOGLE_PROTOBUF_VERSION >= 3006000 RedisRequest* New(::google::protobuf::Arena* arena) const override; #endif - void CopyFrom(const ::google::protobuf::Message& from) override; + void CopyFrom(const ::google::protobuf::Message& from) PB_321_OVERRIDE; void MergeFrom(const ::google::protobuf::Message& from) override; void CopyFrom(const RedisRequest& from); void MergeFrom(const RedisRequest& from); @@ -185,7 +185,7 @@ class RedisResponse : public ::google::protobuf::Message { #if GOOGLE_PROTOBUF_VERSION >= 3006000 RedisResponse* New(::google::protobuf::Arena* arena) const override; #endif - void CopyFrom(const ::google::protobuf::Message& from) override; + void CopyFrom(const ::google::protobuf::Message& from) PB_321_OVERRIDE; void MergeFrom(const ::google::protobuf::Message& from) override; void CopyFrom(const RedisResponse& from); void MergeFrom(const RedisResponse& from); diff --git a/src/brpc/redis_command.cpp b/src/brpc/redis_command.cpp index 2396e457df..82f795051c 100644 --- a/src/brpc/redis_command.cpp +++ b/src/brpc/redis_command.cpp @@ -361,6 +361,10 @@ RedisCommandParser::RedisCommandParser() , _length(0) , _index(0) {} +size_t RedisCommandParser::ParsedArgsSize() { + return _args.size(); +} + ParseError RedisCommandParser::Consume(butil::IOBuf& buf, std::vector* args, butil::Arena* arena) { diff --git a/src/brpc/redis_command.h b/src/brpc/redis_command.h index fc88399570..5ddfb8e99a 100644 --- a/src/brpc/redis_command.h +++ b/src/brpc/redis_command.h @@ -53,6 +53,7 @@ class RedisCommandParser { // in `arena'. ParseError Consume(butil::IOBuf& buf, std::vector* args, butil::Arena* arena); + size_t ParsedArgsSize(); private: // Reset parser to the initial state. 
diff --git a/src/brpc/serialized_request.h b/src/brpc/serialized_request.h index 6a02e5b28a..0fbf76bad4 100644 --- a/src/brpc/serialized_request.h +++ b/src/brpc/serialized_request.h @@ -48,7 +48,7 @@ class SerializedRequest : public ::google::protobuf::Message { #if GOOGLE_PROTOBUF_VERSION >= 3006000 SerializedRequest* New(::google::protobuf::Arena* arena) const override; #endif - void CopyFrom(const ::google::protobuf::Message& from) override; + void CopyFrom(const ::google::protobuf::Message& from) PB_321_OVERRIDE; void CopyFrom(const SerializedRequest& from); void Clear() override; bool IsInitialized() const override; diff --git a/src/brpc/server.cpp b/src/brpc/server.cpp index 8b647fd49c..675e5aee18 100644 --- a/src/brpc/server.cpp +++ b/src/brpc/server.cpp @@ -75,6 +75,7 @@ #include "brpc/rtmp.h" #include "brpc/builtin/common.h" // GetProgramName #include "brpc/details/tcmalloc_extension.h" +#include "brpc/rdma/rdma_helper.h" inline std::ostream& operator<<(std::ostream& os, const timeval& tm) { const char old_fill = os.fill(); @@ -137,6 +138,7 @@ ServerOptions::ServerOptions() , bthread_init_count(0) , internal_port(-1) , has_builtin_services(true) + , use_rdma(false) , http_master_service(NULL) , health_reporter(NULL) , rtmp_service(NULL) @@ -705,6 +707,28 @@ static bool CreateConcurrencyLimiter(const AdaptiveMaxConcurrency& amc, return true; } +#if BRPC_WITH_RDMA +static bool OptionsAvailableOverRdma(const ServerOptions* opt) { + if (opt->rtmp_service) { + LOG(WARNING) << "RTMP is not supported by RDMA"; + return false; + } + if (opt->has_ssl_options()) { + LOG(WARNING) << "SSL is not supported by RDMA"; + return false; + } + if (opt->nshead_service) { + LOG(WARNING) << "NSHEAD is not supported by RDMA"; + return false; + } + if (opt->mongo_service_adaptor) { + LOG(WARNING) << "MONGO is not supported by RDMA"; + return false; + } + return true; +} +#endif + static AdaptiveMaxConcurrency g_default_max_concurrency_of_method(0); int 
Server::StartInternal(const butil::EndPoint& endpoint, @@ -745,6 +769,18 @@ int Server::StartInternal(const butil::EndPoint& endpoint, return -1; } + if (_options.use_rdma) { +#if BRPC_WITH_RDMA + if (!OptionsAvailableOverRdma(&_options)) { + return -1; + } + rdma::GlobalRdmaInitializeOrDie(); +#else + LOG(WARNING) << "Cannot use rdma since brpc does not compile with rdma"; + return -1; +#endif + } + if (_options.http_master_service) { // Check requirements for http_master_service: // has "default_method" & request/response have no fields @@ -985,6 +1021,7 @@ int Server::StartInternal(const butil::EndPoint& endpoint, LOG(ERROR) << "Fail to build acceptor"; return -1; } + _am->_use_rdma = _options.use_rdma; } // Set `_status' to RUNNING before accepting connections // to prevent requests being rejected as ELOGOFF diff --git a/src/brpc/server.h b/src/brpc/server.h index 261bfdce2d..7bba8dda08 100644 --- a/src/brpc/server.h +++ b/src/brpc/server.h @@ -206,7 +206,11 @@ struct ServerOptions { bool has_ssl_options() const { return _ssl_options != NULL; } const ServerSSLOptions& ssl_options() const { return *_ssl_options.get(); } ServerSSLOptions* mutable_ssl_options(); - + + // Whether the server uses rdma or not + // Default: false + bool use_rdma; + // [CAUTION] This option is for implementing specialized http proxies, // most users don't need it. Don't change this option unless you fully // understand the description below. 
diff --git a/src/brpc/socket.cpp b/src/brpc/socket.cpp index 210ba9e9d0..fb5ae19d86 100644 --- a/src/brpc/socket.cpp +++ b/src/brpc/socket.cpp @@ -48,6 +48,8 @@ #include "brpc/policy/rtmp_protocol.h" // FIXME #include "brpc/periodic_task.h" #include "brpc/details/health_check.h" +#include "brpc/rdma/rdma_endpoint.h" +#include "brpc/rdma/rdma_helper.h" #if defined(OS_MACOSX) #include #endif @@ -442,6 +444,8 @@ Socket::Socket(Forbidden) , _auth_context(NULL) , _ssl_state(SSL_UNKNOWN) , _ssl_session(NULL) + , _rdma_ep(NULL) + , _rdma_state(RDMA_OFF) , _connection_type_for_progressive_read(CONNECTION_TYPE_UNKNOWN) , _controller_released_socket(false) , _overcrowded(false) @@ -630,6 +634,22 @@ int Socket::Create(const SocketOptions& options, SocketId* id) { m->_ssl_state = (options.initial_ssl_ctx == NULL ? SSL_OFF : SSL_UNKNOWN); m->_ssl_session = NULL; m->_ssl_ctx = options.initial_ssl_ctx; +#if BRPC_WITH_RDMA + CHECK(m->_rdma_ep == NULL); + if (options.use_rdma) { + m->_rdma_ep = new (std::nothrow)rdma::RdmaEndpoint(m); + if (!m->_rdma_ep) { + const int saved_errno = errno; + PLOG(ERROR) << "Fail to create RdmaEndpoint"; + m->SetFailed(saved_errno, "Fail to create RdmaEndpoint: %s", + berror(saved_errno)); + return -1; + } + m->_rdma_state = RDMA_UNKNOWN; + } else { + m->_rdma_state = RDMA_OFF; + } +#endif m->_connection_type_for_progressive_read = CONNECTION_TYPE_UNKNOWN; m->_controller_released_socket.store(false, butil::memory_order_relaxed); m->_overcrowded = false; @@ -709,6 +729,14 @@ int Socket::WaitAndReset(int32_t expected_nref) { g_vars->channel_conn << -1; } } + +#if BRPC_WITH_RDMA + if (_rdma_ep) { + _rdma_ep->Reset(); + _rdma_state = RDMA_UNKNOWN; + } +#endif + _local_side = butil::EndPoint(); if (_ssl_session) { SSL_free(_ssl_session); @@ -1009,6 +1037,15 @@ void Socket::OnRecycle() { g_vars->channel_conn << -1; } } + +#if BRPC_WITH_RDMA + if (_rdma_ep) { + delete _rdma_ep; + _rdma_ep = NULL; + _rdma_state = RDMA_UNKNOWN; + } +#endif + 
reset_parsing_context(NULL); _read_buf.clear(); @@ -1285,6 +1322,11 @@ int Socket::ConnectIfNot(const timespec* abstime, WriteRequest* req) { return 1; } +void Socket::WakeAsEpollOut() { + _epollout_butex->fetch_add(1, butil::memory_order_release); + bthread::butex_wake_except(_epollout_butex, 0); +} + int Socket::HandleEpollOut(SocketId id) { SocketUniquePtr s; // Since Sockets might have been `SetFailed' before they were @@ -1584,7 +1626,16 @@ int Socket::StartWrite(WriteRequest* req, const WriteOptions& opt) { butil::IOBuf* data_arr[1] = { &req->data }; nw = _conn->CutMessageIntoFileDescriptor(fd(), data_arr, 1); } else { - nw = req->data.cut_into_file_descriptor(fd()); +#if BRPC_WITH_RDMA + if (_rdma_ep && _rdma_state != RDMA_OFF) { + butil::IOBuf* data_arr[1] = { &req->data }; + nw = _rdma_ep->CutFromIOBufList(data_arr, 1); + } else { +#else + { +#endif + nw = req->data.cut_into_file_descriptor(fd()); + } } if (nw < 0) { // RTMP may return EOVERCROWDED @@ -1666,21 +1717,50 @@ void* Socket::KeepWrite(void* void_arg) { // Update(8/15/2017): Not working, performance downgraded. //if (nw <= 0 || req->data.empty()/*note*/) { if (nw <= 0) { - g_vars->nwaitepollout << 1; - bool pollin = (s->_on_edge_triggered_events != NULL); // NOTE: Waiting epollout within timeout is a must to force // KeepWrite to check and setup pending WriteRequests periodically, // which may turn on _overcrowded to stop pending requests from // growing infinitely. 
const timespec duetime = butil::milliseconds_from_now(WAIT_EPOLLOUT_TIMEOUT_MS); - const int rc = s->WaitEpollOut(s->fd(), pollin, &duetime); - if (rc < 0 && errno != ETIMEDOUT) { - const int saved_errno = errno; - PLOG(WARNING) << "Fail to wait epollout of " << *s; - s->SetFailed(saved_errno, "Fail to wait epollout of %s: %s", +#if BRPC_WITH_RDMA + if (s->_rdma_state == RDMA_ON) { + const int expected_val = s->_epollout_butex + ->load(butil::memory_order_acquire); + CHECK(s->_rdma_ep != NULL); + if (!s->_rdma_ep->IsWritable()) { + g_vars->nwaitepollout << 1; + if (bthread::butex_wait(s->_epollout_butex, + expected_val, &duetime) < 0) { + if (errno != EAGAIN && errno != ETIMEDOUT) { + const int saved_errno = errno; + PLOG(WARNING) << "Fail to wait rdma window of " << *s; + s->SetFailed(saved_errno, "Fail to wait rdma window of %s: %s", + s->description().c_str(), berror(saved_errno)); + } + if (s->Failed()) { + // NOTE: + // Different from TCP, we cannot find the RDMA channel + // failed by writing to it. Thus we must check if it + // is already failed here. 
+ break; + } + } + } + } else { +#else + { +#endif + g_vars->nwaitepollout << 1; + bool pollin = (s->_on_edge_triggered_events != NULL); + const int rc = s->WaitEpollOut(s->fd(), pollin, &duetime); + if (rc < 0 && errno != ETIMEDOUT) { + const int saved_errno = errno; + PLOG(WARNING) << "Fail to wait epollout of " << *s; + s->SetFailed(saved_errno, "Fail to wait epollout of %s: %s", s->description().c_str(), berror(saved_errno)); - break; + break; + } } } if (NULL == cur_tail) { @@ -1715,9 +1795,13 @@ ssize_t Socket::DoWrite(WriteRequest* req) { if (_conn) { return _conn->CutMessageIntoFileDescriptor(fd(), data_list, ndata); } else { - ssize_t nw = butil::IOBuf::cut_multiple_into_file_descriptor( +#if BRPC_WITH_RDMA + if (_rdma_ep && _rdma_state != RDMA_OFF) { + return _rdma_ep->CutFromIOBufList(data_list, ndata); + } +#endif + return butil::IOBuf::cut_multiple_into_file_descriptor( fd(), data_list, ndata); - return nw; } } @@ -1865,6 +1949,7 @@ ssize_t Socket::DoRead(size_t size_hint) { } // _ssl_state has been set if (ssl_state() == SSL_OFF) { + CHECK(_rdma_state == RDMA_OFF); return _read_buf.append_from_file_descriptor(fd(), size_hint); } @@ -2229,6 +2314,11 @@ void Socket::DebugSocket(std::ostream& os, SocketId id) { << "\n}"; } #endif +#if BRPC_WITH_RDMA + if (ptr->_rdma_state == RDMA_ON && ptr->_rdma_ep) { + ptr->_rdma_ep->DebugInfo(os); + } +#endif } int Socket::CheckHealth() { @@ -2452,6 +2542,7 @@ int Socket::GetPooledSocket(SocketUniquePtr* pooled_socket) { opt.initial_ssl_ctx = _ssl_ctx; opt.keytable_pool = _keytable_pool; opt.app_connect = _app_connect; + opt.use_rdma = (_rdma_ep) ? true : false; socket_pool = new SocketPool(opt); SocketPool* expected = NULL; if (!main_sp->socket_pool.compare_exchange_strong( @@ -2548,6 +2639,7 @@ int Socket::GetShortSocket(SocketUniquePtr* short_socket) { opt.initial_ssl_ctx = _ssl_ctx; opt.keytable_pool = _keytable_pool; opt.app_connect = _app_connect; + opt.use_rdma = (_rdma_ep) ? 
true : false; if (get_client_side_messenger()->Create(opt, &id) != 0 || Socket::Address(id, short_socket) != 0) { return -1; diff --git a/src/brpc/socket.h b/src/brpc/socket.h index ffc84e31fc..ac3d4deb55 100644 --- a/src/brpc/socket.h +++ b/src/brpc/socket.h @@ -48,6 +48,10 @@ class H2GlobalStreamCreator; namespace schan { class ChannelBalancer; } +namespace rdma { +class RdmaEndpoint; +class RdmaConnect; +} class Socket; class AuthContext; @@ -188,6 +192,7 @@ struct SocketOptions { void (*on_edge_triggered_events)(Socket*); int health_check_interval_s; std::shared_ptr initial_ssl_ctx; + bool use_rdma; bthread_keytable_pool_t* keytable_pool; SocketConnection* conn; std::shared_ptr app_connect; @@ -208,6 +213,8 @@ friend class Controller; friend class policy::ConsistentHashingLoadBalancer; friend class policy::RtmpContext; friend class schan::ChannelBalancer; +friend class rdma::RdmaEndpoint; +friend class rdma::RdmaConnect; friend class HealthCheckTask; friend class OnAppHealthCheckDone; friend class HealthCheckManager; @@ -543,6 +550,13 @@ friend class policy::H2GlobalStreamCreator; private: DISALLOW_COPY_AND_ASSIGN(Socket); + // The on/off state of RDMA + enum RdmaState { + RDMA_ON, + RDMA_OFF, + RDMA_UNKNOWN + }; + int ConductError(bthread_id_t); int StartWrite(WriteRequest*, const WriteOptions&); @@ -618,6 +632,9 @@ friend void DereferenceSocket(Socket*); WriteRequest*, int error_code, const std::string& error_text); void ReleaseAllFailedWriteRequests(WriteRequest*); + // Try to wake socket just like epollout has arrived + void WakeAsEpollOut(); + // Generic callback for Socket to handle epollout event static int HandleEpollOut(SocketId socket_id); @@ -793,6 +810,11 @@ friend void DereferenceSocket(Socket*); SSL* _ssl_session; // owner std::shared_ptr _ssl_ctx; + // The RdmaEndpoint + rdma::RdmaEndpoint* _rdma_ep; + // Should use RDMA or not + RdmaState _rdma_state; + // Pass from controller, for progressive reading. 
ConnectionType _connection_type_for_progressive_read; butil::atomic _controller_released_socket; diff --git a/src/brpc/socket_inl.h b/src/brpc/socket_inl.h index 31ce6a907e..9423bfdf0e 100644 --- a/src/brpc/socket_inl.h +++ b/src/brpc/socket_inl.h @@ -57,6 +57,7 @@ inline SocketOptions::SocketOptions() , user(NULL) , on_edge_triggered_events(NULL) , health_check_interval_s(-1) + , use_rdma(false) , keytable_pool(NULL) , conn(NULL) , app_connect(NULL) diff --git a/src/brpc/socket_map.cpp b/src/brpc/socket_map.cpp index a08f50b299..512e70a767 100644 --- a/src/brpc/socket_map.cpp +++ b/src/brpc/socket_map.cpp @@ -87,8 +87,9 @@ SocketMap* get_or_new_client_side_socket_map() { } int SocketMapInsert(const SocketMapKey& key, SocketId* id, - const std::shared_ptr& ssl_ctx) { - return get_or_new_client_side_socket_map()->Insert(key, id, ssl_ctx); + const std::shared_ptr& ssl_ctx, + bool use_rdma) { + return get_or_new_client_side_socket_map()->Insert(key, id, ssl_ctx, use_rdma); } int SocketMapFind(const SocketMapKey& key, SocketId* id) { @@ -210,7 +211,8 @@ void SocketMap::PrintSocketMap(std::ostream& os, void* arg) { } int SocketMap::Insert(const SocketMapKey& key, SocketId* id, - const std::shared_ptr& ssl_ctx) { + const std::shared_ptr& ssl_ctx, + bool use_rdma) { std::unique_lock mu(_mutex); SingleConnection* sc = _map.seek(key); if (sc) { @@ -234,6 +236,7 @@ int SocketMap::Insert(const SocketMapKey& key, SocketId* id, SocketOptions opt; opt.remote_side = key.peer.addr; opt.initial_ssl_ctx = ssl_ctx; + opt.use_rdma = use_rdma; if (_options.socket_creator->CreateSocket(opt, &tmp_id) != 0) { PLOG(FATAL) << "Fail to create socket to " << key.peer; return -1; diff --git a/src/brpc/socket_map.h b/src/brpc/socket_map.h index 3e6695ba5a..893239461d 100644 --- a/src/brpc/socket_map.h +++ b/src/brpc/socket_map.h @@ -80,11 +80,17 @@ struct SocketMapKeyHasher { // successfully, SocketMapRemove() MUST be called when the Socket is not needed. // Return 0 on success, -1 otherwise. 
int SocketMapInsert(const SocketMapKey& key, SocketId* id, - const std::shared_ptr& ssl_ctx); + const std::shared_ptr& ssl_ctx, + bool use_rdma); + +inline int SocketMapInsert(const SocketMapKey& key, SocketId* id, + const std::shared_ptr& ssl_ctx) { + return SocketMapInsert(key, id, ssl_ctx, false); +} inline int SocketMapInsert(const SocketMapKey& key, SocketId* id) { std::shared_ptr empty_ptr; - return SocketMapInsert(key, id, empty_ptr); + return SocketMapInsert(key, id, empty_ptr, false); } // Find the SocketId associated with `key'. @@ -144,10 +150,15 @@ class SocketMap { ~SocketMap(); int Init(const SocketMapOptions&); int Insert(const SocketMapKey& key, SocketId* id, - const std::shared_ptr& ssl_ctx); + const std::shared_ptr& ssl_ctx, + bool use_rdma); + int Insert(const SocketMapKey& key, SocketId* id, + const std::shared_ptr& ssl_ctx) { + return Insert(key, id, ssl_ctx, false); + } int Insert(const SocketMapKey& key, SocketId* id) { std::shared_ptr empty_ptr; - return Insert(key, id, empty_ptr); + return Insert(key, id, empty_ptr, false); } void Remove(const SocketMapKey& key, SocketId expected_id); diff --git a/src/brpc/thrift_message.h b/src/brpc/thrift_message.h index e9a7a3af5f..73716c12ed 100644 --- a/src/brpc/thrift_message.h +++ b/src/brpc/thrift_message.h @@ -25,6 +25,7 @@ #include "brpc/channel_base.h" #include "brpc/controller.h" #include "brpc/proto_base.pb.h" +#include "brpc/pb_compat.h" namespace apache { namespace thrift { @@ -83,11 +84,11 @@ friend class ThriftStub; // implements Message ---------------------------------------------- - ThriftFramedMessage* New() const override; + ThriftFramedMessage* New() const PB_319_OVERRIDE; #if GOOGLE_PROTOBUF_VERSION >= 3006000 ThriftFramedMessage* New(::google::protobuf::Arena* arena) const override; #endif - void CopyFrom(const ::google::protobuf::Message& from) override; + void CopyFrom(const ::google::protobuf::Message& from) PB_321_OVERRIDE; void MergeFrom(const ::google::protobuf::Message& from) 
override; void CopyFrom(const ThriftFramedMessage& from); void MergeFrom(const ThriftFramedMessage& from); @@ -96,10 +97,10 @@ friend class ThriftStub; int ByteSize() const; bool MergePartialFromCodedStream( - ::google::protobuf::io::CodedInputStream* input) override; + ::google::protobuf::io::CodedInputStream* input) PB_310_OVERRIDE; void SerializeWithCachedSizes( - ::google::protobuf::io::CodedOutputStream* output) const override; - ::google::protobuf::uint8* SerializeWithCachedSizesToArray(::google::protobuf::uint8* output) const override; + ::google::protobuf::io::CodedOutputStream* output) const PB_310_OVERRIDE; + ::google::protobuf::uint8* SerializeWithCachedSizesToArray(::google::protobuf::uint8* output) const PB_310_OVERRIDE; int GetCachedSize() const override { return ByteSize(); } protected: diff --git a/src/bthread/butex.cpp b/src/bthread/butex.cpp index cc43702ae9..99f7d7eaf0 100644 --- a/src/bthread/butex.cpp +++ b/src/bthread/butex.cpp @@ -100,6 +100,7 @@ struct ButexBthreadWaiter : public ButexWaiter { int expected_value; Butex* initial_butex; TaskControl* control; + const timespec* abstime; }; // pthread_task or main_task allocates this structure on stack and queue it @@ -534,6 +535,14 @@ static void wait_for_butex(void* arg) { !bw->task_meta->interrupted) { b->waiters.Append(bw); bw->container.store(b, butil::memory_order_relaxed); + if (bw->abstime != NULL) { + bw->sleep_id = get_global_timer_thread()->schedule( + erase_from_butex_and_wakeup, bw, *bw->abstime); + if (!bw->sleep_id) { // TimerThread stopped. + errno = ESTOP; + erase_from_butex_and_wakeup(bw); + } + } return; } } @@ -542,7 +551,7 @@ static void wait_for_butex(void* arg) { // TaskGroup::interrupt() no-op, there's no race between following code and // the two functions. The on-stack ButexBthreadWaiter is safe to use and // bw->waiter_state will not change again. 
- unsleep_if_necessary(bw, get_global_timer_thread()); + // unsleep_if_necessary(bw, get_global_timer_thread()); tls_task_group->ready_to_run(bw->tid); // FIXME: jump back to original thread is buggy. @@ -648,6 +657,7 @@ int butex_wait(void* arg, int expected_value, const timespec* abstime) { bbw.expected_value = expected_value; bbw.initial_butex = b; bbw.control = g->control(); + bbw.abstime = abstime; if (abstime != NULL) { // Schedule timer before queueing. If the timer is triggered before @@ -658,12 +668,6 @@ int butex_wait(void* arg, int expected_value, const timespec* abstime) { errno = ETIMEDOUT; return -1; } - bbw.sleep_id = get_global_timer_thread()->schedule( - erase_from_butex_and_wakeup, &bbw, *abstime); - if (!bbw.sleep_id) { // TimerThread stopped. - errno = ESTOP; - return -1; - } } #ifdef SHOW_BTHREAD_BUTEX_WAITER_COUNT_IN_VARS bvar::Adder& num_waiters = butex_waiter_count(); diff --git a/src/bthread/task_group.cpp b/src/bthread/task_group.cpp index b8ead16342..94ce5eb784 100644 --- a/src/bthread/task_group.cpp +++ b/src/bthread/task_group.cpp @@ -248,6 +248,9 @@ int TaskGroup::init(size_t runqueue_capacity) { return 0; } +#if defined(__linux__) && defined(__aarch64__) && defined(__clang__) + __attribute__((optnone)) +#endif void TaskGroup::task_runner(intptr_t skip_remained) { // NOTE: tls_task_group is volatile since tasks are moved around // different groups. 
@@ -567,6 +570,9 @@ void TaskGroup::sched(TaskGroup** pg) { sched_to(pg, next_tid); } +#if defined(__linux__) && defined(__aarch64__) && defined(__clang__) + __attribute__((optnone)) +#endif void TaskGroup::sched_to(TaskGroup** pg, TaskMeta* next_meta) { TaskGroup* g = *pg; #ifndef NDEBUG diff --git a/src/butil/class_name.h b/src/butil/class_name.h index 48babf7930..29c91a9c48 100644 --- a/src/butil/class_name.h +++ b/src/butil/class_name.h @@ -30,7 +30,7 @@ namespace butil { std::string demangle(const char* name); -namespace detail { +namespace { template struct ClassNameHelper { static std::string name; }; template std::string ClassNameHelper::name = demangle(typeid(T).name()); } @@ -39,7 +39,7 @@ template std::string ClassNameHelper::name = demangle(typeid(T). template const std::string& class_name_str() { // We don't use static-variable-inside-function because before C++11 // local static variable is not guaranteed to be thread-safe. - return detail::ClassNameHelper::name; + return ClassNameHelper::name; } // Get name of class |T|, in const char*. 
diff --git a/src/butil/containers/flat_map_inl.h b/src/butil/containers/flat_map_inl.h index e5948920f6..ad5598c2f9 100644 --- a/src/butil/containers/flat_map_inl.h +++ b/src/butil/containers/flat_map_inl.h @@ -567,10 +567,12 @@ void FlatMap<_K, _T, _H, _E, _S>::save_iterator( template typename FlatMap<_K, _T, _H, _E, _S>::const_iterator FlatMap<_K, _T, _H, _E, _S>::restore_iterator(const PositionHint& hint) const { - if (hint.nbucket != _nbucket/*resized*/ || - hint.offset >= _nbucket/*invalid hint*/) { - return begin(); // restart - } + if (hint.nbucket != _nbucket) // resized + return begin(); // restart + + if (hint.offset >= _nbucket) // invalid hint, stop the iteration + return end(); + Bucket& first_node = _buckets[hint.offset]; if (hint.at_entry) { return const_iterator(this, hint.offset); @@ -604,6 +606,10 @@ bool FlatMap<_K, _T, _H, _E, _S>::resize(size_t nbucket2) { } FlatMap new_map; + // NOTE: following functors must be kept after resizing otherwise the + // internal state is lost. 
+ new_map._hashfn = _hashfn; + new_map._eql = _eql; if (new_map.init(nbucket2, _load_factor) != 0) { LOG(ERROR) << "Fail to init new_map, nbucket=" << nbucket2; return false; diff --git a/src/butil/errno.cpp b/src/butil/errno.cpp index 8b3eeb7604..9b964e114f 100644 --- a/src/butil/errno.cpp +++ b/src/butil/errno.cpp @@ -60,9 +60,8 @@ int DescribeCustomizedErrno( if (desc && strncmp(desc, "Unknown error", 13) != 0) #endif { - fprintf(stderr, "Fail to define %s(%d) which is already defined as `%s', abort.", + fprintf(stderr, "WARNING: Fail to define %s(%d) which is already defined as `%s'", error_name, error_code, desc); - _exit(1); } } errno_desc[error_code - ERRNO_BEGIN] = description; diff --git a/src/butil/logging.cc b/src/butil/logging.cc index 9db75cdeee..fa0dbae8aa 100644 --- a/src/butil/logging.cc +++ b/src/butil/logging.cc @@ -21,6 +21,7 @@ #include DEFINE_bool(log_as_json, false, "Print log as a valid JSON"); +DEFINE_bool(escape_log, false, "Escape log content before printing"); #if !BRPC_WITH_GLOG @@ -466,7 +467,7 @@ static void PrintLogSeverity(std::ostream& os, int severity) { } } -static void PrintLogPrefix( +void PrintLogPrefix( std::ostream& os, int severity, const char* file, int line) { PrintLogSeverity(os, severity); #if defined(OS_LINUX) @@ -565,12 +566,36 @@ static void PrintLogPrefixAsJSON( os << "\"C\":\"" << file << ':' << line << "\""; } -static void PrintLog(std::ostream& os, - int severity, const char* file, int line, - const butil::StringPiece& content) { +void EscapeJson(std::ostream& os, const butil::StringPiece& s) { + for (auto it = s.begin(); it != s.end(); it++) { + auto c = *it; + switch (c) { + case '"': os << "\\\""; break; + case '\\': os << "\\\\"; break; + case '\b': os << "\\b"; break; + case '\f': os << "\\f"; break; + case '\n': os << "\\n"; break; + case '\r': os << "\\r"; break; + case '\t': os << "\\t"; break; + default: os << c; + } + } +} + +inline void OutputLog(std::ostream& os, const butil::StringPiece& s) { + if 
(FLAGS_escape_log) { + EscapeJson(os, s); + } else { + os.write(s.data(), s.length()); + } +} + +void PrintLog(std::ostream& os, + int severity, const char* file, int line, + const butil::StringPiece& content) { if (!FLAGS_log_as_json) { PrintLogPrefix(os, severity, file, line); - os.write(content.data(), content.size()); + OutputLog(os, content); } else { os << '{'; PrintLogPrefixAsJSON(os, severity, file, line); @@ -582,7 +607,7 @@ static void PrintLog(std::ostream& os, } else { os << ','; } - os.write(content.data(), content.size()); + OutputLog(os, content); if (pair_quote) { os << '"'; } else if (!content.empty() && content[content.size()-1] != '"') { diff --git a/src/butil/logging.h b/src/butil/logging.h index f92cd780ce..e2a410985a 100644 --- a/src/butil/logging.h +++ b/src/butil/logging.h @@ -319,6 +319,11 @@ class LogSink { // Returns previous sink. BUTIL_EXPORT LogSink* SetLogSink(LogSink* sink); +// Print |content| with other info into |os|. +void PrintLog(std::ostream& os, + int severity, const char* file, int line, + const butil::StringPiece& content); + // The LogSink mainly for unit-testing. Logs will be appended to it. 
class StringSink : public LogSink, public std::string { public: diff --git a/src/butil/recordio.cc b/src/butil/recordio.cc old mode 100755 new mode 100644 diff --git a/src/butil/recordio.h b/src/butil/recordio.h old mode 100755 new mode 100644 diff --git a/src/bvar/mvariable.cpp b/src/bvar/mvariable.cpp index 473d612b86..2e191a1654 100644 --- a/src/bvar/mvariable.cpp +++ b/src/bvar/mvariable.cpp @@ -45,7 +45,7 @@ static bool validator_bvar_max_multi_dimension_metric_number(const char*, int32_ return true; } -const bool ALLOW_UNUSED dummp_bvar_max_multi_dimension_metric_number = ::google::RegisterFlagValidator( +const bool ALLOW_UNUSED dummp_bvar_max_multi_dimension_metric_number = ::GFLAGS_NS::RegisterFlagValidator( &FLAGS_bvar_max_multi_dimension_metric_number, validator_bvar_max_multi_dimension_metric_number); class MVarEntry { diff --git a/src/bvar/variable.cpp b/src/bvar/variable.cpp index c14e8dcb12..1f9da3a1bb 100644 --- a/src/bvar/variable.cpp +++ b/src/bvar/variable.cpp @@ -705,9 +705,9 @@ DEFINE_string(bvar_dump_exclude, "", "Dump bvar excluded from these wildcards, " "separated by semicolon(;), empty means no exclusion"); DEFINE_string(bvar_dump_prefix, "", "Every dumped name starts with this prefix"); DEFINE_string(bvar_dump_tabs, "latency=*_latency*" - "; qps=*_qps*" - "; error=*_error*" - "; system=*process_*,*malloc_*,*kernel_*", + ";qps=*_qps*" + ";error=*_error*" + ";system=*process_*,*malloc_*,*kernel_*", "Dump bvar into different tabs according to the filters (seperated by semicolon), " "format: *(tab_name=wildcards;)"); @@ -920,11 +920,11 @@ const bool ALLOW_UNUSED dummy_bvar_dump_prefix = ::GFLAGS_NS::RegisterFlagValida const bool ALLOW_UNUSED dummy_bvar_dump_tabs = ::GFLAGS_NS::RegisterFlagValidator( &FLAGS_bvar_dump_tabs, wakeup_dumping_thread); -const bool ALLOW_UNUSED dummy_mbvar_dump = ::google::RegisterFlagValidator( +const bool ALLOW_UNUSED dummy_mbvar_dump = ::GFLAGS_NS::RegisterFlagValidator( &FLAGS_mbvar_dump, validate_bvar_dump); 
-const bool ALLOW_UNUSED dummy_mbvar_dump_prefix = ::google::RegisterFlagValidator( +const bool ALLOW_UNUSED dummy_mbvar_dump_prefix = ::GFLAGS_NS::RegisterFlagValidator( &FLAGS_mbvar_dump_prefix, wakeup_dumping_thread); -const bool ALLOW_UNUSED dump_mbvar_dump_file = ::google::RegisterFlagValidator( +const bool ALLOW_UNUSED dump_mbvar_dump_file = ::GFLAGS_NS::RegisterFlagValidator( &FLAGS_mbvar_dump_file, wakeup_dumping_thread); static bool validate_mbvar_dump_format(const char*, const std::string& format) { @@ -940,7 +940,7 @@ static bool validate_mbvar_dump_format(const char*, const std::string& format) { return true; } -const bool ALLOW_UNUSED dummy_mbvar_dump_format = ::google::RegisterFlagValidator( +const bool ALLOW_UNUSED dummy_mbvar_dump_format = ::GFLAGS_NS::RegisterFlagValidator( &FLAGS_mbvar_dump_format, validate_mbvar_dump_format); void to_underscored_name(std::string* name, const butil::StringPiece& src) { diff --git a/test/BUILD.bazel b/test/BUILD.bazel index a170eaf8cb..a2eea9cf08 100644 --- a/test/BUILD.bazel +++ b/test/BUILD.bazel @@ -13,13 +13,10 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-load("//:bazel/brpc.bzl", "brpc_proto_library") - -config_setting( - name = "darwin", - values = {"cpu": "darwin"}, - visibility = ["//visibility:public"], -) +load("@rules_proto//proto:defs.bzl", "proto_library") +load("@rules_cc//cc:defs.bzl", "cc_library", "cc_proto_library", "cc_test") +load("@com_grail_bazel_compdb//:defs.bzl", "compilation_database") +load("@com_grail_bazel_output_base_util//:defs.bzl", "OUTPUT_BASE") COPTS = [ "-D__STDC_FORMAT_MACROS", @@ -41,24 +38,10 @@ COPTS = [ "-DBVAR_NOT_LINK_DEFAULT_VARIABLES", "-DUNIT_TEST", ] + select({ - "//:with_glog": ["-DBRPC_WITH_GLOG=1"], + "//bazel/config:brpc_with_glog": ["-DBRPC_WITH_GLOG=1"], "//conditions:default": ["-DBRPC_WITH_GLOG=0"], }) -LINKOPTS = [ - "-lpthread", - "-lssl", - "-lcrypto", - "-ldl", - "-lz", -] + select({ - ":darwin": [], - "//conditions:default": [ - "-lrt", - - ], -}) - TEST_BUTIL_SOURCES = [ "at_exit_unittest.cc", "atomicops_unittest.cc", @@ -157,38 +140,39 @@ TEST_BUTIL_SOURCES = [ "bounded_queue_unittest.cc", "butil_unittest_main.cpp", ] + select({ - "@bazel_tools//tools/osx:darwin": [], - "//conditions:default": [ - "test_file_util_linux.cc", - "proc_maps_linux_unittest.cc", - ], + "@bazel_tools//tools/osx:darwin": [], + "//conditions:default": [ + "test_file_util_linux.cc", + "proc_maps_linux_unittest.cc", + ], }) proto_library( name = "test_proto", - srcs = glob([ - "*.proto", - ], - exclude = [ - "echo.proto", - ] + srcs = glob( + [ + "*.proto", + ], + exclude = [ + "echo.proto", + ], ), visibility = ["//visibility:public"], ) cc_proto_library( name = "cc_test_proto", + visibility = ["//visibility:public"], deps = [ ":test_proto", ], - visibility = ["//visibility:public"], ) cc_library( name = "sstream_workaround", hdrs = [ "sstream_workaround.h", - ] + ], ) cc_test( @@ -198,53 +182,74 @@ cc_test( "multiprocess_func_list.h", "test_switches.h", ], + copts = COPTS, deps = [ - ":sstream_workaround", ":cc_test_proto", + ":sstream_workaround", "//:brpc", 
"@com_google_googletest//:gtest", ], - copts = COPTS, ) cc_test( name = "bvar_test", - srcs = glob([ - "bvar_*_unittest.cpp", - ], - exclude = [ - "bvar_lock_timer_unittest.cpp", - "bvar_recorder_unittest.cpp", - ]), + srcs = glob( + [ + "bvar_*_unittest.cpp", + ], + exclude = [ + "bvar_lock_timer_unittest.cpp", + "bvar_recorder_unittest.cpp", + ], + ), + copts = COPTS, deps = [ ":sstream_workaround", "//:bvar", "@com_google_googletest//:gtest", ], - copts = COPTS, ) cc_test( name = "bthread_test", - srcs = glob([ - "bthread_*_unittest.cpp", - ], - exclude = [ - "bthread_cond_unittest.cpp", - "bthread_execution_queue_unittest.cpp", - "bthread_dispatcher_unittest.cpp", - "bthread_fd_unittest.cpp", - "bthread_mutex_unittest.cpp", - "bthread_setconcurrency_unittest.cpp", - # glog CHECK die with a fatal error - "bthread_key_unittest.cpp" - ]), + srcs = glob( + [ + "bthread_*_unittest.cpp", + ], + exclude = [ + "bthread_cond_unittest.cpp", + "bthread_execution_queue_unittest.cpp", + "bthread_dispatcher_unittest.cpp", + "bthread_fd_unittest.cpp", + "bthread_mutex_unittest.cpp", + "bthread_setconcurrency_unittest.cpp", + # glog CHECK die with a fatal error + "bthread_key_unittest.cpp", + ], + ), + copts = COPTS, deps = [ ":sstream_workaround", "//:brpc", "@com_google_googletest//:gtest", "@com_google_googletest//:gtest_main", ], - copts = COPTS, ) +compilation_database( + name = "brpc_test_compdb", + # Use test profile + testonly = True, + # OUTPUT_BASE is a dynamic value that will vary for each user workspace. + # If you would like your build outputs to be the same across users, then + # skip supplying this value, and substitute the default constant value + # "__OUTPUT_BASE__" through an external tool like `sed` or `jq` (see + # below shell commands for usage). 
+ output_base = OUTPUT_BASE, + targets = [ + "//:brpc", + ":bvar_test", + ":bthread_test", + ":butil_test", + ], +) diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 980794f5ff..9720a0fabe 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -51,7 +51,7 @@ else() message(FATAL_ERROR "Googletest is not available") endif() -set(CMAKE_CPP_FLAGS "${DEFINE_CLOCK_GETTIME} -DBRPC_WITH_GLOG=${WITH_GLOG_VAL} -DGFLAGS_NS=${GFLAGS_NS}") +set(CMAKE_CPP_FLAGS "${DEFINE_CLOCK_GETTIME} -DBRPC_WITH_GLOG=${WITH_GLOG_VAL} -DBRPC_WITH_RDMA=${WITH_RDMA_VAL} -DGFLAGS_NS=${GFLAGS_NS}") set(CMAKE_CPP_FLAGS "${CMAKE_CPP_FLAGS} -DBTHREAD_USE_FAST_PTHREAD_MUTEX -D__const__=__unused__ -D_GNU_SOURCE -DUSE_SYMBOLIZE -DNO_TCMALLOC -D__STDC_FORMAT_MACROS -D__STDC_LIMIT_MACROS -D__STDC_CONSTANT_MACROS -DUNIT_TEST -Dprivate=public -Dprotected=public -DBVAR_NOT_LINK_DEFAULT_VARIABLES -D__STRICT_ANSI__ -include ${PROJECT_SOURCE_DIR}/test/sstream_workaround.h") set(CMAKE_CXX_FLAGS "${CMAKE_CPP_FLAGS} -g -O2 -pipe -Wall -W -fPIC -fstrict-aliasing -Wno-invalid-offsetof -Wno-unused-parameter -fno-omit-frame-pointer") use_cxx11() diff --git a/test/brpc_block_pool_unittest.cpp b/test/brpc_block_pool_unittest.cpp new file mode 100644 index 0000000000..d0bdd8e492 --- /dev/null +++ b/test/brpc_block_pool_unittest.cpp @@ -0,0 +1,219 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + + +#include +#include +#include +#if BRPC_WITH_RDMA +#include "bthread/bthread.h" +#include "butil/time.h" +#include "brpc/rdma/block_pool.h" + +class BlockPoolTest : public ::testing::Test { +protected: + BlockPoolTest() { } + ~BlockPoolTest() { } +}; + +namespace brpc { +namespace rdma { +DECLARE_int32(rdma_memory_pool_initial_size_mb); +DECLARE_int32(rdma_memory_pool_increase_size_mb); +DECLARE_int32(rdma_memory_pool_max_regions); +DECLARE_int32(rdma_memory_pool_buckets); +extern void DestroyBlockPool(); +extern int GetBlockType(void* buf); +extern size_t GetGlobalLen(int block_type); +extern size_t GetRegionNum(); +} +} + +using namespace brpc::rdma; + +static uint32_t DummyCallback(void*, size_t) { + return 1; +} + +TEST_F(BlockPoolTest, single_thread) { + FLAGS_rdma_memory_pool_initial_size_mb = 1024; + FLAGS_rdma_memory_pool_increase_size_mb = 1024; + FLAGS_rdma_memory_pool_max_regions = 16; + FLAGS_rdma_memory_pool_buckets = 4; + EXPECT_TRUE(InitBlockPool(DummyCallback) != NULL); + + size_t num = 1024; + void* buf[num]; + for (size_t i = 0; i < num; ++i) { + buf[i] = AllocBlock(GetBlockSize(0)); + EXPECT_TRUE(buf[i] != NULL); + EXPECT_EQ(0, GetBlockType(buf[i])); + } + for (size_t i = 0; i < num; ++i) { + DeallocBlock(buf[i]); + buf[i] = NULL; + } + for (size_t i = 0; i < num; ++i) { + buf[i] = AllocBlock(GetBlockSize(0) + 1); + EXPECT_TRUE(buf[i] != NULL); + EXPECT_EQ(1, GetBlockType(buf[i])); + } + for (int i = num - 1; i >= 0; --i) { + DeallocBlock(buf[i]); + buf[i] = NULL; + } + for (size_t i = 0; i < num; ++i) { + buf[i] = 
AllocBlock(GetBlockSize(2)); + EXPECT_TRUE(buf[i] != NULL); + EXPECT_EQ(2, GetBlockType(buf[i])); + } + for (int i = num - 1; i >= 0; --i) { + DeallocBlock(buf[i]); + buf[i] = NULL; + } + + DestroyBlockPool(); +} + +static void* AllocAndDealloc(void* arg) { + uintptr_t i = (uintptr_t)arg; + int len = GetBlockSize(i % 3); + int iterations = 1000; + while (iterations > 0) { + void* buf = AllocBlock(len); + EXPECT_TRUE(buf != NULL); + EXPECT_EQ(i % 3, GetBlockType(buf)); + DeallocBlock(buf); + --iterations; + } + return NULL; +} + +TEST_F(BlockPoolTest, multiple_thread) { + FLAGS_rdma_memory_pool_initial_size_mb = 1024; + FLAGS_rdma_memory_pool_increase_size_mb = 1024; + FLAGS_rdma_memory_pool_max_regions = 16; + FLAGS_rdma_memory_pool_buckets = 4; + EXPECT_TRUE(InitBlockPool(DummyCallback) != NULL); + + uintptr_t thread_num = 32; + bthread_t tid[thread_num]; + bthread_attr_t attr = BTHREAD_ATTR_NORMAL; + uint64_t start_time = butil::cpuwide_time_us(); + for (uintptr_t i = 0; i < thread_num; ++i) { + ASSERT_EQ(0, bthread_start_background(&tid[i], &attr, AllocAndDealloc, (void*)i)); + } + for (uintptr_t i = 0; i < thread_num; ++i) { + ASSERT_EQ(0, bthread_join(tid[i], 0)); + } + LOG(INFO) << "Total time = " << butil::cpuwide_time_us() - start_time << "us"; + + DestroyBlockPool(); +} + +TEST_F(BlockPoolTest, extend) { + FLAGS_rdma_memory_pool_initial_size_mb = 64; + FLAGS_rdma_memory_pool_increase_size_mb = 64; + FLAGS_rdma_memory_pool_max_regions = 16; + FLAGS_rdma_memory_pool_buckets = 1; + EXPECT_TRUE(InitBlockPool(DummyCallback) != NULL); + + EXPECT_EQ(1, GetRegionNum()); + size_t num = 15 * 64 * 1024 * 1024 / GetBlockSize(2); + void* buf[num]; + for (size_t i = 0; i < num; ++i) { + buf[i] = AllocBlock(65537); + EXPECT_TRUE(buf[i] != NULL); + } + EXPECT_EQ(16, GetRegionNum()); + for (size_t i = 0; i < num; ++i) { + DeallocBlock(buf[i]); + } + EXPECT_EQ(16, GetRegionNum()); + + DestroyBlockPool(); +} + +TEST_F(BlockPoolTest, memory_not_enough) { + 
FLAGS_rdma_memory_pool_initial_size_mb = 64; + FLAGS_rdma_memory_pool_increase_size_mb = 64; + FLAGS_rdma_memory_pool_max_regions = 2; + FLAGS_rdma_memory_pool_buckets = 1; + EXPECT_TRUE(InitBlockPool(DummyCallback) != NULL); + + EXPECT_EQ(1, GetRegionNum()); + size_t num = 64 * 1024 * 1024 / GetBlockSize(2); + void* buf[num]; + for (size_t i = 0; i < num; ++i) { + buf[i] = AllocBlock(65537); + EXPECT_TRUE(buf[i] != NULL); + } + EXPECT_EQ(2, GetRegionNum()); + void* tmp = AllocBlock(65536); + EXPECT_EQ(ENOMEM, errno); + EXPECT_EQ(0, GetRegionId(tmp)); + for (size_t i = 0; i < num; ++i) { + DeallocBlock(buf[i]); + } + EXPECT_EQ(2, GetRegionNum()); + + DestroyBlockPool(); +} + +TEST_F(BlockPoolTest, invalid_use) { + FLAGS_rdma_memory_pool_initial_size_mb = 64; + FLAGS_rdma_memory_pool_increase_size_mb = 64; + FLAGS_rdma_memory_pool_max_regions = 2; + FLAGS_rdma_memory_pool_buckets = 1; + EXPECT_TRUE(InitBlockPool(DummyCallback) != NULL); + + void* buf = AllocBlock(0); + EXPECT_EQ(NULL, buf); + EXPECT_EQ(EINVAL, errno); + + buf = AllocBlock(GetBlockSize(2) + 1); + EXPECT_EQ(NULL, buf); + EXPECT_EQ(EINVAL, errno); + + errno = 0; + DeallocBlock(NULL); + EXPECT_EQ(EINVAL, errno); + + DestroyBlockPool(); +} + +TEST_F(BlockPoolTest, dump_info) { + FLAGS_rdma_memory_pool_initial_size_mb = 64; + FLAGS_rdma_memory_pool_increase_size_mb = 64; + FLAGS_rdma_memory_pool_max_regions = 2; + FLAGS_rdma_memory_pool_buckets = 4; + EXPECT_TRUE(InitBlockPool(DummyCallback) != NULL); + DumpMemoryPoolInfo(std::cout); + void* buf = AllocBlock(8192); + DumpMemoryPoolInfo(std::cout); + DeallocBlock(buf); + DumpMemoryPoolInfo(std::cout); + DestroyBlockPool(); +} + +#endif // if BRPC_WITH_RDMA + +int main(int argc, char* argv[]) { + testing::InitGoogleTest(&argc, argv); + GFLAGS_NS::ParseCommandLineFlags(&argc, &argv, true); + return RUN_ALL_TESTS(); +} diff --git a/test/brpc_naming_service_unittest.cpp b/test/brpc_naming_service_unittest.cpp index e6a62c85d6..43ac9f474a 100644 --- 
a/test/brpc_naming_service_unittest.cpp +++ b/test/brpc_naming_service_unittest.cpp @@ -19,8 +19,10 @@ #include #include #include "butil/string_printf.h" +#include "butil/strings/string_split.h" #include "butil/files/temp_file.h" #include "bthread/bthread.h" +#include "brpc/http_status_code.h" #ifdef BAIDU_INTERNAL #include "brpc/policy/baidu_naming_service.h" #endif @@ -30,6 +32,7 @@ #include "brpc/policy/list_naming_service.h" #include "brpc/policy/remote_file_naming_service.h" #include "brpc/policy/discovery_naming_service.h" +#include "brpc/policy/nacos_naming_service.h" #include "echo.pb.h" #include "brpc/server.h" @@ -45,6 +48,9 @@ DECLARE_string(consul_service_discovery_url); DECLARE_string(discovery_api_addr); DECLARE_string(discovery_env); DECLARE_int32(discovery_renew_interval_s); +DECLARE_string(nacos_address); +DECLARE_string(nacos_username); +DECLARE_string(nacos_password); } // policy } // brpc @@ -697,4 +703,139 @@ TEST(NamingServiceTest, discovery_sanity) { } } +class NacosNamingServiceImpl : public test::NacosNamingService { +public: + void Login(google::protobuf::RpcController* cntl_base, + const test::HttpRequest*, test::HttpResponse*, + google::protobuf::Closure* done) override { + brpc::ClosureGuard done_guard(done); + brpc::Controller* cntl = static_cast(cntl_base); + + butil::StringPairs user; + butil::SplitStringIntoKeyValuePairs( + cntl->request_attachment().to_string(), '=', '&', &user); + + const auto expected_user = + butil::StringPairs{{"username", "nacos"}, {"password", "nacos"}}; + + if (user == expected_user) { + cntl->http_response().set_content_type("application/json"); + cntl->response_attachment().append( +R"({ + "accessToken": "eyJhbGciOiJIUzI1NiJ9.eyJzdWIiOiJuYWNvcyIsImV4cCI6MTY2MzAwODMzNn0.YKJJwzHT4v9cpC7kVqWroeJK1WioOYe0JZy4KX8nExs", + "tokenTtl": 18000, + "globalAdmin": true, + "username": "nacos" + })"); + } else { + cntl->http_response().set_status_code(brpc::HTTP_STATUS_FORBIDDEN); + 
cntl->response_attachment().append("unknow user!"); + } + } + + void List(google::protobuf::RpcController* cntl_base, + const test::HttpRequest*, test::HttpResponse*, + google::protobuf::Closure* done) override { + brpc::ClosureGuard done_guard(done); + brpc::Controller* cntl = (brpc::Controller*)cntl_base; + + auto token = cntl->http_request().uri().GetQuery("accessToken"); + if (token == nullptr || + *token != + "eyJhbGciOiJIUzI1NiJ9." + "eyJzdWIiOiJuYWNvcyIsImV4cCI6MTY2MzAwODMzNn0." + "YKJJwzHT4v9cpC7kVqWroeJK1WioOYe0JZy4KX8nExs") { + cntl->http_response().set_status_code(brpc::HTTP_STATUS_FORBIDDEN); + cntl->response_attachment().append( +R"({ + "timestamp": "2022-09-12T22:56:02.730+08:00", + "status": 403, + "error": "Forbidden", + "path": "/nacos/v1/ns/instance/list" + })"); + return; + } + + auto service_name = cntl->http_request().uri().GetQuery("serviceName"); + auto group_name = cntl->http_request().uri().GetQuery("groupName"); + auto namespace_id = cntl->http_request().uri().GetQuery("namespaceId"); + auto clusters = cntl->http_request().uri().GetQuery("clusters"); + if (service_name == nullptr || *service_name != "test" || + group_name == nullptr || *group_name != "g1" || + namespace_id == nullptr || *namespace_id != "n1" || + clusters == nullptr || *clusters != "wx") { + cntl->http_response().set_status_code(brpc::HTTP_STATUS_NOT_FOUND); + return; + } + + cntl->http_response().set_content_type("application/json"); + cntl->response_attachment().append( +R"({ + "name": "g1@@test", + "groupName": "g1", + "clusters": "wx", + "cacheMillis": 10000, + "hosts": + [ + { + "instanceId": "127.0.0.1#8888#wx#g1@@test", + "ip": "127.0.0.1", + "port": 8888, + "weight": 10.0, + "healthy": true, + "enabled": true, + "ephemeral": true, + "clusterName": "wx", + "serviceName": "g1@@test", + "metadata": {}, + "instanceHeartBeatInterval": 5000, + "instanceHeartBeatTimeOut": 15000, + "ipDeleteTimeout": 30000, + "instanceIdGenerator": "simple" + } + ], + "lastRefTime": 
1662990336712, + "checksum": "", + "allIPs": false, + "reachProtectionThreshold": false, + "valid": true + })"); + } +}; + +TEST(NamingServiceTest, nacos) { + brpc::Server server; + NacosNamingServiceImpl svc; + ASSERT_EQ(0, server.AddService(&svc, brpc::SERVER_DOESNT_OWN_SERVICE, + "/nacos/v1/auth/login => Login, " + "/nacos/v1/ns/instance/list => List")); + ASSERT_EQ(0, server.Start("localhost:8848", nullptr)); + + bthread_usleep(5000000); + + butil::EndPoint ep; + ASSERT_EQ(0, butil::str2endpoint("127.0.0.1:8888", &ep)); + const auto expected_node = brpc::ServerNode(ep, "10"); + + const char* service_name = + "serviceName=test&groupName=g1&namespaceId=n1&clusters=wx"; + brpc::policy::FLAGS_nacos_address = "http://localhost:8848"; + brpc::policy::FLAGS_nacos_username = "nacos"; + brpc::policy::FLAGS_nacos_password = "nacos"; + + { + brpc::policy::NacosNamingService nns; + std::vector nodes; + ASSERT_EQ(0, nns.GetServers(service_name, &nodes)); + ASSERT_EQ(nodes.size(), 1); + ASSERT_EQ(expected_node, nodes[0]); + } + { + brpc::policy::FLAGS_nacos_password = "invalid_password"; + brpc::policy::NacosNamingService nns; + std::vector nodes; + ASSERT_NE(0, nns.GetServers(service_name, &nodes)); + } +} + } //namespace diff --git a/test/brpc_rdma_unittest.cpp b/test/brpc_rdma_unittest.cpp new file mode 100644 index 0000000000..158ba4107e --- /dev/null +++ b/test/brpc_rdma_unittest.cpp @@ -0,0 +1,1954 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + + +#include +#include +#include +#include +#if BRPC_WITH_RDMA +#include +#include "butil/endpoint.h" +#include "butil/fd_guard.h" +#include "butil/fd_utility.h" +#include "butil/iobuf.h" +#include "butil/sys_byteorder.h" +#include "butil/files/temp_file.h" +#include "brpc/acceptor.h" +#include "brpc/channel.h" +#include "brpc/controller.h" +#include "brpc/server.h" +#include "brpc/socket.h" +#include "brpc/errno.pb.h" +#include "brpc/parallel_channel.h" +#include "brpc/selective_channel.h" +#include "brpc/rdma/block_pool.h" +#include "brpc/rdma/rdma_endpoint.h" +#include "brpc/rdma/rdma_helper.h" +#include "echo.pb.h" + +static const int PORT = 8713; + +using namespace brpc; + +namespace brpc { + +DECLARE_int64(socket_max_unwritten_bytes); +DECLARE_bool(log_idle_connection_close); +DEFINE_bool(rdma_test_enable, false, "Enable tests requring rdma runtime."); + +namespace rdma { + +struct HelloMessage { + void Serialize(void* data) const; + void Deserialize(void* data); + + uint16_t msg_len; + uint16_t hello_ver; + uint16_t impl_ver; + uint16_t block_size; + uint16_t sq_size; + uint16_t rq_size; + ibv_gid gid; + uint32_t qp_num; +}; + +DECLARE_bool(rdma_trace_verbose); +DECLARE_int32(rdma_memory_pool_max_regions); +extern ibv_cq* (*IbvCreateCq)(ibv_context*, int, void*, ibv_comp_channel*, int); +extern int (*IbvDestroyCq)(ibv_cq*); +extern ibv_qp* (*IbvCreateQp)(ibv_pd*, ibv_qp_init_attr*); +extern int (*IbvModifyQp)(ibv_qp*, ibv_qp_attr*, ibv_qp_attr_mask); +extern int (*IbvQueryQp)(ibv_qp*, ibv_qp_attr*, ibv_qp_attr_mask, ibv_qp_init_attr*); 
+extern int (*IbvDestroyQp)(ibv_qp*); +extern butil::atomic g_rdma_available; +extern bool g_skip_rdma_init; +} +} + +static std::string g_ip = "127.0.0.1"; +static butil::EndPoint g_ep; + +class MyEchoService : public ::test::EchoService { + void Echo(google::protobuf::RpcController* cntl_base, + const ::test::EchoRequest* req, + ::test::EchoResponse* res, + google::protobuf::Closure* done) { + Controller* cntl = static_cast(cntl_base); + ClosureGuard done_guard(done); + if (req->server_fail()) { + cntl->SetFailed(req->server_fail(), "Server fail1"); + cntl->SetFailed(req->server_fail(), "Server fail2"); + return; + } + if (req->close_fd()) { + usleep(1); + LOG(INFO) << "close fd..."; + cntl->CloseConnection("Close connection according to request"); + return; + } + if (req->sleep_us() > 0) { + LOG(INFO) << "sleep " << req->sleep_us() << "us..."; + bthread_usleep(req->sleep_us()); + } + res->set_message(req->message()); + if (req->code() != 0) { + res->add_code_list(req->code()); + } + cntl->response_attachment().append(cntl->request_attachment()); + } +}; + +class RdmaTest : public ::testing::Test { +protected: + RdmaTest() { + butil::ip_t ip; + EXPECT_EQ(0, butil::str2ip(g_ip.c_str(), &ip)); + butil::EndPoint ep(ip, PORT); + g_ep = ep; + EXPECT_EQ(0, _server_list.save(butil::endpoint2str(g_ep).c_str())); + _naming_url = std::string("File://") + _server_list.fname(); + _server.AddService(&_svc, SERVER_DOESNT_OWN_SERVICE); + } + ~RdmaTest() { } + + virtual void SetUp() { } + + virtual void TearDown() { + rdma::DumpMemoryPoolInfo(std::cout); + } + +private: + void StartServer(bool use_rdma = true) { + ServerOptions options; + options.use_rdma = use_rdma; + options.idle_timeout_sec = 5; + options.max_concurrency = 0; + options.internal_port = -1; + EXPECT_EQ(0, _server.Start(PORT, &options)); + } + + void StopServer() { + _server.Stop(0); + _server.Join(); + } + + Socket* GetSocketFromServer(size_t index) { + std::vector sids; + _server._am->ListConnections(&sids); + 
if (index >= sids.size()) { + return NULL; + } + SocketUniquePtr s; + if (Socket::Address(sids[index], &s) == 0) { + return s.get(); + } + return NULL; + } + + butil::TempFile _server_list; + std::string _naming_url; + + Server _server; + MyEchoService _svc; +}; + +TEST_F(RdmaTest, client_close_before_hello_send) { + StartServer(); + + sockaddr_in addr; + bzero((char*)&addr, sizeof(addr)); + addr.sin_family = AF_INET; + addr.sin_port = htons(PORT); + + butil::fd_guard sockfd(socket(AF_INET, SOCK_STREAM, 0)); + ASSERT_TRUE(sockfd >= 0); + ASSERT_EQ(0, connect(sockfd, (sockaddr*)&addr, sizeof(sockaddr))); + usleep(100000); // wait for server to handle the msg + Socket* s = GetSocketFromServer(0); + ASSERT_EQ(rdma::RdmaEndpoint::UNINIT, s->_rdma_ep->_state); + close(sockfd); + usleep(100000); // wait for server to handle the msg + ASSERT_EQ(NULL, GetSocketFromServer(0)); + + StopServer(); +} + +TEST_F(RdmaTest, client_hello_msg_invalid_magic_str) { + StartServer(); + + sockaddr_in addr; + bzero((char*)&addr, sizeof(addr)); + addr.sin_family = AF_INET; + addr.sin_port = htons(PORT); + + butil::fd_guard sockfd(socket(AF_INET, SOCK_STREAM, 0)); + ASSERT_TRUE(sockfd >= 0); + ASSERT_EQ(0, connect(sockfd, (sockaddr*)&addr, sizeof(sockaddr))); + usleep(100000); // wait for server to handle the msg + Socket* s = GetSocketFromServer(0); + ASSERT_EQ(rdma::RdmaEndpoint::UNINIT, s->_rdma_ep->_state); + + uint8_t data[38]; + memcpy(data, "PRPC", 4); // send as normal baidu_std protocol + memset(data + 4, 0, 32); + ASSERT_EQ(38, write(sockfd, data, 38)); + usleep(100000); // wait for server to handle the msg + ASSERT_EQ(rdma::RdmaEndpoint::FALLBACK_TCP, s->_rdma_ep->_state); + + StopServer(); +} + +TEST_F(RdmaTest, client_close_during_hello_send) { + StartServer(); + + sockaddr_in addr; + bzero((char*)&addr, sizeof(addr)); + addr.sin_family = AF_INET; + addr.sin_port = htons(PORT); + Socket* s = NULL; + uint8_t data[8]; + + butil::fd_guard sockfd1(socket(AF_INET, SOCK_STREAM, 0)); 
+ ASSERT_TRUE(sockfd1 >= 0); + ASSERT_EQ(0, connect(sockfd1, (sockaddr*)&addr, sizeof(sockaddr))); + usleep(100000); // wait for server to handle the msg + s = GetSocketFromServer(0); + ASSERT_EQ(rdma::RdmaEndpoint::UNINIT, s->_rdma_ep->_state); + memcpy(data, "RD", 2); + ASSERT_EQ(2, write(sockfd1, data, 2)); // break in magic str + usleep(100000); // wait for server to handle the msg + ASSERT_EQ(rdma::RdmaEndpoint::S_HELLO_WAIT, s->_rdma_ep->_state); + close(sockfd1); + usleep(100000); // wait for server to handle the msg + ASSERT_EQ(NULL, GetSocketFromServer(0)); + + butil::fd_guard sockfd2(socket(AF_INET, SOCK_STREAM, 0)); + ASSERT_TRUE(sockfd2 >= 0); + ASSERT_EQ(0, connect(sockfd2, (sockaddr*)&addr, sizeof(sockaddr))); + usleep(100000); // wait for server to handle the msg + s = GetSocketFromServer(0); + ASSERT_EQ(rdma::RdmaEndpoint::UNINIT, s->_rdma_ep->_state); + memcpy(data, "RDMA", 4); + ASSERT_EQ(4, write(sockfd2, data, 4)); // break after magic str + usleep(100000); // wait for server to handle the msg + ASSERT_EQ(rdma::RdmaEndpoint::S_HELLO_WAIT, s->_rdma_ep->_state); + close(sockfd2); + usleep(100000); // wait for server to handle the msg + ASSERT_EQ(NULL, GetSocketFromServer(0)); + + butil::fd_guard sockfd3(socket(AF_INET, SOCK_STREAM, 0)); + ASSERT_TRUE(sockfd3 >= 0); + ASSERT_EQ(0, connect(sockfd3, (sockaddr*)&addr, sizeof(sockaddr))); + usleep(100000); // wait for server to handle the msg + s = GetSocketFromServer(0); + ASSERT_EQ(rdma::RdmaEndpoint::UNINIT, s->_rdma_ep->_state); + memcpy(data, "RDMA", 4); + memset(data + 4, 0, 4); + ASSERT_EQ(8, write(sockfd3, data, 8)); // break after magic str + usleep(100000); // wait for server to handle the msg + ASSERT_EQ(rdma::RdmaEndpoint::S_HELLO_WAIT, s->_rdma_ep->_state); + close(sockfd3); + usleep(100000); // wait for server to handle the msg + ASSERT_EQ(NULL, GetSocketFromServer(0)); + + StopServer(); +} + +TEST_F(RdmaTest, client_hello_msg_invalid_len) { + StartServer(); + + sockaddr_in addr; + 
bzero((char*)&addr, sizeof(addr)); + addr.sin_family = AF_INET; + addr.sin_port = htons(PORT); + Socket* s = NULL; + uint8_t data[38]; + + butil::fd_guard sockfd1(socket(AF_INET, SOCK_STREAM, 0)); + ASSERT_TRUE(sockfd1 >= 0); + ASSERT_EQ(0, connect(sockfd1, (sockaddr*)&addr, sizeof(sockaddr))); + usleep(100000); // wait for server to handle the msg + s = GetSocketFromServer(0); + ASSERT_EQ(rdma::RdmaEndpoint::UNINIT, s->_rdma_ep->_state); + memcpy(data, "RDMA", 4); + memset(data + 4, 0, 34); + ASSERT_EQ(38, write(sockfd1, data, 38)); // write invalid length + usleep(100000); // wait for server to handle the msg + ASSERT_EQ(NULL, GetSocketFromServer(0)); + + butil::fd_guard sockfd2(socket(AF_INET, SOCK_STREAM, 0)); + ASSERT_TRUE(sockfd2 >= 0); + ASSERT_EQ(0, connect(sockfd2, (sockaddr*)&addr, sizeof(sockaddr))); + usleep(100000); // wait for server to handle the msg + s = GetSocketFromServer(0); + ASSERT_EQ(rdma::RdmaEndpoint::UNINIT, s->_rdma_ep->_state); + memcpy(data, "RDMA", 4); + uint16_t len = butil::HostToNet16(35); + memcpy(data + 4, &len, 2); + memset(data + 6, 0, 32); + ASSERT_EQ(38, write(sockfd2, data, 38)); // write invalid length + usleep(100000); // wait for server to handle the msg + ASSERT_EQ(NULL, GetSocketFromServer(0)); + + StopServer(); +} + +TEST_F(RdmaTest, client_hello_msg_invalid_version) { + StartServer(); + + sockaddr_in addr; + bzero((char*)&addr, sizeof(addr)); + addr.sin_family = AF_INET; + addr.sin_port = htons(PORT); + Socket* s = NULL; + uint8_t data[38]; + uint16_t len = butil::HostToNet16(38); + uint16_t ver = butil::HostToNet16(1); + + butil::fd_guard sockfd1(socket(AF_INET, SOCK_STREAM, 0)); + ASSERT_TRUE(sockfd1 >= 0); + ASSERT_EQ(0, connect(sockfd1, (sockaddr*)&addr, sizeof(sockaddr))); + usleep(100000); // wait for server to handle the msg + s = GetSocketFromServer(0); + ASSERT_EQ(rdma::RdmaEndpoint::UNINIT, s->_rdma_ep->_state); + memcpy(data, "RDMA", 4); + memcpy(data + 4, &len, 2); + memset(data + 6, 0, 32); + memcpy(data + 
6, &ver, 2); // hello_ver == 1, impl_ver == 0 + ASSERT_EQ(38, write(sockfd1, data, 38)); + usleep(100000); // wait for server to handle the msg + ASSERT_EQ(rdma::RdmaEndpoint::S_ACK_WAIT, s->_rdma_ep->_state); + memset(data, 0, 4); + ASSERT_EQ(4, write(sockfd1, data, 4)); + usleep(100000); // wait for server to handle the msg + ASSERT_EQ(rdma::RdmaEndpoint::FALLBACK_TCP, s->_rdma_ep->_state); + close(sockfd1); + usleep(100000); + ASSERT_EQ(NULL, GetSocketFromServer(0)); + + butil::fd_guard sockfd2(socket(AF_INET, SOCK_STREAM, 0)); + ASSERT_TRUE(sockfd2 >= 0); + ASSERT_EQ(0, connect(sockfd2, (sockaddr*)&addr, sizeof(sockaddr))); + usleep(100000); // wait for server to handle the msg + s = GetSocketFromServer(0); + ASSERT_EQ(rdma::RdmaEndpoint::UNINIT, s->_rdma_ep->_state); + memcpy(data, "RDMA", 4); + memcpy(data + 4, &len, 2); + memset(data + 6, 0, 32); + memcpy(data + 8, &ver, 2); // hello_ver == 0, impl_ver == 1 + ASSERT_EQ(38, write(sockfd2, data, 38)); + usleep(100000); // wait for server to handle the msg + ASSERT_EQ(rdma::RdmaEndpoint::S_ACK_WAIT, s->_rdma_ep->_state); + uint32_t flag = butil::HostToNet32(1); + ASSERT_EQ(4, write(sockfd2, &flag, 4)); + usleep(100000); + ASSERT_EQ(NULL, GetSocketFromServer(0)); + + StopServer(); +} + +TEST_F(RdmaTest, client_hello_msg_invalid_sq_rq_block_size) { + StartServer(); + + sockaddr_in addr; + bzero((char*)&addr, sizeof(addr)); + addr.sin_family = AF_INET; + addr.sin_port = htons(PORT); + Socket* s = NULL; + rdma::HelloMessage msg; + uint8_t data[38]; + msg.msg_len = 38; + msg.hello_ver = 1; + msg.impl_ver = 1; + + msg.sq_size = 10; + msg.rq_size = 16; + msg.block_size = 8192; + memcpy(data, "RDMA", 4); + msg.Serialize(data + 4); + butil::fd_guard sockfd1(socket(AF_INET, SOCK_STREAM, 0)); + ASSERT_TRUE(sockfd1 >= 0); + ASSERT_EQ(0, connect(sockfd1, (sockaddr*)&addr, sizeof(sockaddr))); + usleep(100000); // wait for server to handle the msg + s = GetSocketFromServer(0); + ASSERT_EQ(rdma::RdmaEndpoint::UNINIT, 
s->_rdma_ep->_state); + ASSERT_EQ(38, write(sockfd1, data, 38)); + usleep(100000); // wait for server to handle the msg + ASSERT_EQ(rdma::RdmaEndpoint::S_ACK_WAIT, s->_rdma_ep->_state); + memset(data, 0, 4); + ASSERT_EQ(4, write(sockfd1, data, 4)); + usleep(100000); // wait for server to handle the msg + ASSERT_EQ(rdma::RdmaEndpoint::FALLBACK_TCP, s->_rdma_ep->_state); + close(sockfd1); + + msg.sq_size = 16; + msg.rq_size = 10; + msg.block_size = 8192; + memcpy(data, "RDMA", 4); + msg.Serialize(data + 4); + butil::fd_guard sockfd2(socket(AF_INET, SOCK_STREAM, 0)); + ASSERT_TRUE(sockfd2 >= 0); + ASSERT_EQ(0, connect(sockfd2, (sockaddr*)&addr, sizeof(sockaddr))); + usleep(100000); // wait for server to handle the msg + s = GetSocketFromServer(0); + ASSERT_EQ(rdma::RdmaEndpoint::UNINIT, s->_rdma_ep->_state); + ASSERT_EQ(38, write(sockfd2, data, 38)); + usleep(100000); // wait for server to handle the msg + ASSERT_EQ(rdma::RdmaEndpoint::S_ACK_WAIT, s->_rdma_ep->_state); + memset(data, 0, 4); + ASSERT_EQ(4, write(sockfd2, data, 4)); // sockfd1 is already closed; ack the live connection + usleep(100000); // wait for server to handle the msg + ASSERT_EQ(rdma::RdmaEndpoint::FALLBACK_TCP, s->_rdma_ep->_state); + close(sockfd2); + + msg.sq_size = 16; + msg.rq_size = 16; + msg.block_size = 1000; + memcpy(data, "RDMA", 4); + msg.Serialize(data + 4); + butil::fd_guard sockfd3(socket(AF_INET, SOCK_STREAM, 0)); + ASSERT_TRUE(sockfd3 >= 0); + ASSERT_EQ(0, connect(sockfd3, (sockaddr*)&addr, sizeof(sockaddr))); + usleep(100000); // wait for server to handle the msg + s = GetSocketFromServer(0); + ASSERT_EQ(rdma::RdmaEndpoint::UNINIT, s->_rdma_ep->_state); + ASSERT_EQ(38, write(sockfd3, data, 38)); + usleep(100000); // wait for server to handle the msg + ASSERT_EQ(rdma::RdmaEndpoint::S_ACK_WAIT, s->_rdma_ep->_state); + memset(data, 0, 4); + ASSERT_EQ(4, write(sockfd3, data, 4)); + usleep(100000); // wait for server to handle the msg + ASSERT_EQ(rdma::RdmaEndpoint::FALLBACK_TCP, s->_rdma_ep->_state); + + StopServer(); +} +
+TEST_F(RdmaTest, client_close_after_qp_build) { + StartServer(); + + sockaddr_in addr; + bzero((char*)&addr, sizeof(addr)); + addr.sin_family = AF_INET; + addr.sin_port = htons(PORT); + Socket* s = NULL; + rdma::HelloMessage msg; + uint8_t data[38]; + msg.msg_len = 38; + msg.hello_ver = 1; + msg.impl_ver = 1; + msg.sq_size = 16; + msg.rq_size = 16; + msg.block_size = 8192; + msg.qp_num = 0; + msg.gid = rdma::GetRdmaGid(); + memcpy(data, "RDMA", 4); + msg.Serialize(data + 4); + + butil::fd_guard sockfd1(socket(AF_INET, SOCK_STREAM, 0)); + ASSERT_TRUE(sockfd1 >= 0); + ASSERT_EQ(0, connect(sockfd1, (sockaddr*)&addr, sizeof(sockaddr))); + usleep(100000); // wait for server to handle the msg + s = GetSocketFromServer(0); + ASSERT_EQ(rdma::RdmaEndpoint::UNINIT, s->_rdma_ep->_state); + ASSERT_EQ(38, write(sockfd1, data, 38)); + usleep(100000); // wait for server to handle the msg + ASSERT_EQ(rdma::RdmaEndpoint::S_ACK_WAIT, s->_rdma_ep->_state); + close(sockfd1); + usleep(100000); // wait for server to handle the msg + ASSERT_EQ(NULL, GetSocketFromServer(0)); + + StopServer(); +} + +TEST_F(RdmaTest, client_close_during_ack_send) { + StartServer(); + + sockaddr_in addr; + bzero((char*)&addr, sizeof(addr)); + addr.sin_family = AF_INET; + addr.sin_port = htons(PORT); + Socket* s = NULL; + rdma::HelloMessage msg; + uint8_t data[38]; + msg.msg_len = 38; + msg.hello_ver = 1; + msg.impl_ver = 1; + msg.sq_size = 16; + msg.rq_size = 16; + msg.block_size = 8192; + msg.qp_num = 0; + msg.gid = rdma::GetRdmaGid(); + memcpy(data, "RDMA", 4); + msg.Serialize(data + 4); + + butil::fd_guard sockfd1(socket(AF_INET, SOCK_STREAM, 0)); + ASSERT_TRUE(sockfd1 >= 0); + ASSERT_EQ(0, connect(sockfd1, (sockaddr*)&addr, sizeof(sockaddr))); + usleep(100000); // wait for server to handle the msg + s = GetSocketFromServer(0); + ASSERT_EQ(rdma::RdmaEndpoint::UNINIT, s->_rdma_ep->_state); + ASSERT_EQ(38, write(sockfd1, data, 38)); + usleep(100000); // wait for server to handle the msg + 
ASSERT_EQ(rdma::RdmaEndpoint::S_ACK_WAIT, s->_rdma_ep->_state); + uint32_t flags = butil::HostToNet32(0); + ASSERT_EQ(2, write(sockfd1, &flags, 2)); + usleep(100000); // wait for server to handle the msg + ASSERT_EQ(rdma::RdmaEndpoint::S_ACK_WAIT, s->_rdma_ep->_state); + close(sockfd1); + usleep(100000); // wait for server to handle the msg + ASSERT_EQ(NULL, GetSocketFromServer(0)); + + StopServer(); +} + +TEST_F(RdmaTest, client_close_after_ack_send) { + StartServer(); + + sockaddr_in addr; + bzero((char*)&addr, sizeof(addr)); + addr.sin_family = AF_INET; + addr.sin_port = htons(PORT); + Socket* s = NULL; + rdma::HelloMessage msg; + uint8_t data[38]; + msg.msg_len = 38; + msg.hello_ver = 1; + msg.impl_ver = 1; + msg.sq_size = 16; + msg.rq_size = 16; + msg.block_size = 8192; + msg.qp_num = 0; + msg.gid = rdma::GetRdmaGid(); + memcpy(data, "RDMA", 4); + msg.Serialize(data + 4); + + butil::fd_guard sockfd1(socket(AF_INET, SOCK_STREAM, 0)); + ASSERT_TRUE(sockfd1 >= 0); + ASSERT_EQ(0, connect(sockfd1, (sockaddr*)&addr, sizeof(sockaddr))); + usleep(100000); // wait for server to handle the msg + s = GetSocketFromServer(0); + ASSERT_EQ(rdma::RdmaEndpoint::UNINIT, s->_rdma_ep->_state); + ASSERT_EQ(38, write(sockfd1, data, 38)); + usleep(100000); // wait for server to handle the msg + ASSERT_EQ(rdma::RdmaEndpoint::S_ACK_WAIT, s->_rdma_ep->_state); + uint32_t flags = butil::HostToNet32(0); + ASSERT_EQ(4, write(sockfd1, &flags, 4)); + usleep(100000); // wait for server to handle the msg + ASSERT_EQ(rdma::RdmaEndpoint::FALLBACK_TCP, s->_rdma_ep->_state); + close(sockfd1); + usleep(100000); // wait for server to handle the msg + ASSERT_EQ(NULL, GetSocketFromServer(0)); + + butil::fd_guard sockfd2(socket(AF_INET, SOCK_STREAM, 0)); + ASSERT_TRUE(sockfd2 >= 0); + ASSERT_EQ(0, connect(sockfd2, (sockaddr*)&addr, sizeof(sockaddr))); + usleep(100000); // wait for server to handle the msg + s = GetSocketFromServer(0); + ASSERT_EQ(rdma::RdmaEndpoint::UNINIT, s->_rdma_ep->_state); + 
ASSERT_EQ(38, write(sockfd2, data, 38)); + usleep(100000); // wait for server to handle the msg + ASSERT_EQ(rdma::RdmaEndpoint::S_ACK_WAIT, s->_rdma_ep->_state); + flags = butil::HostToNet32(1); + ASSERT_EQ(4, write(sockfd2, &flags, 4)); + usleep(100000); // wait for server to handle the msg + ASSERT_EQ(rdma::RdmaEndpoint::ESTABLISHED, s->_rdma_ep->_state); + close(sockfd2); + usleep(100000); // wait for server to handle the msg + ASSERT_EQ(NULL, GetSocketFromServer(0)); + + StopServer(); +} + +TEST_F(RdmaTest, client_send_data_on_tcp_after_ack_send) { + StartServer(); + + sockaddr_in addr; + bzero((char*)&addr, sizeof(addr)); + addr.sin_family = AF_INET; + addr.sin_port = htons(PORT); + Socket* s = NULL; + rdma::HelloMessage msg; + uint8_t data[38]; + msg.msg_len = 38; + msg.hello_ver = 1; + msg.impl_ver = 1; + msg.sq_size = 16; + msg.rq_size = 16; + msg.block_size = 8192; + msg.qp_num = 0; + msg.gid = rdma::GetRdmaGid(); + memcpy(data, "RDMA", 4); + msg.Serialize(data + 4); + + butil::fd_guard sockfd1(socket(AF_INET, SOCK_STREAM, 0)); + ASSERT_TRUE(sockfd1 >= 0); + ASSERT_EQ(0, connect(sockfd1, (sockaddr*)&addr, sizeof(sockaddr))); + usleep(100000); // wait for server to handle the msg + s = GetSocketFromServer(0); + ASSERT_EQ(rdma::RdmaEndpoint::UNINIT, s->_rdma_ep->_state); + ASSERT_EQ(38, write(sockfd1, data, 38)); + usleep(100000); // wait for server to handle the msg + ASSERT_EQ(rdma::RdmaEndpoint::S_ACK_WAIT, s->_rdma_ep->_state); + uint32_t flags = butil::HostToNet32(0); + ASSERT_EQ(4, write(sockfd1, &flags, 4)); + usleep(100000); // wait for server to handle the msg + ASSERT_EQ(rdma::RdmaEndpoint::FALLBACK_TCP, s->_rdma_ep->_state); + ASSERT_EQ(4, write(sockfd1, &flags, 4)); + usleep(100000); + ASSERT_EQ(rdma::RdmaEndpoint::FALLBACK_TCP, s->_rdma_ep->_state); + close(sockfd1); + usleep(100000); // wait for server to handle the msg + ASSERT_EQ(NULL, GetSocketFromServer(0)); + + butil::fd_guard sockfd2(socket(AF_INET, SOCK_STREAM, 0)); + ASSERT_TRUE(sockfd2 
>= 0); + ASSERT_EQ(0, connect(sockfd2, (sockaddr*)&addr, sizeof(sockaddr))); + usleep(100000); // wait for server to handle the msg + s = GetSocketFromServer(0); + ASSERT_EQ(rdma::RdmaEndpoint::UNINIT, s->_rdma_ep->_state); + ASSERT_EQ(38, write(sockfd2, data, 38)); + usleep(100000); // wait for server to handle the msg + ASSERT_EQ(rdma::RdmaEndpoint::S_ACK_WAIT, s->_rdma_ep->_state); + flags = butil::HostToNet32(1); + ASSERT_EQ(4, write(sockfd2, &flags, 4)); + usleep(100000); // wait for server to handle the msg + ASSERT_EQ(rdma::RdmaEndpoint::ESTABLISHED, s->_rdma_ep->_state); + ASSERT_EQ(4, write(sockfd2, &flags, 4)); // sockfd1 is already closed; send the stray TCP data on the live connection + usleep(100000); + ASSERT_EQ(NULL, GetSocketFromServer(0)); + + StopServer(); +} + +TEST_F(RdmaTest, server_miss_before_hello_send) { + butil::fd_guard sockfd(butil::tcp_listen(g_ep)); + EXPECT_TRUE(sockfd >= 0); + + Channel channel; + ChannelOptions chan_options; + chan_options.use_rdma = true; + chan_options.connect_timeout_ms = 500; + chan_options.timeout_ms = 500; + chan_options.max_retry = 0; + ASSERT_EQ(0, channel.Init(g_ep, &chan_options)); + + Controller cntl; + test::EchoRequest req; + test::EchoResponse res; + req.set_message(__FUNCTION__); + google::protobuf::Closure* done = DoNothing(); + ::test::EchoService::Stub(&channel).Echo(&cntl, &req, &res, done); + + usleep(100000); + SocketUniquePtr s; + ASSERT_EQ(0, Socket::Address(cntl._single_server_id, &s)); + ASSERT_EQ(rdma::RdmaEndpoint::C_HELLO_WAIT, s->_rdma_ep->_state); + + butil::fd_guard acc_fd(accept(sockfd, NULL, NULL)); + ASSERT_TRUE(acc_fd >= 0); + bthread_id_join(cntl.call_id()); + + ASSERT_EQ(ERPCTIMEDOUT, cntl.ErrorCode()); +} + +TEST_F(RdmaTest, server_close_before_hello_send) { + butil::fd_guard sockfd(butil::tcp_listen(g_ep)); + EXPECT_TRUE(sockfd >= 0); + + Channel channel; + ChannelOptions chan_options; + chan_options.use_rdma = true; + chan_options.connect_timeout_ms = 500; + chan_options.timeout_ms = 500; + chan_options.max_retry = 0; + ASSERT_EQ(0, channel.Init(g_ep,
&chan_options)); + + Controller cntl; + test::EchoRequest req; + test::EchoResponse res; + req.set_message(__FUNCTION__); + google::protobuf::Closure* done = DoNothing(); + ::test::EchoService::Stub(&channel).Echo(&cntl, &req, &res, done); + + usleep(100000); + SocketUniquePtr s; + ASSERT_EQ(0, Socket::Address(cntl._single_server_id, &s)); + ASSERT_EQ(rdma::RdmaEndpoint::C_HELLO_WAIT, s->_rdma_ep->_state); + + butil::fd_guard acc_fd(accept(sockfd, NULL, NULL)); + ASSERT_TRUE(acc_fd >= 0); + uint8_t data[38]; + ASSERT_EQ(38, read(acc_fd, data, 38)); + close(acc_fd); + usleep(100000); + ASSERT_EQ(rdma::RdmaEndpoint::FAILED, s->_rdma_ep->_state); + bthread_id_join(cntl.call_id()); + + ASSERT_EQ(EEOF, cntl.ErrorCode()); +} + +TEST_F(RdmaTest, server_miss_during_magic_str) { + butil::fd_guard sockfd(butil::tcp_listen(g_ep)); + EXPECT_TRUE(sockfd >= 0); + + Channel channel; + ChannelOptions chan_options; + chan_options.use_rdma = true; + chan_options.connect_timeout_ms = 500; + chan_options.timeout_ms = 500; + chan_options.max_retry = 0; + ASSERT_EQ(0, channel.Init(g_ep, &chan_options)); + + Controller cntl; + test::EchoRequest req; + test::EchoResponse res; + req.set_message(__FUNCTION__); + google::protobuf::Closure* done = DoNothing(); + ::test::EchoService::Stub(&channel).Echo(&cntl, &req, &res, done); + + usleep(100000); + SocketUniquePtr s; + ASSERT_EQ(0, Socket::Address(cntl._single_server_id, &s)); + ASSERT_EQ(rdma::RdmaEndpoint::C_HELLO_WAIT, s->_rdma_ep->_state); + + butil::fd_guard acc_fd(accept(sockfd, NULL, NULL)); + ASSERT_TRUE(acc_fd >= 0); + uint8_t data[38]; + ASSERT_EQ(38, read(acc_fd, data, 38)); + ASSERT_EQ(2, write(acc_fd, "RD", 2)); + usleep(100000); + bthread_id_join(cntl.call_id()); + + ASSERT_EQ(ERPCTIMEDOUT, cntl.ErrorCode()); +} + +TEST_F(RdmaTest, server_close_during_magic_str) { + butil::fd_guard sockfd(butil::tcp_listen(g_ep)); + EXPECT_TRUE(sockfd >= 0); + + Channel channel; + ChannelOptions chan_options; + chan_options.use_rdma = true; + 
chan_options.connect_timeout_ms = 500; + chan_options.timeout_ms = 500; + chan_options.max_retry = 0; + ASSERT_EQ(0, channel.Init(g_ep, &chan_options)); + + Controller cntl; + test::EchoRequest req; + test::EchoResponse res; + req.set_message(__FUNCTION__); + google::protobuf::Closure* done = DoNothing(); + ::test::EchoService::Stub(&channel).Echo(&cntl, &req, &res, done); + + usleep(100000); + SocketUniquePtr s; + ASSERT_EQ(0, Socket::Address(cntl._single_server_id, &s)); + ASSERT_EQ(rdma::RdmaEndpoint::C_HELLO_WAIT, s->_rdma_ep->_state); + + butil::fd_guard acc_fd(accept(sockfd, NULL, NULL)); + ASSERT_TRUE(acc_fd >= 0); + uint8_t data[38]; + ASSERT_EQ(38, read(acc_fd, data, 38)); + ASSERT_EQ(2, write(acc_fd, "RD", 2)); + close(acc_fd); + usleep(100000); + ASSERT_EQ(rdma::RdmaEndpoint::FAILED, s->_rdma_ep->_state); + bthread_id_join(cntl.call_id()); + + ASSERT_EQ(EEOF, cntl.ErrorCode()); +} + +TEST_F(RdmaTest, server_hello_invalid_magic_str) { + butil::fd_guard sockfd(butil::tcp_listen(g_ep)); + EXPECT_TRUE(sockfd >= 0); + + Channel channel; + ChannelOptions chan_options; + chan_options.use_rdma = true; + chan_options.connect_timeout_ms = 500; + chan_options.timeout_ms = 500; + chan_options.max_retry = 0; + ASSERT_EQ(0, channel.Init(g_ep, &chan_options)); + + Controller cntl; + test::EchoRequest req; + test::EchoResponse res; + req.set_message(__FUNCTION__); + google::protobuf::Closure* done = DoNothing(); + ::test::EchoService::Stub(&channel).Echo(&cntl, &req, &res, done); + + usleep(100000); + SocketUniquePtr s; + ASSERT_EQ(0, Socket::Address(cntl._single_server_id, &s)); + ASSERT_EQ(rdma::RdmaEndpoint::C_HELLO_WAIT, s->_rdma_ep->_state); + + butil::fd_guard acc_fd(accept(sockfd, NULL, NULL)); + ASSERT_TRUE(acc_fd >= 0); + uint8_t data[38]; + ASSERT_EQ(38, read(acc_fd, data, 38)); + ASSERT_EQ(4, write(acc_fd, "ABCD", 4)); + usleep(100000); + ASSERT_EQ(rdma::RdmaEndpoint::FAILED, s->_rdma_ep->_state); + bthread_id_join(cntl.call_id()); + + ASSERT_EQ(EPROTO, 
cntl.ErrorCode()); +} + +TEST_F(RdmaTest, server_miss_during_hello_msg) { + butil::fd_guard sockfd(butil::tcp_listen(g_ep)); + EXPECT_TRUE(sockfd >= 0); + + Channel channel; + ChannelOptions chan_options; + chan_options.use_rdma = true; + chan_options.connect_timeout_ms = 500; + chan_options.timeout_ms = 500; + chan_options.max_retry = 0; + ASSERT_EQ(0, channel.Init(g_ep, &chan_options)); + + Controller cntl; + test::EchoRequest req; + test::EchoResponse res; + req.set_message(__FUNCTION__); + google::protobuf::Closure* done = DoNothing(); + ::test::EchoService::Stub(&channel).Echo(&cntl, &req, &res, done); + + usleep(100000); + SocketUniquePtr s; + ASSERT_EQ(0, Socket::Address(cntl._single_server_id, &s)); + ASSERT_EQ(rdma::RdmaEndpoint::C_HELLO_WAIT, s->_rdma_ep->_state); + + butil::fd_guard acc_fd(accept(sockfd, NULL, NULL)); + ASSERT_TRUE(acc_fd >= 0); + uint8_t data[38]; + ASSERT_EQ(38, read(acc_fd, data, 38)); + ASSERT_EQ(4, write(acc_fd, "RDMA", 4)); + ASSERT_EQ(2, write(acc_fd, "00", 2)); + bthread_id_join(cntl.call_id()); + + ASSERT_EQ(ERPCTIMEDOUT, cntl.ErrorCode()); +} + +TEST_F(RdmaTest, server_close_during_hello_msg) { + butil::fd_guard sockfd(butil::tcp_listen(g_ep)); + EXPECT_TRUE(sockfd >= 0); + + Channel channel; + ChannelOptions chan_options; + chan_options.use_rdma = true; + chan_options.connect_timeout_ms = 500; + chan_options.timeout_ms = 500; + chan_options.max_retry = 0; + ASSERT_EQ(0, channel.Init(g_ep, &chan_options)); + + Controller cntl; + test::EchoRequest req; + test::EchoResponse res; + req.set_message(__FUNCTION__); + google::protobuf::Closure* done = DoNothing(); + ::test::EchoService::Stub(&channel).Echo(&cntl, &req, &res, done); + + usleep(100000); + SocketUniquePtr s; + ASSERT_EQ(0, Socket::Address(cntl._single_server_id, &s)); + ASSERT_EQ(rdma::RdmaEndpoint::C_HELLO_WAIT, s->_rdma_ep->_state); + + butil::fd_guard acc_fd(accept(sockfd, NULL, NULL)); + ASSERT_TRUE(acc_fd >= 0); + uint8_t data[38]; + ASSERT_EQ(38, read(acc_fd, data, 
38)); + ASSERT_EQ(4, write(acc_fd, "RDMA", 4)); + ASSERT_EQ(2, write(acc_fd, "00", 2)); + close(acc_fd); + usleep(100000); + ASSERT_EQ(rdma::RdmaEndpoint::FAILED, s->_rdma_ep->_state); + bthread_id_join(cntl.call_id()); + + ASSERT_EQ(EEOF, cntl.ErrorCode()); +} + +TEST_F(RdmaTest, server_hello_invalid_msg_len) { + butil::fd_guard sockfd(butil::tcp_listen(g_ep)); + EXPECT_TRUE(sockfd >= 0); + + Channel channel; + ChannelOptions chan_options; + chan_options.use_rdma = true; + chan_options.connect_timeout_ms = 500; + chan_options.timeout_ms = 500; + chan_options.max_retry = 0; + ASSERT_EQ(0, channel.Init(g_ep, &chan_options)); + + Controller cntl; + test::EchoRequest req; + test::EchoResponse res; + req.set_message(__FUNCTION__); + google::protobuf::Closure* done = DoNothing(); + ::test::EchoService::Stub(&channel).Echo(&cntl, &req, &res, done); + + usleep(100000); + SocketUniquePtr s; + ASSERT_EQ(0, Socket::Address(cntl._single_server_id, &s)); + ASSERT_EQ(rdma::RdmaEndpoint::C_HELLO_WAIT, s->_rdma_ep->_state); + + butil::fd_guard acc_fd(accept(sockfd, NULL, NULL)); + ASSERT_TRUE(acc_fd >= 0); + uint8_t data[38]; + ASSERT_EQ(38, read(acc_fd, data, 38)); + memcpy(data, "RDMA", 4); + uint16_t len = butil::HostToNet16(35); + memcpy(data + 4, &len, 2); + memset(data + 6, 0, 32); + ASSERT_EQ(38, write(acc_fd, data, 38)); + usleep(100000); + ASSERT_EQ(rdma::RdmaEndpoint::FAILED, s->_rdma_ep->_state); + bthread_id_join(cntl.call_id()); + + ASSERT_EQ(EPROTO, cntl.ErrorCode()); +} + +TEST_F(RdmaTest, server_hello_invalid_version) { + butil::fd_guard sockfd(butil::tcp_listen(g_ep)); + EXPECT_TRUE(sockfd >= 0); + + Channel channel; + ChannelOptions chan_options; + chan_options.use_rdma = true; + chan_options.connect_timeout_ms = 500; + chan_options.timeout_ms = 500; + chan_options.max_retry = 0; + ASSERT_EQ(0, channel.Init(g_ep, &chan_options)); + + Controller cntl; + test::EchoRequest req; + test::EchoResponse res; + req.set_message(__FUNCTION__); + google::protobuf::Closure* 
done = DoNothing(); + ::test::EchoService::Stub(&channel).Echo(&cntl, &req, &res, done); + + usleep(100000); + SocketUniquePtr s; + ASSERT_EQ(0, Socket::Address(cntl._single_server_id, &s)); + ASSERT_EQ(rdma::RdmaEndpoint::C_HELLO_WAIT, s->_rdma_ep->_state); + + butil::fd_guard acc_fd(accept(sockfd, NULL, NULL)); + ASSERT_TRUE(acc_fd >= 0); + uint8_t data[38]; + ASSERT_EQ(38, read(acc_fd, data, 38)); + memcpy(data, "RDMA", 4); + uint16_t len = butil::HostToNet16(38); + memcpy(data + 4, &len, 2); + memset(data + 6, 0, 32); + ASSERT_EQ(38, write(acc_fd, data, 38)); + usleep(100000); + ASSERT_EQ(rdma::RdmaEndpoint::FALLBACK_TCP, s->_rdma_ep->_state); + ASSERT_EQ(4, read(acc_fd, data, 4)); + ASSERT_EQ(0, butil::NetToHost32(*(uint32_t*)data)); + bthread_id_join(cntl.call_id()); + + ASSERT_EQ(ERPCTIMEDOUT, cntl.ErrorCode()); +} + +TEST_F(RdmaTest, server_hello_invalid_sq_rq_size) { + butil::fd_guard sockfd(butil::tcp_listen(g_ep)); + EXPECT_TRUE(sockfd >= 0); + + Channel channel; + ChannelOptions chan_options; + chan_options.use_rdma = true; + chan_options.connect_timeout_ms = 500; + chan_options.timeout_ms = 500; + chan_options.max_retry = 0; + ASSERT_EQ(0, channel.Init(g_ep, &chan_options)); + + Controller cntl; + test::EchoRequest req; + test::EchoResponse res; + req.set_message(__FUNCTION__); + google::protobuf::Closure* done = DoNothing(); + ::test::EchoService::Stub(&channel).Echo(&cntl, &req, &res, done); + + usleep(100000); + SocketUniquePtr s; + ASSERT_EQ(0, Socket::Address(cntl._single_server_id, &s)); + ASSERT_EQ(rdma::RdmaEndpoint::C_HELLO_WAIT, s->_rdma_ep->_state); + + butil::fd_guard acc_fd(accept(sockfd, NULL, NULL)); + ASSERT_TRUE(acc_fd >= 0); + uint8_t data[38]; + ASSERT_EQ(38, read(acc_fd, data, 38)); + + rdma::HelloMessage msg; + msg.msg_len = 38; + msg.hello_ver = 1; + msg.impl_ver = 1; + msg.sq_size = 0; + msg.rq_size = 0; + msg.block_size = 8192; + msg.qp_num = 0; + msg.gid = rdma::GetRdmaGid(); + memcpy(data, "RDMA", 4); + msg.Serialize(data + 
4); + ASSERT_EQ(38, write(acc_fd, data, 38)); + + usleep(100000); + ASSERT_EQ(rdma::RdmaEndpoint::FALLBACK_TCP, s->_rdma_ep->_state); + ASSERT_EQ(4, read(acc_fd, data, 4)); + ASSERT_EQ(0, butil::NetToHost32(*(uint32_t*)data)); + bthread_id_join(cntl.call_id()); + + ASSERT_EQ(ERPCTIMEDOUT, cntl.ErrorCode()); +} + +TEST_F(RdmaTest, server_miss_after_ack) { + butil::fd_guard sockfd(butil::tcp_listen(g_ep)); + EXPECT_TRUE(sockfd >= 0); + + Channel channel; + ChannelOptions chan_options; + chan_options.use_rdma = true; + chan_options.connect_timeout_ms = 500; + chan_options.timeout_ms = 500; + chan_options.max_retry = 0; + ASSERT_EQ(0, channel.Init(g_ep, &chan_options)); + + Controller cntl; + test::EchoRequest req; + test::EchoResponse res; + req.set_message(__FUNCTION__); + google::protobuf::Closure* done = DoNothing(); + ::test::EchoService::Stub(&channel).Echo(&cntl, &req, &res, done); + + usleep(100000); + SocketUniquePtr s; + ASSERT_EQ(0, Socket::Address(cntl._single_server_id, &s)); + ASSERT_EQ(rdma::RdmaEndpoint::C_HELLO_WAIT, s->_rdma_ep->_state); + + butil::fd_guard acc_fd(accept(sockfd, NULL, NULL)); + ASSERT_TRUE(acc_fd >= 0); + uint8_t data[38]; + ASSERT_EQ(38, read(acc_fd, data, 38)); + + rdma::HelloMessage msg; + msg.msg_len = 38; + msg.hello_ver = 1; + msg.impl_ver = 1; + msg.sq_size = 16; + msg.rq_size = 16; + msg.block_size = 8192; + msg.qp_num = 0; + msg.gid = rdma::GetRdmaGid(); + memcpy(data, "RDMA", 4); + msg.Serialize(data + 4); + ASSERT_EQ(38, write(acc_fd, data, 38)); + + usleep(100000); + ASSERT_EQ(rdma::RdmaEndpoint::ESTABLISHED, s->_rdma_ep->_state); + ASSERT_EQ(4, read(acc_fd, data, 4)); + ASSERT_EQ(1, butil::NetToHost32(*(uint32_t*)data)); + bthread_id_join(cntl.call_id()); + + ASSERT_EQ(ERPCTIMEDOUT, cntl.ErrorCode()); +} + +TEST_F(RdmaTest, server_close_after_ack) { + butil::fd_guard sockfd(butil::tcp_listen(g_ep)); + EXPECT_TRUE(sockfd >= 0); + + Channel channel; + ChannelOptions chan_options; + chan_options.use_rdma = true; + 
chan_options.connect_timeout_ms = 500; + chan_options.timeout_ms = 500; + chan_options.max_retry = 0; + ASSERT_EQ(0, channel.Init(g_ep, &chan_options)); + + Controller cntl; + test::EchoRequest req; + test::EchoResponse res; + req.set_message(__FUNCTION__); + google::protobuf::Closure* done = DoNothing(); + ::test::EchoService::Stub(&channel).Echo(&cntl, &req, &res, done); + + usleep(100000); + SocketUniquePtr s; + ASSERT_EQ(0, Socket::Address(cntl._single_server_id, &s)); + ASSERT_EQ(rdma::RdmaEndpoint::C_HELLO_WAIT, s->_rdma_ep->_state); + + butil::fd_guard acc_fd(accept(sockfd, NULL, NULL)); + ASSERT_TRUE(acc_fd >= 0); + uint8_t data[38]; + ASSERT_EQ(38, read(acc_fd, data, 38)); + + rdma::HelloMessage msg; + msg.msg_len = 38; + msg.hello_ver = 1; + msg.impl_ver = 1; + msg.sq_size = 16; + msg.rq_size = 16; + msg.block_size = 8192; + msg.qp_num = 0; + msg.gid = rdma::GetRdmaGid(); + memcpy(data, "RDMA", 4); + msg.Serialize(data + 4); + ASSERT_EQ(38, write(acc_fd, data, 38)); + + usleep(100000); + ASSERT_EQ(rdma::RdmaEndpoint::ESTABLISHED, s->_rdma_ep->_state); + ASSERT_EQ(4, read(acc_fd, data, 4)); + ASSERT_EQ(1, butil::NetToHost32(*(uint32_t*)data)); + close(acc_fd); + bthread_id_join(cntl.call_id()); + + ASSERT_EQ(EEOF, cntl.ErrorCode()); +} + +TEST_F(RdmaTest, server_send_data_on_tcp_after_ack) { + butil::fd_guard sockfd(butil::tcp_listen(g_ep)); + EXPECT_TRUE(sockfd >= 0); + + Channel channel; + ChannelOptions chan_options; + chan_options.use_rdma = true; + chan_options.connect_timeout_ms = 500; + chan_options.timeout_ms = 500; + chan_options.max_retry = 0; + ASSERT_EQ(0, channel.Init(g_ep, &chan_options)); + + Controller cntl; + test::EchoRequest req; + test::EchoResponse res; + req.set_message(__FUNCTION__); + google::protobuf::Closure* done = DoNothing(); + ::test::EchoService::Stub(&channel).Echo(&cntl, &req, &res, done); + + usleep(100000); + SocketUniquePtr s; + ASSERT_EQ(0, Socket::Address(cntl._single_server_id, &s)); + 
ASSERT_EQ(rdma::RdmaEndpoint::C_HELLO_WAIT, s->_rdma_ep->_state); + + butil::fd_guard acc_fd(accept(sockfd, NULL, NULL)); + ASSERT_TRUE(acc_fd >= 0); + uint8_t data[38]; + ASSERT_EQ(38, read(acc_fd, data, 38)); + + rdma::HelloMessage msg; + msg.msg_len = 38; + msg.hello_ver = 1; + msg.impl_ver = 1; + msg.sq_size = 16; + msg.rq_size = 16; + msg.block_size = 8192; + msg.qp_num = 0; + msg.gid = rdma::GetRdmaGid(); + memcpy(data, "RDMA", 4); + msg.Serialize(data + 4); + ASSERT_EQ(38, write(acc_fd, data, 38)); + + usleep(100000); + ASSERT_EQ(rdma::RdmaEndpoint::ESTABLISHED, s->_rdma_ep->_state); + ASSERT_EQ(38, write(acc_fd, data, 38)); + bthread_id_join(cntl.call_id()); + + ASSERT_EQ(EPROTO, cntl.ErrorCode()); +} + +TEST_F(RdmaTest, try_global_disable_rdma) { + StartServer(); + rdma::g_rdma_available.store(false, butil::memory_order_relaxed); + + Channel channel; + ChannelOptions chan_options; + chan_options.use_rdma = true; + chan_options.connect_timeout_ms = 500; + chan_options.timeout_ms = 500; + chan_options.max_retry = 0; + ASSERT_EQ(0, channel.Init(g_ep, &chan_options)); + Controller cntl; + test::EchoRequest req; + test::EchoResponse res; + + req.set_message(__FUNCTION__); + req.set_sleep_us(200000); + google::protobuf::Closure* done = DoNothing(); + ::test::EchoService::Stub(&channel).Echo(&cntl, &req, &res, done); + usleep(100000); + SocketUniquePtr s; + ASSERT_EQ(0, Socket::Address(cntl._single_server_id, &s)); + ASSERT_EQ(rdma::RdmaEndpoint::FALLBACK_TCP, s->_rdma_ep->_state); + bthread_id_join(cntl.call_id()); + ASSERT_EQ(0, cntl.ErrorCode()); + + StopServer(); + rdma::g_rdma_available.store(true, butil::memory_order_relaxed); +} + +TEST_F(RdmaTest, server_option_invalid) { + Server server; + ServerOptions options; + options.use_rdma = true; + + // rtmp and rdma are incompatible + options.rtmp_service = (RtmpService*)1; + ASSERT_EQ(-1, server.Start(PORT, &options)); + + // nshead and rdma are incompatible + options.rtmp_service = NULL; + 
options.nshead_service = (NsheadService*)1; + ASSERT_EQ(-1, server.Start(PORT, &options)); + + // mongo and rdma are incompatible + options.nshead_service = NULL; + options.mongo_service_adaptor = (MongoServiceAdaptor*)1; + ASSERT_EQ(-1, server.Start(PORT, &options)); + + // ssl and rdma are incompatible + options.mongo_service_adaptor = NULL; + options.mutable_ssl_options()->default_cert.certificate = "test"; + ASSERT_EQ(-1, server.Start(PORT, &options)); +} + +TEST_F(RdmaTest, channel_option_invalid) { + Channel channel; + ChannelOptions chan_options; + chan_options.use_rdma = true; + + // rtmp and rdma are incompatible + chan_options.protocol = "rtmp"; + ASSERT_EQ(-1, channel.Init(g_ep, &chan_options)); + + chan_options.protocol = "streaming_rpc"; + ASSERT_EQ(-1, channel.Init(g_ep, &chan_options)); + + // nshead and rdma are incompatible + chan_options.protocol = "nshead"; + ASSERT_EQ(-1, channel.Init(g_ep, &chan_options)); + chan_options.protocol = "nshead_mcpack"; + ASSERT_EQ(-1, channel.Init(g_ep, &chan_options)); + + // nova_pbrpc and rdma are incompatible + chan_options.protocol = "nova_pbrpc"; + ASSERT_EQ(-1, channel.Init(g_ep, &chan_options)); + + // public_pbrpc and rdma are incompatible + chan_options.protocol = "public_pbrpc"; + ASSERT_EQ(-1, channel.Init(g_ep, &chan_options)); + + // redis and rdma are incompatible + chan_options.protocol = "redis"; + ASSERT_EQ(-1, channel.Init(g_ep, &chan_options)); + + // memcache and rdma are incompatible + chan_options.protocol = "memcache"; + ASSERT_EQ(-1, channel.Init(g_ep, &chan_options)); + + // ubrpc and rdma are incompatible + chan_options.protocol = "ubrpc_compack"; + ASSERT_EQ(-1, channel.Init(g_ep, &chan_options)); + + // itp and rdma are incompatible + chan_options.protocol = "itp"; + ASSERT_EQ(-1, channel.Init(g_ep, &chan_options)); + + // esp and rdma are incompatible + chan_options.protocol = "esp"; + ASSERT_EQ(-1, channel.Init(g_ep, &chan_options)); + + // hulu_pbrpc and rdma are incompatible + 
chan_options.protocol = "hulu_pbrpc"; + ASSERT_EQ(-1, channel.Init(g_ep, &chan_options)); + + // sofa_pbrpc and rdma are incompatible + chan_options.protocol = "sofa_pbrpc"; + ASSERT_EQ(-1, channel.Init(g_ep, &chan_options)); + + // http and rdma are incompatible + chan_options.protocol = "http"; + ASSERT_EQ(-1, channel.Init(g_ep, &chan_options)); + + // ssl and rdma are incompatible + chan_options.protocol = "baidu_std"; + chan_options.mutable_ssl_options()->sni_name = "test"; + ASSERT_EQ(-1, channel.Init(g_ep, &chan_options)); +} + +TEST_F(RdmaTest, rdma_client_to_rdma_server) { + if (!FLAGS_rdma_test_enable) { + return; + } + + StartServer(); + + Channel channel; + ChannelOptions chan_options; + chan_options.use_rdma = true; + chan_options.connect_timeout_ms = 500; + chan_options.timeout_ms = 500; + chan_options.max_retry = 0; + ASSERT_EQ(0, channel.Init(g_ep, &chan_options)); + Controller cntl; + test::EchoRequest req; + test::EchoResponse res; + req.set_message(__FUNCTION__); + google::protobuf::Closure* done = DoNothing(); + ::test::EchoService::Stub(&channel).Echo(&cntl, &req, &res, done); + usleep(100000); + bthread_id_join(cntl.call_id()); + ASSERT_EQ(0, cntl.ErrorCode()); + + StopServer(); +} + +TEST_F(RdmaTest, tcp_client_to_tcp_server) { + StartServer(false); + + Channel channel; + ChannelOptions chan_options; + chan_options.connect_timeout_ms = 500; + chan_options.timeout_ms = 500; + chan_options.max_retry = 0; + ASSERT_EQ(0, channel.Init(g_ep, &chan_options)); + Controller cntl; + test::EchoRequest req; + test::EchoResponse res; + req.set_message(__FUNCTION__); + google::protobuf::Closure* done = DoNothing(); + ::test::EchoService::Stub(&channel).Echo(&cntl, &req, &res, done); + usleep(100000); + bthread_id_join(cntl.call_id()); + ASSERT_EQ(0, cntl.ErrorCode()); + + StopServer(); +} + +TEST_F(RdmaTest, tcp_client_to_rdma_server) { + StartServer(); + + Channel channel; + ChannelOptions chan_options; + chan_options.connect_timeout_ms = 500; + 
chan_options.timeout_ms = 500; + chan_options.max_retry = 0; + ASSERT_EQ(0, channel.Init(g_ep, &chan_options)); + Controller cntl; + test::EchoRequest req; + test::EchoResponse res; + req.set_message(__FUNCTION__); + google::protobuf::Closure* done = DoNothing(); + ::test::EchoService::Stub(&channel).Echo(&cntl, &req, &res, done); + usleep(100000); + bthread_id_join(cntl.call_id()); + ASSERT_EQ(0, cntl.ErrorCode()); + + StopServer(); +} + +TEST_F(RdmaTest, rdma_client_to_tcp_server) { + StartServer(false); + + Channel channel; + ChannelOptions chan_options; + chan_options.use_rdma = true; + chan_options.connect_timeout_ms = 500; + chan_options.timeout_ms = 500; + chan_options.max_retry = 0; + ASSERT_EQ(0, channel.Init(g_ep, &chan_options)); + Controller cntl; + test::EchoRequest req; + test::EchoResponse res; + req.set_message(__FUNCTION__); + google::protobuf::Closure* done = DoNothing(); + ::test::EchoService::Stub(&channel).Echo(&cntl, &req, &res, done); + usleep(100000); + bthread_id_join(cntl.call_id()); + ASSERT_EQ(EEOF, cntl.ErrorCode()); + + StopServer(); +} + +static const int RPC_NUM = 1024; + +void DumpRdmaEndpointInfo(Socket* client, Socket* server) { + std::cout << std::endl << "client:"; + client->_rdma_ep->DebugInfo(std::cout); + std::cout << std::endl << "server:"; + server->_rdma_ep->DebugInfo(std::cout); +} + +TEST_F(RdmaTest, send_rpcs_in_one_qp) { + if (!FLAGS_rdma_test_enable) { + return; + } + + StartServer(); + + Channel channel; + ChannelOptions chan_options; + chan_options.use_rdma = true; + chan_options.connect_timeout_ms = 500; + chan_options.timeout_ms = 3000; + chan_options.max_retry = 0; + ASSERT_EQ(0, channel.Init(g_ep, &chan_options)); + Controller cntl[RPC_NUM]; + test::EchoRequest req[RPC_NUM]; + test::EchoResponse res[RPC_NUM]; + + LOG(INFO) << "send 0 attachment"; + for (int i = 0; i < RPC_NUM; ++i) { + req[i].set_message(__FUNCTION__); + google::protobuf::Closure* done = DoNothing(); + 
::test::EchoService::Stub(&channel).Echo(&cntl[i], &req[i], &res[i], done); + } + for (int i = 0; i < RPC_NUM; ++i) { + bthread_id_join(cntl[i].call_id()); + if (cntl[i].ErrorCode() == ERPCTIMEDOUT) { + SocketUniquePtr s; + ASSERT_EQ(0, Socket::Address(cntl[i]._single_server_id, &s)); + Socket* m = GetSocketFromServer(0); + DumpRdmaEndpointInfo(s.get(), m); + } + ASSERT_EQ(0, cntl[i].ErrorCode()) << "req[" << i << "]"; + } + + LOG(INFO) << "send 4KB attachment"; + butil::IOBuf attach; + attach.resize(4096); + for (int i = 0; i < RPC_NUM; ++i) { + cntl[i].Reset(); + cntl[i].request_attachment().append(attach); + google::protobuf::Closure* done = DoNothing(); + ::test::EchoService::Stub(&channel).Echo(&cntl[i], &req[i], &res[i], done); + } + for (int i = 0; i < RPC_NUM; ++i) { + bthread_id_join(cntl[i].call_id()); + if (cntl[i].ErrorCode() == ERPCTIMEDOUT) { + SocketUniquePtr s; + ASSERT_EQ(0, Socket::Address(cntl[i]._single_server_id, &s)); + Socket* m = GetSocketFromServer(0); + DumpRdmaEndpointInfo(s.get(), m); + } + ASSERT_EQ(0, cntl[i].ErrorCode()) << "req[" << i << "]"; + } + + LOG(INFO) << "send 1MB attachment"; + attach.resize(1048576); + for (int i = 0; i < RPC_NUM; ++i) { + cntl[i].Reset(); + cntl[i].request_attachment().append(attach); + google::protobuf::Closure* done = DoNothing(); + ::test::EchoService::Stub(&channel).Echo(&cntl[i], &req[i], &res[i], done); + } + for (int i = 0; i < RPC_NUM; ++i) { + bthread_id_join(cntl[i].call_id()); + if (cntl[i].ErrorCode() == ERPCTIMEDOUT) { + SocketUniquePtr s; + ASSERT_EQ(0, Socket::Address(cntl[i]._single_server_id, &s)); + Socket* m = GetSocketFromServer(0); + DumpRdmaEndpointInfo(s.get(), m); + } + ASSERT_TRUE(0 == cntl[i].ErrorCode() || + EOVERCROWDED == cntl[i].ErrorCode()) << "req[" << i << "]"; + } + + StopServer(); +} + +TEST_F(RdmaTest, send_rpc_in_many_qp) { + if (!FLAGS_rdma_test_enable) { + return; + } + + Server server[100]; + MyEchoService svc[100]; + int num = 100; + for (int i = 0; i < num; ++i) { 
+ ServerOptions options; + options.use_rdma = true; + options.idle_timeout_sec = 1; + options.max_concurrency = 0; + options.internal_port = -1; + server[i].AddService(&svc[i], SERVER_DOESNT_OWN_SERVICE); + EXPECT_EQ(0, server[i].Start(i + 8000, &options)); + } + + int port = 0; + butil::IOBuf attach; + attach.resize(4096); + ChannelOptions chan_options; + chan_options.use_rdma = true; + chan_options.connect_timeout_ms = 500; + chan_options.timeout_ms = 500; + chan_options.max_retry = 0; + Channel channel[RPC_NUM]; + Server* svr[RPC_NUM]; + Controller cntl[RPC_NUM]; + test::EchoRequest req[RPC_NUM]; + test::EchoResponse res[RPC_NUM]; + butil::ip_t ip; + butil::str2ip(g_ip.c_str(), &ip); + for (int i = 0; i < RPC_NUM; ++i) { + svr[i] = &server[i % num]; + butil::EndPoint ep(ip, 8000 + ((port++) % num)); + ASSERT_EQ(0, channel[i].Init(ep, &chan_options)); + req[i].set_message(__FUNCTION__); + cntl[i].request_attachment().append(attach); + google::protobuf::Closure* done = DoNothing(); + ::test::EchoService::Stub(&channel[i]).Echo(&cntl[i], &req[i], &res[i], done); + } + for (int i = 0; i < RPC_NUM; ++i) { + bthread_id_join(cntl[i].call_id()); + if (cntl[i].ErrorCode() == ERPCTIMEDOUT) { + SocketUniquePtr s; + ASSERT_EQ(0, Socket::Address(cntl[i]._single_server_id, &s)); + std::vector sids; + svr[i]->_am->ListConnections(&sids); + for (size_t i = 0; i < sids.size(); ++i) { + SocketUniquePtr m; + ASSERT_EQ(0, Socket::AddressFailedAsWell(sids[i], &m)); + DumpRdmaEndpointInfo(s.get(), m.get()); + } + } + ASSERT_EQ(0, cntl[i].ErrorCode()) << "req[" << i << "]"; + } + + for (int i = 0; i < num; ++i) { + server[i].Stop(0); + server[i].Join(); + } +} + +TEST_F(RdmaTest, send_rpcs_as_pooled_connection) { + if (!FLAGS_rdma_test_enable) { + return; + } + + StartServer(); + + Channel channel; + ChannelOptions chan_options; + chan_options.use_rdma = true; + chan_options.connect_timeout_ms = 30000; // it may very slow + chan_options.timeout_ms = 30000; + chan_options.max_retry = 
0; + chan_options.connection_type = "pooled"; + ASSERT_EQ(0, channel.Init(g_ep, &chan_options)); + Controller cntl[RPC_NUM]; + test::EchoRequest req[RPC_NUM]; + test::EchoResponse res[RPC_NUM]; + + butil::IOBuf attach; + attach.resize(4096); + for (int i = 0; i < RPC_NUM; ++i) { + req[i].set_message(__FUNCTION__); + cntl[i].request_attachment().append(attach); + google::protobuf::Closure* done = DoNothing(); + ::test::EchoService::Stub(&channel).Echo(&cntl[i], &req[i], &res[i], done); + } + for (int i = 0; i < RPC_NUM; ++i) { + bthread_id_join(cntl[i].call_id()); + if (cntl[i].ErrorCode() == ERPCTIMEDOUT) { + SocketUniquePtr s; + ASSERT_EQ(0, Socket::Address(cntl[i]._single_server_id, &s)); + Socket* m = GetSocketFromServer(0); + DumpRdmaEndpointInfo(s.get(), m); + } + ASSERT_EQ(0, cntl[i].ErrorCode()) << "req[" << i << "]"; + } + + StopServer(); +} + +TEST_F(RdmaTest, send_rpcs_as_short_connection) { + if (!FLAGS_rdma_test_enable) { + return; + } + + StartServer(); + + Channel channel; + ChannelOptions chan_options; + chan_options.use_rdma = true; + chan_options.connect_timeout_ms = 30000; // it may very slow + chan_options.timeout_ms = 30000; + chan_options.max_retry = 0; + chan_options.connection_type = "short"; + ASSERT_EQ(0, channel.Init(g_ep, &chan_options)); + Controller cntl[RPC_NUM]; + test::EchoRequest req[RPC_NUM]; + test::EchoResponse res[RPC_NUM]; + + butil::IOBuf attach; + attach.resize(4096); + for (int i = 0; i < RPC_NUM; ++i) { + req[i].set_message(__FUNCTION__); + cntl[i].request_attachment().append(attach); + google::protobuf::Closure* done = DoNothing(); + ::test::EchoService::Stub(&channel).Echo(&cntl[i], &req[i], &res[i], done); + } + for (int i = 0; i < RPC_NUM; ++i) { + bthread_id_join(cntl[i].call_id()); + if (cntl[i].ErrorCode() == ERPCTIMEDOUT) { + SocketUniquePtr s; + ASSERT_EQ(0, Socket::Address(cntl[i]._single_server_id, &s)); + Socket* m = GetSocketFromServer(0); + DumpRdmaEndpointInfo(s.get(), m); + } + ASSERT_EQ(0, 
cntl[i].ErrorCode()) << "req[" << i << "]"; + } + + StopServer(); +} + +TEST_F(RdmaTest, server_stop_during_rpc) { + if (!FLAGS_rdma_test_enable) { + return; + } + + StartServer(); + + Channel channel; + ChannelOptions chan_options; + chan_options.use_rdma = true; + chan_options.connect_timeout_ms = 500; + chan_options.timeout_ms = 3000; + chan_options.max_retry = 0; + ASSERT_EQ(0, channel.Init(g_ep, &chan_options)); + Controller cntl[RPC_NUM]; + test::EchoRequest req[RPC_NUM]; + test::EchoResponse res[RPC_NUM]; + + butil::IOBuf attach; + attach.resize(4096); + for (int i = 0; i < RPC_NUM; ++i) { + req[i].set_message(__FUNCTION__); + cntl[i].request_attachment().append(attach); + google::protobuf::Closure* done = DoNothing(); + ::test::EchoService::Stub(&channel).Echo(&cntl[i], &req[i], &res[i], done); + } + + for (int i = 0; i < RPC_NUM; ++i) { + bthread_id_join(cntl[i].call_id()); + if (i == 0) StopServer(); + int error_code = cntl[i].ErrorCode(); + ASSERT_TRUE(error_code == 0 || + error_code == EEOF || + error_code == ELOGOFF || + error_code == EHOSTDOWN) << "req[" << i << "]: " << error_code; + } +} + +TEST_F(RdmaTest, server_close_during_rpc) { + if (!FLAGS_rdma_test_enable) { + return; + } + + StartServer(); + + Channel channel; + ChannelOptions chan_options; + chan_options.use_rdma = true; + chan_options.connect_timeout_ms = 500; + chan_options.timeout_ms = 3000; + chan_options.max_retry = 0; + ASSERT_EQ(0, channel.Init(g_ep, &chan_options)); + Controller cntl[RPC_NUM]; + test::EchoRequest req[RPC_NUM]; + test::EchoResponse res[RPC_NUM]; + + butil::IOBuf attach; + attach.resize(4096); + for (int i = 0; i < RPC_NUM; ++i) { + req[i].set_message(__FUNCTION__); + cntl[i].request_attachment().append(attach); + if (i == RPC_NUM / 2) { + req[i].set_close_fd(true); + } + google::protobuf::Closure* done = DoNothing(); + ::test::EchoService::Stub(&channel).Echo(&cntl[i], &req[i], &res[i], done); + } + + for (int i = 0; i < RPC_NUM; ++i) { + 
bthread_id_join(cntl[i].call_id()); + int error_code = cntl[i].ErrorCode(); + ASSERT_TRUE(error_code == 0 || + error_code == EEOF || + error_code == EFAILEDSOCKET || + error_code == EHOSTDOWN) << "req[" << i << "]: " << error_code; + } + + StopServer(); +} + +TEST_F(RdmaTest, client_close_during_rpc) { + if (!FLAGS_rdma_test_enable) { + return; + } + + StartServer(); + + Channel channel; + ChannelOptions chan_options; + chan_options.use_rdma = true; + chan_options.connect_timeout_ms = 500; + chan_options.timeout_ms = 3000; + chan_options.max_retry = 0; + ASSERT_EQ(0, channel.Init(g_ep, &chan_options)); + Controller cntl[RPC_NUM]; + test::EchoRequest req[RPC_NUM]; + test::EchoResponse res[RPC_NUM]; + + butil::IOBuf attach; + attach.resize(4096); + for (int i = 0; i < RPC_NUM; ++i) { + req[i].set_message(__FUNCTION__); + cntl[i].request_attachment().append(attach); + google::protobuf::Closure* done = DoNothing(); + ::test::EchoService::Stub(&channel).Echo(&cntl[i], &req[i], &res[i], done); + } + + cntl[0].CloseConnection("Close connection"); + + for (int i = 0; i < RPC_NUM; ++i) { + bthread_id_join(cntl[i].call_id()); + int error_code = cntl[i].ErrorCode(); + ASSERT_TRUE(error_code == 0 || + error_code == ECLOSE || + error_code == EHOSTDOWN) << "req[" << i << "]: " << error_code; + } + + StopServer(); +} + +TEST_F(RdmaTest, verbs_error_handling) { + if (!FLAGS_rdma_test_enable) { + return; + } + + StartServer(); + + Channel channel; + ChannelOptions chan_options; + chan_options.use_rdma = true; + chan_options.connect_timeout_ms = 500; + chan_options.timeout_ms = 500; + chan_options.max_retry = 0; + ASSERT_EQ(0, channel.Init(g_ep, &chan_options)); + + Controller cntl; + test::EchoRequest req; + test::EchoResponse res; + req.set_message(__FUNCTION__); + req.set_sleep_us(200000); + google::protobuf::Closure* done = DoNothing(); + ::test::EchoService::Stub(&channel).Echo(&cntl, &req, &res, done); + + usleep(100000); // wait for rdma handshake complete + + SocketUniquePtr 
s; + ASSERT_EQ(0, Socket::Address(cntl._single_server_id, &s)); + ibv_send_wr wr; + memset(&wr, 0, sizeof(wr)); + ibv_sge sge; + void* buf = malloc(8192); + sge.addr = (uint64_t)buf; + sge.length = 8192; + sge.lkey = 1; // incorrect lkey + wr.sg_list = &sge; + wr.num_sge = 1; + ibv_send_wr* bad = NULL; + ibv_post_send(s->_rdma_ep->_resource->qp, &wr, &bad); + bthread_id_join(cntl.call_id()); + ASSERT_EQ(ERDMA, cntl.ErrorCode()); + free(buf); + + StopServer(); +} + +TEST_F(RdmaTest, rdma_use_parallel_channel) { + if (!FLAGS_rdma_test_enable) { + return; + } + + StartServer(); + + const size_t NCHANS = 8; + Channel subchans[NCHANS]; + ParallelChannel channel; + ChannelOptions opts; + opts.use_rdma = true; + for (size_t i = 0; i < NCHANS; ++i) { + ASSERT_EQ(0, subchans[i].Init(_naming_url.c_str(), "rR", &opts)); + ASSERT_EQ(0, channel.AddChannel( + &subchans[i], DOESNT_OWN_CHANNEL, + NULL, NULL)); + } + + Controller cntl; + test::EchoRequest req; + test::EchoResponse res; + req.set_message(__FUNCTION__); + ::test::EchoService::Stub(&channel).Echo(&cntl, &req, &res, NULL); + + ASSERT_EQ(0, cntl.ErrorCode()); + ASSERT_EQ(NCHANS, (size_t)cntl.sub_count()); + + StopServer(); +} + +TEST_F(RdmaTest, rdma_use_selective_channel) { + if (!FLAGS_rdma_test_enable) { + return; + } + + StartServer(); + + const size_t NCHANS = 8; + SelectiveChannel channel; + ChannelOptions opts; + opts.use_rdma = true; + ASSERT_EQ(0, channel.Init("rr", &opts)); + for (size_t i = 0; i < NCHANS; ++i) { + Channel* subchan = new Channel; + ASSERT_EQ(0, subchan->Init(_naming_url.c_str(), "rR", &opts)); + ASSERT_EQ(0, channel.AddChannel(subchan, NULL)); + } + + Controller cntl; + test::EchoRequest req; + test::EchoResponse res; + req.set_message(__FUNCTION__); + ::test::EchoService::Stub(&channel).Echo(&cntl, &req, &res, NULL); + + ASSERT_EQ(0, cntl.ErrorCode()) << cntl.ErrorText(); + ASSERT_EQ(1, cntl.sub_count()); + + StopServer(); +} + +TEST_F(RdmaTest, send_rpcs_with_user_defined_iobuf) { + if 
(!FLAGS_rdma_test_enable) { + return; + } + + StartServer(); + + Channel channel; + ChannelOptions chan_options; + chan_options.use_rdma = true; + chan_options.connect_timeout_ms = 500; + chan_options.timeout_ms = 500; + chan_options.max_retry = 0; + ASSERT_EQ(0, channel.Init(g_ep, &chan_options)); + Controller cntl[RPC_NUM]; + test::EchoRequest req[RPC_NUM]; + test::EchoResponse res[RPC_NUM]; + + butil::IOBuf attach; + void* data = malloc(4096);; + attach.append_user_data(data, 4096, NULL); + req[0].set_message(__FUNCTION__); + cntl[0].request_attachment().append(attach); + google::protobuf::Closure* done = DoNothing(); + ::test::EchoService::Stub(&channel).Echo(&cntl[0], &req[0], &res[0], done); + bthread_id_join(cntl[0].call_id()); + ASSERT_EQ(ERDMAMEM, cntl[0].ErrorCode()); + attach.clear(); + sleep(2); // wait for client recover from EHOSTDOWN + + void* mr[RPC_NUM]; + butil::IOBuf attachment[RPC_NUM]; + for (int i = 1; i < RPC_NUM; ++i) { + mr[i] = malloc(4096); + memset(mr[i], i % 100, 4096); + ASSERT_EQ(0, rdma::RegisterMemoryForRdma(mr[i], 4096)); + attachment[i].append_user_data(mr[i], 4096, NULL); + req[i].set_message(__FUNCTION__); + cntl[i].request_attachment().append(attachment[i]); + google::protobuf::Closure* done = DoNothing(); + ::test::EchoService::Stub(&channel).Echo(&cntl[i], &req[i], &res[i], done); + } + for (int i = 1; i < RPC_NUM; ++i) { + bthread_id_join(cntl[i].call_id()); + ASSERT_EQ(0, cntl[i].ErrorCode()) << "req[" << i << "]"; + rdma::DeregisterMemoryForRdma(mr[i]); + ASSERT_EQ(4096, cntl[i].response_attachment().size()); + char tmp[4096]; + cntl[i].response_attachment().copy_to(tmp, 4096); + ASSERT_EQ(0, memcmp(mr[i], tmp, 4096)); + } + + StopServer(); +} + +TEST_F(RdmaTest, try_memory_pool_empty) { + if (!FLAGS_rdma_test_enable) { + return; + } + + StartServer(); + + Channel channel; + ChannelOptions chan_options; + chan_options.use_rdma = true; + chan_options.connect_timeout_ms = 500; + chan_options.timeout_ms = 60000; + 
chan_options.max_retry = 0; + ASSERT_EQ(0, channel.Init(g_ep, &chan_options)); + Controller cntl[RPC_NUM]; + test::EchoRequest req[RPC_NUM]; + test::EchoResponse res[RPC_NUM]; + + butil::IOBuf iobuf[RPC_NUM]; + for (int i = 0; i < 1024; ++i) { + if (iobuf[i].resize(1048576 * 8)) { + // 8MB for each iobuf + break; + } + } + + for (int i = 0; i < RPC_NUM; ++i) { + req[i].set_message(__FUNCTION__); + cntl[i].request_attachment().append(iobuf[i]); + google::protobuf::Closure* done = DoNothing(); + ::test::EchoService::Stub(&channel).Echo(&cntl[i], &req[i], &res[i], done); + } + for (int i = 0; i < RPC_NUM; ++i) { + bthread_id_join(cntl[i].call_id()); + } + + StopServer(); +} + +#endif // if BRPC_WITH_RDMA + +int main(int argc, char* argv[]) { + testing::InitGoogleTest(&argc, argv); + GFLAGS_NS::ParseCommandLineFlags(&argc, &argv, true); +#if BRPC_WITH_RDMA + rdma::FLAGS_rdma_trace_verbose = true; + rdma::FLAGS_rdma_memory_pool_max_regions = 2; + FLAGS_log_idle_connection_close = true; + if (!FLAGS_rdma_test_enable) { + // skip UT requiring rdma runtime environment + rdma::g_rdma_available.store(true, butil::memory_order_relaxed); + rdma::g_skip_rdma_init = true; + } +#endif // if BRPC_WITH_RDMA + return RUN_ALL_TESTS(); +} diff --git a/test/brpc_server_unittest.cpp b/test/brpc_server_unittest.cpp index 98747a049a..c22b6b53b4 100644 --- a/test/brpc_server_unittest.cpp +++ b/test/brpc_server_unittest.cpp @@ -64,6 +64,11 @@ int main(int argc, char* argv[]) { namespace brpc { DECLARE_bool(enable_threads_service); DECLARE_bool(enable_dir_service); + +namespace policy { +DECLARE_bool(use_http_error_code); +} + } namespace { @@ -929,6 +934,158 @@ TEST_F(ServerTest, restful_mapping) { ASSERT_EQ(0u, server1._global_restful_map->size()); } +TEST_F(ServerTest, http_error_code) { + brpc::policy::FLAGS_use_http_error_code = true; + + const int port = 9200; + // missing_required_fields -> brpc::EREQUEST + { + brpc::Server server1; + EchoServiceV1 service_v1; + ASSERT_EQ(0, 
server1.AddService(&service_v1, brpc::SERVER_DOESNT_OWN_SERVICE)); + ASSERT_EQ(0, server1.Start(port, NULL)); + + brpc::Channel http_channel; + brpc::ChannelOptions chan_options; + chan_options.protocol = "http"; + ASSERT_EQ(0, http_channel.Init("0.0.0.0", port, &chan_options)); + brpc::Controller cntl; + cntl.http_request().uri() = "/EchoService/Echo"; + http_channel.CallMethod(NULL, &cntl, NULL, NULL, NULL); + ASSERT_TRUE(cntl.Failed()); + ASSERT_EQ(brpc::EREQUEST, cntl.ErrorCode()); + LOG(INFO) << cntl.ErrorText(); + ASSERT_EQ(brpc::HTTP_STATUS_BAD_REQUEST, cntl.http_response().status_code()); + ASSERT_EQ(0, service_v1.ncalled.load()); + } + + // disallow_http_body_to_pb -> brpc::ERESPONSE + { + brpc::Server server1; + EchoServiceV1 service_v1; + brpc::ServiceOptions svc_opt; + svc_opt.allow_http_body_to_pb = false; + svc_opt.restful_mappings = "/access_echo1=>Echo"; + ASSERT_EQ(0, server1.AddService(&service_v1, svc_opt)); + ASSERT_EQ(0, server1.Start(port, NULL)); + brpc::Channel http_channel; + brpc::ChannelOptions chan_options; + chan_options.protocol = "http"; + ASSERT_EQ(0, http_channel.Init("0.0.0.0", port, &chan_options)); + brpc::Controller cntl; + cntl.http_request().uri() = "/access_echo1"; + http_channel.CallMethod(NULL, &cntl, NULL, NULL, NULL); + ASSERT_TRUE(cntl.Failed()); + ASSERT_EQ(brpc::ERESPONSE, cntl.ErrorCode()); + ASSERT_EQ(brpc::HTTP_STATUS_INTERNAL_SERVER_ERROR, + cntl.http_response().status_code()); + ASSERT_EQ(1, service_v1.ncalled.load()); + } + + // restful_mapping -> brpc::ENOMETHOD + { + brpc::Server server1; + EchoServiceV1 service_v1; + ASSERT_EQ(0u, server1.service_count()); + ASSERT_EQ(0, server1.AddService( + &service_v1, + brpc::SERVER_DOESNT_OWN_SERVICE, + "/v1/echo/ => Echo," + + // Map another path to the same method is ok. 
+ "/v3/echo => Echo," + + // end with wildcard + "/v2/echo/* => Echo," + + // single-component path should be OK + "/v4_echo => Echo," + + // heading slash can be ignored + " v5/echo => Echo," + + // with or without wildcard can coexist. + " /v6/echo => Echo," + " /v6/echo/* => Echo2," + " /v6/abc/*/def => Echo3," + " /v6/echo/*.flv => Echo4," + " /v6/*.flv => Echo5," + " *.flv => Echo," + )); + ASSERT_EQ(1u, server1.service_count()); + ASSERT_TRUE(server1._global_restful_map); + ASSERT_EQ(1UL, server1._global_restful_map->size()); + + ASSERT_EQ(0, server1.Start(port, NULL)); + brpc::Channel http_channel; + brpc::ChannelOptions chan_options; + chan_options.protocol = "http"; + ASSERT_EQ(0, http_channel.Init("0.0.0.0", port, &chan_options)); + brpc::Controller cntl; + cntl.http_request().uri() = "/v3/echo/anything"; + cntl.http_request().set_method(brpc::HTTP_METHOD_POST); + cntl.request_attachment().append("{\"message\":\"foo\"}"); + http_channel.CallMethod(NULL, &cntl, NULL, NULL, NULL); + ASSERT_TRUE(cntl.Failed()); + ASSERT_EQ(brpc::ENOMETHOD, cntl.ErrorCode()); + LOG(INFO) << "Expected error: " << cntl.ErrorText(); + ASSERT_EQ(0, service_v1.ncalled.load()); + } + + // max_concurrency -> brpc::ELIMIT + { + brpc::Server server1; + EchoServiceImpl service1; + ASSERT_EQ(0, server1.AddService(&service1, brpc::SERVER_DOESNT_OWN_SERVICE)); + server1.MaxConcurrencyOf("test.EchoService.Echo") = 1; + ASSERT_EQ(1, server1.MaxConcurrencyOf("test.EchoService.Echo")); + server1.MaxConcurrencyOf(&service1, "Echo") = 2; + ASSERT_EQ(2, server1.MaxConcurrencyOf(&service1, "Echo")); + + ASSERT_EQ(0, server1.Start(port, NULL)); + brpc::Channel http_channel; + brpc::ChannelOptions chan_options; + chan_options.protocol = "http"; + ASSERT_EQ(0, http_channel.Init("0.0.0.0", port, &chan_options)); + + brpc::Channel normal_channel; + ASSERT_EQ(0, normal_channel.Init("0.0.0.0", port, NULL)); + test::EchoService_Stub stub(&normal_channel); + + brpc::Controller cntl1; + 
cntl1.http_request().uri() = "/EchoService/Echo"; + cntl1.http_request().set_method(brpc::HTTP_METHOD_POST); + cntl1.request_attachment().append("{\"message\":\"hello\",\"sleep_us\":100000}"); + http_channel.CallMethod(NULL, &cntl1, NULL, NULL, brpc::DoNothing()); + + brpc::Controller cntl2; + test::EchoRequest req; + test::EchoResponse res; + req.set_message("hello"); + req.set_sleep_us(100000); + stub.Echo(&cntl2, &req, &res, brpc::DoNothing()); + + bthread_usleep(20000); + LOG(INFO) << "Send other requests"; + + brpc::Controller cntl3; + cntl3.http_request().uri() = "/EchoService/Echo"; + cntl3.http_request().set_method(brpc::HTTP_METHOD_POST); + cntl3.request_attachment().append("{\"message\":\"hello\"}"); + http_channel.CallMethod(NULL, &cntl3, NULL, NULL, NULL); + ASSERT_TRUE(cntl3.Failed()); + ASSERT_EQ(brpc::ELIMIT, cntl3.ErrorCode()); + ASSERT_EQ(brpc::HTTP_STATUS_SERVICE_UNAVAILABLE, cntl3.http_response().status_code()); + + brpc::Join(cntl1.call_id()); + brpc::Join(cntl2.call_id()); + ASSERT_FALSE(cntl1.Failed()) << cntl1.ErrorText(); + ASSERT_FALSE(cntl2.Failed()) << cntl2.ErrorText(); + } + + brpc::policy::FLAGS_use_http_error_code = false; +} + TEST_F(ServerTest, conflict_name_between_restful_mapping_and_builtin) { const int port = 9200; EchoServiceV1 service_v1; diff --git a/test/bthread_cond_bug_unittest.cpp b/test/bthread_cond_bug_unittest.cpp new file mode 100644 index 0000000000..90881f5669 --- /dev/null +++ b/test/bthread_cond_bug_unittest.cpp @@ -0,0 +1,141 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include +#include +#include +#include +#include + +#include +#include + +#include "bthread/bthread.h" +#include "bthread/condition_variable.h" +#include "bthread/mutex.h" +#include "butil/logging.h" +#include "butil/macros.h" +#include "bvar/bvar.h" + +DEFINE_int64(wait_us, 5, "wait us"); +typedef std::unique_lock Lock; +typedef bthread::ConditionVariable Condition; +bthread::Mutex g_mutex; +Condition g_cond; +std::deque g_que; +const size_t g_capacity = 2000; +const int PRODUCER_NUM = 5; +struct ProducerStat { + std::atomic loop_count; + bvar::Adder wait_count; + bvar::Adder wait_timeout_count; + bvar::Adder wait_success_count; +}; +ProducerStat g_stat[PRODUCER_NUM]; + +void* print_func(void* arg) { // Watchdog: samples every producer once per second for 10 rounds; returns (void*)1 if all kept advancing, nullptr as soon as one has not advanced since its previous sample. + int last_loop[PRODUCER_NUM] = {0}; + for (int j = 0; j < 10; j++) { + usleep(1000000); + for (int i = 0; i < PRODUCER_NUM; i++) { + if (g_stat[i].loop_count.load() <= last_loop[i]) { + LOG(ERROR) << "producer thread:" << i << " stopped"; + return nullptr; + } + LOG(INFO) << "producer stat idx:" << i + << " wait:" << g_stat[i].wait_count + << " wait_timeout:" << g_stat[i].wait_timeout_count + << " wait_success:" << g_stat[i].wait_success_count; + last_loop[i] = g_stat[i].loop_count.load(); // remember this sample so the next round can detect a stalled producer; never write back to the shared counter here + } + } + return (void*)1; +} + +void* produce_func(void* arg) { + const int64_t wait_us = FLAGS_wait_us; + LOG(INFO) << "wait us:" << wait_us; + int64_t idx = (int64_t)(arg); + int32_t i = 0; + while (!bthread_stopped(bthread_self())) { + //LOG(INFO) << "come to a new round " << idx << "round[" << i << "]"; + { + Lock lock(g_mutex); + while 
(g_que.size() >= g_capacity && !bthread_stopped(bthread_self())) { + g_stat[idx].wait_count << 1; + //LOG(INFO) << "wait begin " << idx; + int ret = g_cond.wait_for(lock, wait_us); + if (ret == ETIMEDOUT) { + g_stat[idx].wait_timeout_count << 1; + //LOG_EVERY_SECOND(INFO) << "wait timeout " << idx; + } else { + g_stat[idx].wait_success_count << 1; + //LOG_EVERY_SECOND(INFO) << "wait early " << idx; + } + } + g_que.push_back(++i); + //LOG(INFO) << "push back " << idx << " data[" << i << "]"; + } + usleep(rand() % 20 + 5); + g_stat[idx].loop_count.fetch_add(1); + } + LOG(INFO) << "producer func return, idx:" << idx; + return nullptr; +} + +void* consume_func(void* arg) { + while (!bthread_stopped(bthread_self())) { + bool need_notify = false; + { + Lock lock(g_mutex); + need_notify = (g_que.size() == g_capacity); + if (!g_que.empty()) { + g_que.pop_front(); + LOG_EVERY_SECOND(INFO) << "pop a data"; + } else { + LOG_EVERY_SECOND(INFO) << "que is empty"; + } + } + usleep(rand() % 300 + 500); + if (need_notify) { + //g_cond.notify_all(); + //LOG(WARNING) << "notify"; + } + } + LOG(INFO) << "consumer func return"; + return nullptr; +} + +TEST(BthreadCondBugTest, test_bug) { + bthread_t tids[PRODUCER_NUM]; + for (int i = 0; i < PRODUCER_NUM; i++) { + bthread_start_background(&tids[i], NULL, produce_func, (void*)(int64_t)i); + } + bthread_t tid; + bthread_start_background(&tid, NULL, consume_func, NULL); + + int64_t ret = (int64_t)print_func(nullptr); + + bthread_stop(tid); + bthread_join(tid, nullptr); + for (int i = 0; i < PRODUCER_NUM; i++) { + bthread_stop(tids[i]); + bthread_join(tids[i], nullptr); + } + + ASSERT_EQ(ret, 1); +} diff --git a/test/bvar_recorder_unittest.cpp b/test/bvar_recorder_unittest.cpp index 2f6ea20eaa..b0938da99d 100644 --- a/test/bvar_recorder_unittest.cpp +++ b/test/bvar_recorder_unittest.cpp @@ -243,7 +243,7 @@ TEST(RecorderTest, latency_recorder_qps_accuracy) { ASSERT_GT(0.1, read(lr4, 1/2.0)); ASSERT_GT(0.1, read(lr1, 10/3.0, 3)); - 
ASSERT_GT(0.1, read(lr2, 11/3.0, 3)); + ASSERT_GT(0.2, read(lr2, 11/3.0, 3)); ASSERT_GT(0.1, read(lr3, 3/3.0, 3)); ASSERT_GT(0.1, read(lr4, 1/3.0, 3)); } diff --git a/test/echo.proto b/test/echo.proto index 10e12d474c..2a47b234e9 100644 --- a/test/echo.proto +++ b/test/echo.proto @@ -78,6 +78,11 @@ service DiscoveryNamingService { rpc Cancel(HttpRequest) returns (HttpResponse); }; +service NacosNamingService { + rpc Login(HttpRequest) returns (HttpResponse); + rpc List(HttpRequest) returns (HttpResponse); +}; + enum State0 { STATE0_NUM_0 = 0; STATE0_NUM_1 = 1; diff --git a/tools/BUILD b/tools/BUILD index 04b3abbf92..a836cb0449 100644 --- a/tools/BUILD +++ b/tools/BUILD @@ -27,7 +27,7 @@ COPTS = [ "-fno-omit-frame-pointer", "-DGFLAGS_NS=google", ] + select({ - "//:with_glog": ["-DBRPC_WITH_GLOG=1"], + "//bazel/config:brpc_with_glog": ["-DBRPC_WITH_GLOG=1"], "//conditions:default": ["-DBRPC_WITH_GLOG=0"], }) diff --git a/tools/rpc_replay/rpc_replay.cpp b/tools/rpc_replay/rpc_replay.cpp index d022e3cbdd..412397b29c 100644 --- a/tools/rpc_replay/rpc_replay.cpp +++ b/tools/rpc_replay/rpc_replay.cpp @@ -29,6 +29,7 @@ #include #include #include +#include "brpc/options.pb.h" #include "info_thread.h" DEFINE_string(dir, "", "The directory of dumped requests"); @@ -82,21 +83,25 @@ int ChannelGroup::Init() { } _chans.resize(max_protocol_size + 1); for (size_t i = 0; i < protocols.size(); ++i) { - if (protocols[i].second.support_client() && - protocols[i].second.support_server()) { - const brpc::ProtocolType prot = protocols[i].first; + const brpc::ProtocolType protocol_type = protocols[i].first; + const brpc::Protocol protocol = protocols[i].second; + brpc::ChannelOptions options; + options.protocol = protocol_type; + options.connection_type = FLAGS_connection_type; + options.timeout_ms = FLAGS_timeout_ms/*milliseconds*/; + options.max_retry = FLAGS_max_retry; + if ((options.connection_type == brpc::CONNECTION_TYPE_UNKNOWN || + options.connection_type & 
protocol.supported_connection_type) && + protocol.support_client() && + protocol.support_server()) { brpc::Channel* chan = new brpc::Channel; - brpc::ChannelOptions options; - options.protocol = prot; - options.connection_type = FLAGS_connection_type; - options.timeout_ms = FLAGS_timeout_ms/*milliseconds*/; - options.max_retry = FLAGS_max_retry; if (chan->Init(FLAGS_server.c_str(), FLAGS_load_balancer.c_str(), &options) != 0) { LOG(ERROR) << "Fail to initialize channel"; + delete chan; return -1; } - _chans[prot] = chan; + _chans[protocol_type] = chan; } } return 0; @@ -135,14 +140,10 @@ static void* replay_thread(void* arg) { double req_rate = FLAGS_qps / (double)FLAGS_thread_num; brpc::SerializedRequest req; brpc::NsheadMessage nshead_req; - std::deque timeq; - size_t MAX_QUEUE_SIZE = (size_t)req_rate; - if (MAX_QUEUE_SIZE < 100) { - MAX_QUEUE_SIZE = 100; - } else if (MAX_QUEUE_SIZE > 2000) { - MAX_QUEUE_SIZE = 2000; - } - timeq.push_back(butil::gettimeofday_us()); + int64_t last_expected_time = butil::monotonic_time_ns(); + const int64_t interval = (int64_t) (1000000000L / req_rate); + // the max tolerant delay between end_time and expected_time. 
10ms or 10 intervals + int64_t max_tolerant_delay = std::max((int64_t) 10000000L, 10 * interval); for (int i = 0; !brpc::IsAskedToQuit() && i < FLAGS_times; ++i) { brpc::SampleIterator it(FLAGS_dir); int j = 0; @@ -199,21 +200,15 @@ static void* replay_thread(void* arg) { brpc::NewCallback(handle_response, cntl, start_time, false); chan->CallMethod(NULL/*use rpc_dump_context in cntl instead*/, cntl, req_ptr, NULL/*ignore response*/, done); - const int64_t end_time = butil::gettimeofday_us(); - int64_t expected_elp = 0; - int64_t actual_elp = 0; - timeq.push_back(end_time); - if (timeq.size() > MAX_QUEUE_SIZE) { - actual_elp = end_time - timeq.front(); - timeq.pop_front(); - expected_elp = (size_t)(1000000 * timeq.size() / req_rate); - } else { - actual_elp = end_time - timeq.front(); - expected_elp = (size_t)(1000000 * (timeq.size() - 1) / req_rate); - } - if (actual_elp < expected_elp) { - bthread_usleep(expected_elp - actual_elp); + int64_t end_time = butil::monotonic_time_ns(); + int64_t expected_time = last_expected_time + interval; + if (end_time < expected_time) { + usleep((expected_time - end_time)/1000); } + if (end_time - expected_time > max_tolerant_delay) { + expected_time = end_time; + } + last_expected_time = expected_time; } } } @@ -254,6 +249,14 @@ int main(int argc, char* argv[]) { } } + const int rate_limit_per_thread = 1000000; + int req_rate_per_thread = FLAGS_qps / FLAGS_thread_num; + if (req_rate_per_thread > rate_limit_per_thread) { + LOG(ERROR) << "req_rate: " << (int64_t) req_rate_per_thread << " is too large in one thread. The rate limit is " + << rate_limit_per_thread << " in one thread"; + return -1; // report failure with a non-zero exit status; `false` would convert to 0, i.e. success + } + std::vector bids; std::vector pids; if (!FLAGS_use_bthread) { diff --git a/zlib.BUILD b/zlib.BUILD deleted file mode 100644 index 8b7895278a..0000000000 --- a/zlib.BUILD +++ /dev/null @@ -1,8 +0,0 @@ -package( - default_visibility=["//visibility:public"] -) - -cc_library( - name = "zlib", - linkopts = ["-lz"], -)