Skip to content

Commit 83208ee

Browse files
authored
[pick](branch-2.1) pick #43960 #43929 #44177 (#44240)
pick #43960 #43929 #44177
1 parent 83b7482 commit 83208ee

File tree

7 files changed

+92
-29
lines changed

7 files changed

+92
-29
lines changed

be/src/common/config.cpp

+23-2
Original file line numberDiff line numberDiff line change
@@ -63,8 +63,29 @@ DEFINE_Int32(brpc_port, "8060");
6363

6464
DEFINE_Int32(arrow_flight_sql_port, "-1");
6565

66-
DEFINE_mString(public_access_ip, "");
67-
DEFINE_Int32(public_access_port, "-1");
66+
// If the external client cannot directly access priority_networks, set public_host to a host that is accessible
67+
// to the external client.
68+
// There are usually two usage scenarios:
69+
// 1. in production environment, it is often inconvenient to expose Doris BE nodes to the external network.
70+
// However, a reverse proxy (such as Nginx) can be added to all Doris BE nodes, and the external client will be
71+
// randomly routed to a Doris BE node when connecting to Nginx. Set public_host to the host of Nginx.
72+
// 2. if priority_networks is an internal network IP, and BE node has its own independent external IP,
73+
// but Doris currently does not support modifying priority_networks, set public_host to the real external IP.
74+
DEFINE_mString(public_host, "");
75+
76+
// If the BE node is connected to the external network through a reverse proxy like Nginx
77+
// and needs to use Arrow Flight SQL, add a server in Nginx to reverse proxy
78+
// `Nginx:arrow_flight_sql_proxy_port` to `BE_priority_networks:arrow_flight_sql_port`. For example:
79+
// upstream arrowflight {
80+
// server 10.16.10.8:8069;
81+
// server 10.16.10.8:8068;
82+
//}
83+
// server {
84+
// listen 8167 http2;
85+
// listen [::]:8167 http2;
86+
// server_name doris.arrowflight.com;
87+
// }
88+
DEFINE_Int32(arrow_flight_sql_proxy_port, "-1");
6889

6990
// the number of bthreads for brpc, the default value is set to -1,
7091
// which means the number of bthreads is #cpu-cores

be/src/common/config.h

+23-5
Original file line numberDiff line numberDiff line change
@@ -100,11 +100,29 @@ DECLARE_Int32(brpc_port);
100100
// Default -1, do not start arrow flight sql server.
101101
DECLARE_Int32(arrow_flight_sql_port);
102102

103-
// If priority_networks is incorrect but cannot be modified, set public_access_ip as BE’s real IP.
104-
// For ADBC client fetch result, default is empty, the ADBC client uses the backend ip to fetch the result.
105-
// If ADBC client cannot access the backend ip, can set public_access_ip to modify the fetch result ip.
106-
DECLARE_mString(public_access_ip);
107-
DECLARE_Int32(public_access_port);
103+
// If the external client cannot directly access priority_networks, set public_host to a host that is accessible
104+
// to the external client.
105+
// There are usually two usage scenarios:
106+
// 1. in production environment, it is often inconvenient to expose Doris BE nodes to the external network.
107+
// However, a reverse proxy (such as Nginx) can be added to all Doris BE nodes, and the external client will be
108+
// randomly routed to a Doris BE node when connecting to Nginx. Set public_host to the host of Nginx.
109+
// 2. if priority_networks is an internal network IP, and BE node has its own independent external IP,
110+
// but Doris currently does not support modifying priority_networks, set public_host to the real external IP.
111+
DECLARE_mString(public_host);
112+
113+
// If the BE node is connected to the external network through a reverse proxy like Nginx
114+
// and needs to use Arrow Flight SQL, add a server in Nginx to reverse proxy
115+
// `Nginx:arrow_flight_sql_proxy_port` to `BE_priority_networks:arrow_flight_sql_port`. For example:
116+
// upstream arrowflight {
117+
// server 10.16.10.8:8069;
118+
// server 10.16.10.8:8068;
119+
//}
120+
// server {
121+
// listen 8167 http2;
122+
// listen [::]:8167 http2;
123+
// server_name doris.arrowflight.com;
124+
// }
125+
DECLARE_Int32(arrow_flight_sql_proxy_port);
108126

109127
// the number of bthreads for brpc, the default value is set to -1,
110128
// which means the number of bthreads is #cpu-cores

be/src/pipeline/exec/result_sink_operator.cpp

+11-8
Original file line numberDiff line numberDiff line change
@@ -71,6 +71,17 @@ Status ResultSinkLocalState::init(RuntimeState* state, LocalSinkStateInfo& info)
7171
RETURN_IF_ERROR(state->exec_env()->result_mgr()->create_sender(
7272
state->fragment_instance_id(), p._result_sink_buffer_size_rows, &_sender, true, state));
7373
((PipBufferControlBlock*)_sender.get())->set_dependency(_dependency->shared_from_this());
74+
75+
_output_vexpr_ctxs.resize(p._output_vexpr_ctxs.size());
76+
for (size_t i = 0; i < _output_vexpr_ctxs.size(); i++) {
77+
RETURN_IF_ERROR(p._output_vexpr_ctxs[i]->clone(state, _output_vexpr_ctxs[i]));
78+
}
79+
if (p._sink_type == TResultSinkType::ARROW_FLIGHT_PROTOCAL) {
80+
std::shared_ptr<arrow::Schema> arrow_schema;
81+
RETURN_IF_ERROR(get_arrow_schema_from_expr_ctxs(_output_vexpr_ctxs, &arrow_schema,
82+
state->timezone()));
83+
_sender->register_arrow_schema(arrow_schema);
84+
}
7485
return Status::OK();
7586
}
7687

@@ -79,10 +90,6 @@ Status ResultSinkLocalState::open(RuntimeState* state) {
7990
SCOPED_TIMER(_open_timer);
8091
RETURN_IF_ERROR(Base::open(state));
8192
auto& p = _parent->cast<ResultSinkOperatorX>();
82-
_output_vexpr_ctxs.resize(p._output_vexpr_ctxs.size());
83-
for (size_t i = 0; i < _output_vexpr_ctxs.size(); i++) {
84-
RETURN_IF_ERROR(p._output_vexpr_ctxs[i]->clone(state, _output_vexpr_ctxs[i]));
85-
}
8693
// create writer based on sink type
8794
switch (p._sink_type) {
8895
case TResultSinkType::MYSQL_PROTOCAL: {
@@ -96,10 +103,6 @@ Status ResultSinkLocalState::open(RuntimeState* state) {
96103
break;
97104
}
98105
case TResultSinkType::ARROW_FLIGHT_PROTOCAL: {
99-
std::shared_ptr<arrow::Schema> arrow_schema;
100-
RETURN_IF_ERROR(get_arrow_schema_from_expr_ctxs(_output_vexpr_ctxs, &arrow_schema,
101-
state->timezone()));
102-
_sender->register_arrow_schema(arrow_schema);
103106
_writer.reset(new (std::nothrow) vectorized::VArrowFlightResultWriter(
104107
_sender.get(), _output_vexpr_ctxs, _profile));
105108
break;

be/src/service/internal_service.cpp

+6-3
Original file line numberDiff line numberDiff line change
@@ -923,6 +923,7 @@ void PInternalServiceImpl::fetch_arrow_flight_schema(google::protobuf::RpcContro
923923
auto st = ExecEnv::GetInstance()->result_mgr()->find_arrow_schema(
924924
UniqueId(request->finst_id()).to_thrift(), &schema);
925925
if (!st.ok()) {
926+
LOG(WARNING) << "fetch arrow flight schema failed, errmsg=" << st;
926927
st.to_protobuf(result->mutable_status());
927928
return;
928929
}
@@ -931,9 +932,11 @@ void PInternalServiceImpl::fetch_arrow_flight_schema(google::protobuf::RpcContro
931932
st = serialize_arrow_schema(&schema, &schema_str);
932933
if (st.ok()) {
933934
result->set_schema(std::move(schema_str));
934-
if (!config::public_access_ip.empty() && config::public_access_port != -1) {
935-
result->set_be_arrow_flight_ip(config::public_access_ip);
936-
result->set_be_arrow_flight_port(config::public_access_port);
935+
if (!config::public_host.empty()) {
936+
result->set_be_arrow_flight_ip(config::public_host);
937+
}
938+
if (config::arrow_flight_sql_proxy_port != -1) {
939+
result->set_be_arrow_flight_port(config::arrow_flight_sql_proxy_port);
937940
}
938941
}
939942
st.to_protobuf(result->mutable_status());

be/src/vec/data_types/serde/data_type_number_serde.cpp

+15-6
Original file line numberDiff line numberDiff line change
@@ -78,12 +78,21 @@ void DataTypeNumberSerDe<T>::write_column_to_arrow(const IColumn& column, const
7878
auto arrow_null_map = revert_null_map(null_map, start, end);
7979
auto arrow_null_map_data = arrow_null_map.empty() ? nullptr : arrow_null_map.data();
8080
if constexpr (std::is_same_v<T, UInt8>) {
81-
ARROW_BUILDER_TYPE& builder = assert_cast<ARROW_BUILDER_TYPE&>(*array_builder);
82-
checkArrowStatus(
83-
builder.AppendValues(reinterpret_cast<const uint8_t*>(col_data.data() + start),
84-
end - start,
85-
reinterpret_cast<const uint8_t*>(arrow_null_map_data)),
86-
column.get_name(), array_builder->type()->name());
81+
auto* null_builder = dynamic_cast<arrow::NullBuilder*>(array_builder);
82+
if (null_builder) {
83+
for (size_t i = start; i < end; ++i) {
84+
checkArrowStatus(null_builder->AppendNull(), column.get_name(),
85+
null_builder->type()->name());
86+
}
87+
} else {
88+
ARROW_BUILDER_TYPE& builder = assert_cast<ARROW_BUILDER_TYPE&>(*array_builder);
89+
checkArrowStatus(
90+
builder.AppendValues(reinterpret_cast<const uint8_t*>(col_data.data() + start),
91+
end - start,
92+
reinterpret_cast<const uint8_t*>(arrow_null_map_data)),
93+
column.get_name(), array_builder->type()->name());
94+
}
95+
8796
} else if constexpr (std::is_same_v<T, Int128>) {
8897
auto& string_builder = assert_cast<arrow::StringBuilder&>(*array_builder);
8998
for (size_t i = start; i < end; ++i) {

fe/fe-core/src/main/java/org/apache/doris/service/arrowflight/DorisFlightSqlProducer.java

+9-2
Original file line numberDiff line numberDiff line change
@@ -249,8 +249,15 @@ private FlightInfo executeQueryStatement(String peerIdentity, ConnectContext con
249249
// The query results of Arrow Flight SQL will be randomly saved on a Doris BE node.
250250
// If it is different from the Doris BE node randomly routed by nginx,
251251
// data forwarding needs to be done inside the Doris BE node.
252-
location = Location.forGrpcInsecure(flightSQLConnectProcessor.getPublicAccessAddr().hostname,
253-
flightSQLConnectProcessor.getPublicAccessAddr().port);
252+
if (flightSQLConnectProcessor.getPublicAccessAddr().isSetPort()) {
253+
location = Location.forGrpcInsecure(
254+
flightSQLConnectProcessor.getPublicAccessAddr().hostname,
255+
flightSQLConnectProcessor.getPublicAccessAddr().port);
256+
} else {
257+
location = Location.forGrpcInsecure(
258+
flightSQLConnectProcessor.getPublicAccessAddr().hostname,
259+
connectContext.getResultFlightServerAddr().port);
260+
}
254261
} else {
255262
location = Location.forGrpcInsecure(connectContext.getResultFlightServerAddr().hostname,
256263
connectContext.getResultFlightServerAddr().port);

fe/fe-core/src/main/java/org/apache/doris/service/arrowflight/FlightSqlConnectProcessor.java

+5-3
Original file line numberDiff line numberDiff line change
@@ -131,9 +131,11 @@ public Schema fetchArrowFlightSchema(int timeoutMs) {
131131
throw new RuntimeException(String.format("fetch arrow flight schema failed, queryId: %s, errmsg: %s",
132132
DebugUtil.printId(tid), resultStatus));
133133
}
134-
if (pResult.hasBeArrowFlightIp() && pResult.hasBeArrowFlightPort()) {
135-
publicAccessAddr.hostname = pResult.getBeArrowFlightIp().toStringUtf8();
136-
publicAccessAddr.port = pResult.getBeArrowFlightPort();
134+
if (pResult.hasBeArrowFlightIp()) {
135+
publicAccessAddr.setHostname(pResult.getBeArrowFlightIp().toStringUtf8());
136+
}
137+
if (pResult.hasBeArrowFlightPort()) {
138+
publicAccessAddr.setPort(pResult.getBeArrowFlightPort());
137139
}
138140
if (pResult.hasSchema() && pResult.getSchema().size() > 0) {
139141
RootAllocator rootAllocator = new RootAllocator(Integer.MAX_VALUE);

0 commit comments

Comments
 (0)