From c9725ffb4fad8c5670c4f2f6723b2d920c6ef608 Mon Sep 17 00:00:00 2001 From: Steve Loughran Date: Wed, 17 Sep 2025 14:45:45 +0100 Subject: [PATCH 1/3] HADOOP-19696. hadoop binary distribution to move cloud connectors to hadoop common/lib * new assembly for hadoop cloud storage * hadoop-cloud-storage does the assembly on -Pdist * layout stitching to move into share/hadoop/common/lib * remove connectors from hadoop-tools-dist * cut old jackson version from huawei cloud dependency -even though it was being upgraded by our own artifacts, it was a complication. --- dev-support/bin/dist-layout-stitching | 4 ++ .../assemblies/hadoop-cloud-storage.xml | 55 ++++++++++++++++ .../main/resources/assemblies/hadoop-src.xml | 1 + .../hadoop-cloud-storage/pom.xml | 63 +++++++++++++++++++ .../hadoop-huaweicloud/pom.xml | 4 ++ hadoop-tools/hadoop-tools-dist/pom.xml | 40 ------------ 6 files changed, 127 insertions(+), 40 deletions(-) create mode 100644 hadoop-assemblies/src/main/resources/assemblies/hadoop-cloud-storage.xml diff --git a/dev-support/bin/dist-layout-stitching b/dev-support/bin/dist-layout-stitching index d4bfd8aaada3b..fee6b92219d4f 100755 --- a/dev-support/bin/dist-layout-stitching +++ b/dev-support/bin/dist-layout-stitching @@ -130,6 +130,10 @@ run cp -p "${ROOT}/README.txt" . run copy "${ROOT}/hadoop-common-project/hadoop-common/target/hadoop-common-${VERSION}" . run copy "${ROOT}/hadoop-common-project/hadoop-nfs/target/hadoop-nfs-${VERSION}" . run copy "${ROOT}/hadoop-common-project/hadoop-registry/target/hadoop-registry-${VERSION}" . + +# cloud connectors go into common +run copy "${ROOT}/hadoop-cloud-storage-project/hadoop-cloud-storage/target/hadoop-cloud-storage-${VERSION}" . + run copy "${ROOT}/hadoop-hdfs-project/hadoop-hdfs/target/hadoop-hdfs-${VERSION}" . run copy "${ROOT}/hadoop-hdfs-project/hadoop-hdfs-nfs/target/hadoop-hdfs-nfs-${VERSION}" . run copy "${ROOT}/hadoop-hdfs-project/hadoop-hdfs-client/target/hadoop-hdfs-client-${VERSION}" . 
diff --git a/hadoop-assemblies/src/main/resources/assemblies/hadoop-cloud-storage.xml b/hadoop-assemblies/src/main/resources/assemblies/hadoop-cloud-storage.xml new file mode 100644 index 0000000000000..de668d1516a4a --- /dev/null +++ b/hadoop-assemblies/src/main/resources/assemblies/hadoop-cloud-storage.xml @@ -0,0 +1,55 @@ + + + hadoop-cloud-storage + + dir + + false + + + + ../../hadoop-tools/hadoop-aws/src/main/bin + /bin + 0755 + + + ./../hadoop-tools/hadoop-aws/src/main/shellprofile.d + + * + + /libexec/shellprofile.d + 0755 + + + + + + /share/hadoop/common/lib + false + runtime + false + + + org.apache.hadoop:hadoop-annotations + org.apache.hadoop.thirdparty:hadoop-shaded-guava + + + + diff --git a/hadoop-assemblies/src/main/resources/assemblies/hadoop-src.xml b/hadoop-assemblies/src/main/resources/assemblies/hadoop-src.xml index 871694209393f..7895f4e57142f 100644 --- a/hadoop-assemblies/src/main/resources/assemblies/hadoop-src.xml +++ b/hadoop-assemblies/src/main/resources/assemblies/hadoop-src.xml @@ -57,6 +57,7 @@ **/file:/** **/SecurityAuth.audit* patchprocess/** + **/auth-keys.xml diff --git a/hadoop-cloud-storage-project/hadoop-cloud-storage/pom.xml b/hadoop-cloud-storage-project/hadoop-cloud-storage/pom.xml index 2df2cd3d9242d..10eda1d4b6814 100644 --- a/hadoop-cloud-storage-project/hadoop-cloud-storage/pom.xml +++ b/hadoop-cloud-storage-project/hadoop-cloud-storage/pom.xml @@ -124,6 +124,12 @@ org.apache.hadoop hadoop-huaweicloud compile + + + com.fasterxml.jackson + * + + org.apache.hadoop @@ -146,4 +152,61 @@ + + + + + maven-deploy-plugin + + true + + + + org.apache.rat + apache-rat-plugin + + + + + + + + dist + + false + + + + + org.apache.maven.plugins + maven-assembly-plugin + + + org.apache.hadoop + hadoop-assemblies + ${project.version} + + + + + dist + prepare-package + + single + + + false + false + ${project.artifactId}-${project.version} + + hadoop-cloud-storage + + + + + + + + + diff --git 
a/hadoop-cloud-storage-project/hadoop-huaweicloud/pom.xml b/hadoop-cloud-storage-project/hadoop-huaweicloud/pom.xml index 641ad3afb1b0e..d00dc1f22c704 100755 --- a/hadoop-cloud-storage-project/hadoop-huaweicloud/pom.xml +++ b/hadoop-cloud-storage-project/hadoop-huaweicloud/pom.xml @@ -169,6 +169,10 @@ log4j-api org.apache.logging.log4j + + com.fasterxml.jackson.core + * + diff --git a/hadoop-tools/hadoop-tools-dist/pom.xml b/hadoop-tools/hadoop-tools-dist/pom.xml index 9fa02a0ac0027..803caf8fa0648 100644 --- a/hadoop-tools/hadoop-tools-dist/pom.xml +++ b/hadoop-tools/hadoop-tools-dist/pom.xml @@ -92,30 +92,12 @@ pom ${project.version} - - org.apache.hadoop - hadoop-aws - compile - ${project.version} - org.apache.hadoop hadoop-kafka compile ${project.version} - - org.apache.hadoop - hadoop-azure - compile - ${project.version} - - - org.apache.hadoop - hadoop-aliyun - compile - ${project.version} - org.apache.hadoop hadoop-sls @@ -127,34 +109,12 @@ ${project.version} compile - - org.apache.hadoop - hadoop-azure-datalake - compile - ${project.version} - org.apache.hadoop hadoop-fs2img compile ${project.version} - - org.apache.hadoop - hadoop-gcp - compile - ${project.version} - - - - * - * - - - From 0aaa6ce66e8a8fc7fc04fa4e2650badf956d531a Mon Sep 17 00:00:00 2001 From: Steve Loughran Date: Tue, 23 Sep 2025 18:35:28 +0100 Subject: [PATCH 2/3] HADOOP-19696. cos, huawei, aliyun cloud dependencies * add the artifacts found with the relevant hadoop-* modules to the binary license * leave all three with cloud-storage dependencies such that they don't include these in a pull of hadoop-cloud-storage (regression?) * unless specific profiles cos, huawei and aliyun are declared, at which point they're exported by hadoop-cloud-storage and put into the assembly. This avoids dealing with complex dependencies we don't want (okio, more xml parsers,...), while making it straightforward to build a distro with it if you want. bundle.jar is always getting in. 
Do I do it here iff -Paws is set or do I delay it until the copy to the final distro artifact tree takes place. delay: keeps it as an export of hadoop-cloud-storage pom early: consistent with the rest --- LICENSE-binary | 21 +++++- .../hadoop-cloud-storage/pom.xml | 68 ++++++++++++++++++- licenses-binary/LICENSE-dom4j.txt | 39 +++++++++++ 3 files changed, 126 insertions(+), 2 deletions(-) create mode 100644 licenses-binary/LICENSE-dom4j.txt diff --git a/LICENSE-binary b/LICENSE-binary index c9d53de77981a..936390a871f92 100644 --- a/LICENSE-binary +++ b/LICENSE-binary @@ -203,7 +203,10 @@ -------------------------------------------------------------------------------- This project bundles some components that are also licensed under the Apache -License Version 2.0: +License Version 2.0. +Note: some of the listed artifacts may not be included in a given build of the binary +distribution; it depends on the build options. This list intends +to be inclusive of all which may be included: hadoop-hdfs-project/hadoop-hdfs/src/main/webapps/static/nvd3-1.8.5.* (css and js files) @@ -215,6 +218,7 @@ com.aliyun:aliyun-java-sdk-core:4.5.10 com.aliyun:aliyun-java-sdk-kms:2.11.0 com.aliyun:aliyun-java-sdk-ram:3.1.0 com.aliyun:aliyun-java-sdk-sts:3.0.0 +com.aliyun:java-trace-api:0.2.11-beta.jar com.aliyun.oss:aliyun-sdk-oss:3.13.2 com.cedarsoftware:java-util:1.9.0 com.cedarsoftware:json-io:2.5.1 @@ -268,8 +272,13 @@ com.google.http-client:google-http-client-jackson2:1.46.3 com.google.http-client:google-http-client:1.46.3 com.google.j2objc:j2objc-annotations:3.0.0 com.google.oauth-client:google-oauth-client:1.37.0 +com.huaweicloud:esdk-obs-java:3.20.4.2 +com.jamesmurty.utils:java-xmlbuilder-1.2.jar com.microsoft.azure:azure-storage:7.0.0 com.nimbusds:nimbus-jose-jwt:10.4 +com.squareup.okhttp3:okhttp:jar:3.14.2 +com.squareup.okio:okio:jar:1.17.2 +com.volcengine:ve-tos-java-sdk-hadoop:2.8.9.jar com.zaxxer:HikariCP:4.0.3 commons-beanutils:commons-beanutils:1.9.4 
commons-cli:commons-cli:1.9.0 @@ -346,6 +355,9 @@ io.opentelemetry:opentelemetry-sdk-logs:1.47.0 io.opentelemetry:opentelemetry-sdk-metrics:1.47.0 io.opentelemetry:opentelemetry-sdk-trace:1.47.0 io.opentelemetry.semconv:opentelemetry-semconv:1.29.0-alpha +io.opentracing:opentracing-api:0.33.0.jar +io.opentracing:opentracing-noop:0.33.0.jar +io.opentracing:opentracing-util:0.33.0.jar io.reactivex:rxjava:1.3.8 io.reactivex:rxjava-string:1.1.1 io.reactivex:rxnetty:0.4.20 @@ -496,6 +508,7 @@ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanage bootstrap v3.3.6 broccoli-asset-rev v2.4.2 broccoli-funnel v1.0.1 +cos_api-bundle-5.6.19.jar datatables v1.11.5 em-helpers v0.5.13 em-table v0.1.6 @@ -552,6 +565,7 @@ org.codehaus.mojo:animal-sniffer-annotations:1.24 org.jruby.jcodings:jcodings:1.0.13 org.jruby.joni:joni:2.1.2 org.ojalgo:ojalgo:43.0 +org.reactivestreams:reactive-streams:1.0.3.jar org.slf4j:jul-to-slf4j:1.7.36 org.slf4j:slf4j-api:1.7.36 org.slf4j:slf4j-reload4j:1.7.36 @@ -622,3 +636,8 @@ Public Domain ------------- aopalliance:aopalliance:1.0 + +Dom4J license +------------- + +org.dom4j:dom4j:2.1.4.jar \ No newline at end of file diff --git a/hadoop-cloud-storage-project/hadoop-cloud-storage/pom.xml b/hadoop-cloud-storage-project/hadoop-cloud-storage/pom.xml index 10eda1d4b6814..69fccb32434f9 100644 --- a/hadoop-cloud-storage-project/hadoop-cloud-storage/pom.xml +++ b/hadoop-cloud-storage-project/hadoop-cloud-storage/pom.xml @@ -99,6 +99,12 @@ org.apache.hadoop hadoop-aliyun compile + + + * + * + + org.apache.hadoop @@ -119,6 +125,12 @@ org.apache.hadoop hadoop-cos compile + + + * + * + + org.apache.hadoop @@ -126,7 +138,7 @@ compile - com.fasterxml.jackson + * * @@ -169,7 +181,10 @@ + + + dist @@ -208,5 +223,56 @@ + + + + huaweicloud + + false + + + + org.apache.hadoop + hadoop-huaweicloud + compile + + + com.fasterxml.jackson + * + + + + + + + + + cos + + false + + + + org.apache.hadoop + hadoop-cos + compile + + + + + + aliyun + + 
false + + + + org.apache.hadoop + hadoop-aliyun + compile + + + + diff --git a/licenses-binary/LICENSE-dom4j.txt b/licenses-binary/LICENSE-dom4j.txt new file mode 100644 index 0000000000000..1a02acb149474 --- /dev/null +++ b/licenses-binary/LICENSE-dom4j.txt @@ -0,0 +1,39 @@ +Copyright 2001-2023 © MetaStuff, Ltd. and DOM4J contributors. All Rights Reserved. + +Redistribution and use of this software and associated documentation +("Software"), with or without modification, are permitted provided +that the following conditions are met: + +1. Redistributions of source code must retain copyright + statements and notices. Redistributions must also contain a + copy of this document. + +2. Redistributions in binary form must reproduce the + above copyright notice, this list of conditions and the + following disclaimer in the documentation and/or other + materials provided with the distribution. + +3. The name "DOM4J" must not be used to endorse or promote + products derived from this Software without prior written + permission of MetaStuff, Ltd. For written permission, + please contact dom4j-info@metastuff.com. + +4. Products derived from this Software may not be called "DOM4J" + nor may "DOM4J" appear in their names without prior written + permission of MetaStuff, Ltd. DOM4J is a registered + trademark of MetaStuff, Ltd. + +5. Due credit should be given to the DOM4J Project - https://dom4j.github.io/ + +THIS SOFTWARE IS PROVIDED BY METASTUFF, LTD. AND CONTRIBUTORS +``AS IS'' AND ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT +NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND +FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL +METASTUFF, LTD. 
OR ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, +INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, +STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED +OF THE POSSIBILITY OF SUCH DAMAGE. From fe04497ade257aaf4dcbbb9748a9d88fa5a55e1b Mon Sep 17 00:00:00 2001 From: Steve Loughran Date: Fri, 26 Sep 2025 10:40:31 +0100 Subject: [PATCH 3/3] HADOOP-19708 volcano tos: remove shading * Unshade tos * explicit declaration of apache http dependencies, with excludes as needed * updated LICENSE-binary --- LICENSE-binary | 2 + .../hadoop-cloud-storage/pom.xml | 48 ++++++++--- .../hadoop-tos/pom.xml | 83 +++++++++++++------ hadoop-project/pom.xml | 24 ++++++ 4 files changed, 119 insertions(+), 38 deletions(-) diff --git a/LICENSE-binary b/LICENSE-binary index 936390a871f92..904a1219ced98 100644 --- a/LICENSE-binary +++ b/LICENSE-binary @@ -385,6 +385,8 @@ org.apache.htrace:htrace-core:3.1.0-incubating org.apache.htrace:htrace-core4:4.1.0-incubating org.apache.httpcomponents:httpclient:4.5.13 org.apache.httpcomponents:httpcore:4.4.13 +org.apache.httpcomponents.client5:httpclient5:5.5 +org.apache.httpcomponents.core5:httpcore5:5.5 org.apache.kafka:kafka-clients:3.9.0 org.apache.kerby:kerb-admin:2.0.3 org.apache.kerby:kerb-client:2.0.3 diff --git a/hadoop-cloud-storage-project/hadoop-cloud-storage/pom.xml b/hadoop-cloud-storage-project/hadoop-cloud-storage/pom.xml index 69fccb32434f9..1cd932d2dd879 100644 --- a/hadoop-cloud-storage-project/hadoop-cloud-storage/pom.xml +++ b/hadoop-cloud-storage-project/hadoop-cloud-storage/pom.xml @@ -95,17 +95,7 @@ - - org.apache.hadoop - hadoop-aliyun - compile - - - * - * - - - + org.apache.hadoop hadoop-aws @@ -147,6 +137,12 @@ org.apache.hadoop 
hadoop-tos compile + + + * + * + + org.apache.hadoop @@ -259,7 +255,7 @@ - + aliyun @@ -273,6 +269,34 @@ + + + adls + + false + + + + org.apache.hadoop + hadoop-azure-datalake + compile + + + + + + tos + + false + + + + org.apache.hadoop + hadoop-tos + compile + + + diff --git a/hadoop-cloud-storage-project/hadoop-tos/pom.xml b/hadoop-cloud-storage-project/hadoop-tos/pom.xml index 4bbaf74e0f892..1f44982bc9cc3 100644 --- a/hadoop-cloud-storage-project/hadoop-tos/pom.xml +++ b/hadoop-cloud-storage-project/hadoop-tos/pom.xml @@ -48,11 +48,27 @@ hadoop-mapreduce-client-core provided + + org.apache.httpcomponents.client5 + httpclient5 + + + org.apache.httpcomponents.core5 + httpcore5 + com.volcengine ve-tos-java-sdk-hadoop ${ve-tos-java-sdk.version} + + org.apache.httpcomponents.client5 + httpclient5 + + + org.apache.httpcomponents.core5 + httpcore5 + org.slf4j slf4j-api @@ -152,32 +168,6 @@ - - org.apache.maven.plugins - maven-shade-plugin - - - package - - shade - - - true - - - *:* - - META-INF/*.SF - META-INF/*.DSA - META-INF/*.RSA - - - - - - - - com.github.spotbugs spotbugs-maven-plugin @@ -190,4 +180,45 @@ + + + + + shade-tos + + false + + + + + org.apache.maven.plugins + maven-shade-plugin + + + package + + shade + + + true + + + *:* + + META-INF/*.SF + META-INF/*.DSA + META-INF/*.RSA + mozilla/public-suffix-list.txt + + + + + + + + + + + + diff --git a/hadoop-project/pom.xml b/hadoop-project/pom.xml index ba16ed9b34e3b..2f94a34efb940 100644 --- a/hadoop-project/pom.xml +++ b/hadoop-project/pom.xml @@ -78,6 +78,8 @@ 4.5.13 4.4.13 + 5.5 + 5.3.6 1.7.36 @@ -865,6 +867,28 @@ httpcore ${httpcore.version} + + org.apache.httpcomponents.client5 + httpclient5 + ${httpclient5.version} + + + org.slf4j + * + + + + + org.apache.httpcomponents.core5 + httpcore5 + ${httpcore5.version} + + + org.apache.logging.log4j + * + + + commons-codec commons-codec