Skip to content

Commit

Permalink
Merge branch 'master' into SPARK-47320
Browse files Browse the repository at this point in the history
  • Loading branch information
ashahid committed Jan 14, 2025
2 parents d945855 + bba6839 commit 287c25c
Show file tree
Hide file tree
Showing 59 changed files with 5,621 additions and 3,302 deletions.
14 changes: 6 additions & 8 deletions .github/workflows/build_and_test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -600,19 +600,16 @@ jobs:
done
- name: Install Conda for pip packaging test
if: contains(matrix.modules, 'pyspark-errors')
run: |
curl -s -L "https://github.com/conda-forge/miniforge/releases/download/24.11.2-1/Miniforge3-Linux-x86_64.sh" > miniforge3.sh
bash miniforge3.sh -b -p $HOME/miniforge3
rm miniforge3.sh
uses: conda-incubator/setup-miniconda@v3
with:
miniforge-version: latest
# Run the tests.
- name: Run tests
env: ${{ fromJSON(inputs.envs) }}
shell: 'script -q -e -c "bash {0}"'
run: |
if [[ "$MODULES_TO_TEST" == *"pyspark-errors"* ]]; then
export PATH=$PATH:$HOME/miniforge3/bin
env
which conda
export PATH=$CONDA/bin:$PATH
export SKIP_PACKAGING=false
echo "Python Packaging Tests Enabled!"
fi
Expand All @@ -625,9 +622,10 @@ jobs:
- name: Upload coverage to Codecov
if: fromJSON(inputs.envs).PYSPARK_CODECOV == 'true'
uses: codecov/codecov-action@v5
env:
CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }}
with:
files: ./python/coverage.xml
token: ${{ secrets.CODECOV_TOKEN }}
flags: unittests
name: PySpark
verbose: true
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -467,9 +467,6 @@ private enum CaseSensitivity {
*/
private static final int CASE_SENSITIVITY_MASK = 0b1;

private static final String UTF8_BINARY_COLLATION_NAME = "UTF8_BINARY";
private static final String UTF8_LCASE_COLLATION_NAME = "UTF8_LCASE";

private static final int UTF8_BINARY_COLLATION_ID =
new CollationSpecUTF8(CaseSensitivity.UNSPECIFIED, SpaceTrimming.NONE).collationId;
private static final int UTF8_LCASE_COLLATION_ID =
Expand Down Expand Up @@ -655,9 +652,9 @@ protected CollationMeta buildCollationMeta() {
protected String normalizedCollationName() {
StringBuilder builder = new StringBuilder();
if(caseSensitivity == CaseSensitivity.UNSPECIFIED){
builder.append(UTF8_BINARY_COLLATION_NAME);
builder.append(CollationNames.UTF8_BINARY);
} else{
builder.append(UTF8_LCASE_COLLATION_NAME);
builder.append(CollationNames.UTF8_LCASE);
}
if (spaceTrimming != SpaceTrimming.NONE) {
builder.append('_');
Expand All @@ -669,12 +666,12 @@ protected String normalizedCollationName() {
static List<CollationIdentifier> listCollations() {
CollationIdentifier UTF8_BINARY_COLLATION_IDENT = new CollationIdentifier(
PROVIDER_SPARK,
UTF8_BINARY_COLLATION_NAME,
CollationNames.UTF8_BINARY,
CollationSpecICU.ICU_VERSION
);
CollationIdentifier UTF8_LCASE_COLLATION_IDENT = new CollationIdentifier(
PROVIDER_SPARK,
UTF8_LCASE_COLLATION_NAME,
CollationNames.UTF8_LCASE,
CollationSpecICU.ICU_VERSION
);
return Arrays.asList(UTF8_BINARY_COLLATION_IDENT, UTF8_LCASE_COLLATION_IDENT);
Expand Down Expand Up @@ -758,7 +755,7 @@ private enum AccentSensitivity {
VersionInfo.ICU_VERSION.getMinor());

static {
ICULocaleMap.put("UNICODE", ULocale.ROOT);
ICULocaleMap.put(CollationNames.UNICODE, ULocale.ROOT);
// ICU-implemented `ULocale`s which have corresponding `Collator` installed.
ULocale[] locales = Collator.getAvailableULocales();
// Build locale names in format: language["_" optional script]["_" optional country code].
Expand Down Expand Up @@ -806,13 +803,13 @@ private enum AccentSensitivity {
}

private static final int UNICODE_COLLATION_ID = new CollationSpecICU(
"UNICODE",
CollationNames.UNICODE,
CaseSensitivity.CS,
AccentSensitivity.AS,
SpaceTrimming.NONE).collationId;

private static final int UNICODE_CI_COLLATION_ID = new CollationSpecICU(
"UNICODE",
CollationNames.UNICODE,
CaseSensitivity.CI,
AccentSensitivity.AS,
SpaceTrimming.NONE).collationId;
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.spark.sql.catalyst.util;

/**
 * Holder for collation name string constants, so that the same literals are
 * not repeated at each place that refers to a collation by name.
 *
 * <p>This class only carries constants. It is {@code final} and has a private
 * constructor so that it can be neither instantiated nor subclassed.
 */
public final class CollationNames {
  /** Collation name {@code "UTF8_BINARY"}. */
  public static final String UTF8_BINARY = "UTF8_BINARY";

  /** Collation name {@code "UTF8_LCASE"}. */
  public static final String UTF8_LCASE = "UTF8_LCASE";

  /** Collation name {@code "UNICODE"}. */
  public static final String UNICODE = "UNICODE";

  /**
   * Collation name {@code "UNICODE_CI"}.
   * NOTE(review): presumably the case-insensitive variant of {@link #UNICODE};
   * confirm against the collation factory.
   */
  public static final String UNICODE_CI = "UNICODE_CI";

  // Constants holder only: suppress the implicit public default constructor.
  private CollationNames() {
  }
}
Loading

0 comments on commit 287c25c

Please sign in to comment.