Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

GH-44062: [Dev][Archery][Integration] Reduce needless test matrix #44099

Merged
merged 8 commits into from
Sep 16, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions ci/scripts/integration_arrow.sh
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,9 @@ gold_dir=$arrow_dir/testing/data/arrow-ipc-stream/integration
: ${ARROW_INTEGRATION_JAVA:=ON}
: ${ARROW_INTEGRATION_JS:=ON}

: ${ARCHERY_INTEGRATION_TARGET_LANGUAGES:=cpp,csharp,go,java,js}
export ARCHERY_INTEGRATION_TARGET_LANGUAGES

. ${arrow_dir}/ci/scripts/util_log.sh

github_actions_group_begin "Integration: Prepare: Archery"
Expand Down
3 changes: 3 additions & 0 deletions dev/archery/archery/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -744,6 +744,9 @@ def _set_default(opt, default):
@click.option('--with-rust', type=bool, default=False,
help='Include Rust in integration tests',
envvar="ARCHERY_INTEGRATION_WITH_RUST")
@click.option('--target-languages', default='',
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

"Languages" is not really right here, as nanoarrow for example is not a language. "Implementations" perhaps?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

OK. I'll rename it: GH-44155

help=('Target languages in this integration tests'),
envvar="ARCHERY_INTEGRATION_TARGET_LANGUAGES")
Comment on lines +747 to +749
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Uh, can the help string be more explanatory? It's really not possible to understand what this does from the current wording.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sure. But is the help string a suitable place for a longer explanation with the click API?
In general, --help output uses a single line (or a few lines) per option.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Then it can be explained in the command's help: https://click.palletsprojects.com/en/8.1.x/api/#click.Command

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks. I'll use it: GH-44158

@click.option('--write_generated_json', default="",
help='Generate test JSON to indicated path')
@click.option('--run-ipc', is_flag=True, default=False,
Expand Down
59 changes: 47 additions & 12 deletions dev/archery/archery/integration/runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,12 +67,13 @@ class IntegrationRunner(object):

def __init__(self, json_files,
flight_scenarios: List[Scenario],
testers: List[Tester], tempdir=None,
debug=False, stop_on_error=True, gold_dirs=None,
testers: List[Tester], other_testers: List[Tester],
tempdir=None, debug=False, stop_on_error=True, gold_dirs=None,
serial=False, match=None, **unused_kwargs):
self.json_files = json_files
self.flight_scenarios = flight_scenarios
self.testers = testers
self.other_testers = other_testers
self.temp_dir = tempdir or tempfile.mkdtemp()
self.debug = debug
self.stop_on_error = stop_on_error
Expand Down Expand Up @@ -100,6 +101,20 @@ def run_ipc(self):
producer, consumer, self._produce_consume,
self.json_files)

for producer, consumer in itertools.product(
filter(lambda t: t.PRODUCER, self.testers),
filter(lambda t: t.CONSUMER, self.other_testers)):
self._compare_ipc_implementations(
producer, consumer, self._produce_consume,
self.json_files)

for producer, consumer in itertools.product(
filter(lambda t: t.PRODUCER, self.other_testers),
filter(lambda t: t.CONSUMER, self.testers)):
self._compare_ipc_implementations(
producer, consumer, self._produce_consume,
self.json_files)

if self.gold_dirs:
for gold_dir, consumer in itertools.product(
self.gold_dirs,
Expand All @@ -124,7 +139,7 @@ def run_flight(self):
"""
servers = filter(lambda t: t.FLIGHT_SERVER, self.testers)
clients = filter(lambda t: (t.FLIGHT_CLIENT and t.CONSUMER),
self.testers)
self.testers + self.other_testers)
for server, client in itertools.product(servers, clients):
self._compare_flight_implementations(server, client)
log('\n')
Expand All @@ -138,6 +153,14 @@ def run_c_data(self):
filter(lambda t: t.C_DATA_SCHEMA_EXPORTER, self.testers),
filter(lambda t: t.C_DATA_SCHEMA_IMPORTER, self.testers)):
self._compare_c_data_implementations(producer, consumer)
for producer, consumer in itertools.product(
filter(lambda t: t.C_DATA_SCHEMA_EXPORTER, self.testers),
filter(lambda t: t.C_DATA_SCHEMA_IMPORTER, self.other_testers)):
self._compare_c_data_implementations(producer, consumer)
for producer, consumer in itertools.product(
filter(lambda t: t.C_DATA_SCHEMA_EXPORTER, self.other_testers),
filter(lambda t: t.C_DATA_SCHEMA_IMPORTER, self.testers)):
self._compare_c_data_implementations(producer, consumer)
log('\n')

def _gold_tests(self, gold_dir):
Expand Down Expand Up @@ -560,31 +583,42 @@ def get_static_json_files():
def run_all_tests(with_cpp=True, with_java=True, with_js=True,
with_csharp=True, with_go=True, with_rust=False,
with_nanoarrow=False, run_ipc=False, run_flight=False,
run_c_data=False, tempdir=None, **kwargs):
run_c_data=False, tempdir=None, target_languages="",
**kwargs):
tempdir = tempdir or tempfile.mkdtemp(prefix='arrow-integration-')
print(["before", target_languages])
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You should remove those print calls

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes... I've removed it: #44140

target_languages = list(filter(len, target_languages.split(",")))
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why the filter call?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Because "".split(",") returns [""], not [].
(I was surprised by the Python behavior, because "".split(",") returns [] in Ruby.)

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why would you pass ,? It's ok to make it an error IMHO.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Because --target-languages=cpp,java is an input. We need to extract cpp and java from cpp,java.

I know that click supports the --target-languages=cpp --target-languages=java style. But reusing ci/scripts/integration_arrow.sh in apache/arrow, apache/arrow-rs, apache/arrow-nanoarrow and apache/arrow-go is difficult with that style. The entirely overwritable --target-languages=cpp,java style is easy to reuse.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah, you're talking about the empty string, sorry, I had misread.
Then I would suggest the much more idiomatic

Suggested change
target_languages = list(filter(len, target_languages.split(",")))
target_languages = target_languages.split(",") if target_languages else []

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks. I'll use it in #44156.

print(["after", target_languages])

testers: List[Tester] = []
other_testers: List[Tester] = []

def append_tester(language, tester):
if len(target_languages) == 0 or language in target_languages:
testers.append(tester)
else:
other_testers.append(tester)

if with_cpp:
testers.append(CppTester(**kwargs))
append_tester("cpp", CppTester(**kwargs))

if with_java:
testers.append(JavaTester(**kwargs))
append_tester("java", JavaTester(**kwargs))

if with_js:
testers.append(JSTester(**kwargs))
append_tester("js", JSTester(**kwargs))

if with_csharp:
testers.append(CSharpTester(**kwargs))
append_tester("csharp", CSharpTester(**kwargs))

if with_go:
testers.append(GoTester(**kwargs))
append_tester("go", GoTester(**kwargs))

if with_nanoarrow:
testers.append(NanoarrowTester(**kwargs))
append_tester("nanoarrow", NanoarrowTester(**kwargs))

if with_rust:
testers.append(RustTester(**kwargs))
append_tester("rust", RustTester(**kwargs))

static_json_files = get_static_json_files()
generated_json_files = datagen.get_generated_json_files(tempdir=tempdir)
Expand Down Expand Up @@ -666,7 +700,8 @@ def run_all_tests(with_cpp=True, with_java=True, with_js=True,
),
]

runner = IntegrationRunner(json_files, flight_scenarios, testers, **kwargs)
runner = IntegrationRunner(json_files, flight_scenarios, testers,
other_testers, **kwargs)
if run_ipc:
runner.run_ipc()
if run_flight:
Expand Down
Loading