Skip to content

Commit

Permalink
vdk-core: test ingestion with multiple threads (#2796)
Browse files Browse the repository at this point in the history
The IIngester methods (send_xxx_for_ingestion) should be safe to call
from multiple threads, but this has never been explicitly tested. This test is
far from perfect: verifying that something is thread-safe is not easy.

But it's good to have it because

A) it's an indication of this thread-safe contract for future developers
B) If there is some serious regression this should catch it even if it
doesn't catch some more subtle regressions.
  • Loading branch information
antoniivanov authored Oct 19, 2023
1 parent 2dc998c commit e17ead2
Show file tree
Hide file tree
Showing 2 changed files with 75 additions and 0 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
# Copyright 2021-2023 VMware, Inc.
# SPDX-License-Identifier: Apache-2.0
import random
import threading
import time

from vdk.api.job_input import IJobInput


def run(job_input: IJobInput):
    """Send data for ingestion concurrently from 100 threads.

    Fifty threads target the "memory1" ingestion method and fifty target
    "memory2"; the order in which they are launched is shuffled. The
    function returns only after every thread has finished.

    :param job_input: job input object handed to each worker thread.
    """
    ingest_methods = ["memory1"] * 50 + ["memory2"] * 50
    # Shuffle so the two methods are interleaved rather than run back-to-back.
    random.shuffle(ingest_methods)

    workers = [
        threading.Thread(target=ingest_data, args=(job_input, method_name))
        for method_name in ingest_methods
    ]

    for worker in workers:
        worker.start()

    # Block until all sends have been issued.
    for worker in workers:
        worker.join()


def ingest_data(job_input: IJobInput, method: str):
    """Send one object and three tabular rows through the given method.

    Sleeps for a random interval of up to 0.1 seconds first, so that
    concurrently running callers overlap in varying order.

    :param job_input: job input object used to send the data.
    :param method: name of the ingestion method passed to both sends.
    """
    time.sleep(random.uniform(0, 0.1))

    payload = {
        "int_key": 42,
        "str_key": "example_str",
        "bool_key": True,
        "float_key": 1.23,
        "nested": {"key": "value"},
    }
    job_input.send_object_for_ingestion(
        payload=payload, destination_table="object_table", method=method
    )

    column_names = ["first", "second"]
    table_rows = [["two", 2], ["twenty-two", 22], ["one-eleven", 111]]
    job_input.send_tabular_data_for_ingestion(
        rows=table_rows,
        column_names=column_names,
        destination_table="tabular_table",
        method=method,
    )
26 changes: 26 additions & 0 deletions projects/vdk-core/tests/functional/ingestion/test_run_ingest.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
from vdk.internal.builtin_plugins.ingestion.ingester_configuration import (
INGESTER_WAIT_TO_FINISH_AFTER_EVERY_SEND,
)
from vdk.plugin.test_utils.util_funcs import cli_assert
from vdk.plugin.test_utils.util_funcs import cli_assert_equal
from vdk.plugin.test_utils.util_funcs import CliEntryBasedTestRunner
from vdk.plugin.test_utils.util_funcs import jobs_path_from_caller_directory
Expand Down Expand Up @@ -109,3 +110,28 @@ def test_ingest_multiple_methods():
assert (
len(ingest_plugin3.payloads) == 0
), "expected 0 (no) payloads for ingest method 'memory3'"


def test_ingest_multiple_methods_multiple_threads():
    """Run the multi-threaded ingestion job and check nothing is lost.

    The "ingest-multiple-threads-job" data job sends data through the
    "memory1" and "memory2" methods from many threads at once. The test
    checks that the job succeeds, that each of those two plugins received
    the expected total number of payload entries, and that the unused
    "memory3" plugin received nothing.
    """
    ingest_plugin1 = IngestIntoMemoryPlugin("memory1")
    ingest_plugin2 = IngestIntoMemoryPlugin("memory2")
    ingest_plugin3 = IngestIntoMemoryPlugin("memory3")
    runner = CliEntryBasedTestRunner(ingest_plugin1, ingest_plugin2, ingest_plugin3)

    result: Result = runner.invoke(
        ["run", jobs_path_from_caller_directory("ingest-multiple-threads-job")]
    )

    cli_assert_equal(0, result)

    # NOTE(review): 200 presumably comes from the job starting 50 threads per
    # method, each sending 1 object + 3 tabular rows (50 * 4) - confirm
    # against the job's step file if the job changes.
    assert (
        sum(len(p.payload) for p in ingest_plugin1.payloads) == 200
    ), f"expected 200 payloads for ingest method 'memory1'. Payloads: {ingest_plugin1.payloads} "

    assert (
        sum(len(p.payload) for p in ingest_plugin2.payloads) == 200
    ), f"expected 200 payloads for ingest method 'memory2'. Payloads: {ingest_plugin2.payloads} "

    assert (
        len(ingest_plugin3.payloads) == 0
    ), "expected 0 (no) payloads for ingest method 'memory3'"

0 comments on commit e17ead2

Please sign in to comment.