Skip to content

Commit

Permalink
fix: handling infinite loop (#812)
Browse files Browse the repository at this point in the history
There was a bug in PSA: if the first worker in splunk_ingest_data errors
out or throws an exception, it does not write to the file. The other
workers then wait indefinitely for that file to be present, so the
execution never finishes.

To handle this scenario, we handle the exception thrown by the first
worker and make sure that it writes to the file in any case, so that the
other workers don't wait indefinitely.
  • Loading branch information
harshilgajera-crest authored Apr 19, 2024
1 parent 6a16609 commit d3111a6
Show file tree
Hide file tree
Showing 3 changed files with 73 additions and 28 deletions.
13 changes: 5 additions & 8 deletions .github/workflows/build-test-release.yml
Original file line number Diff line number Diff line change
Expand Up @@ -120,18 +120,15 @@ jobs:
run: |
pip install git+https://github.com/pixelb/crudini
mkdir test-results-${{ matrix.splunk.version }}
- name: Splunk Up
- name: Test
run: |
export SPLUNK_APP_PACKAGE=./tests/e2e/addons/TA_fiction
export SPLUNK_ADDON=TA_fiction
export SPLUNK_APP_ID=TA_fiction
export SPLUNK_APP_PACKAGE=./tests/e2e/addons/TA_fiction_indextime
export SPLUNK_ADDON=TA_fiction_indextime
export SPLUNK_APP_ID=TA_fiction_indextime
export SPLUNK_VERSION=${{ matrix.splunk.version }}
export SPLUNK_HEC_TOKEN="9b741d03-43e9-4164-908b-e09102327d22"
echo $SPLUNK_VERSION
docker compose -f "docker-compose-ci.yml" build
SPLUNK_PASSWORD=Chang3d! docker compose -f docker-compose-ci.yml up -d splunk
sleep 90
- name: Test
run: |
SPLUNK_PASSWORD=Chang3d! docker compose -f docker-compose-ci.yml up --abort-on-container-exit
docker volume ls
- name: Collect Results
Expand Down
27 changes: 15 additions & 12 deletions pytest_splunk_addon/splunk.py
Original file line number Diff line number Diff line change
Expand Up @@ -750,18 +750,21 @@ def splunk_ingest_data(request, splunk_hec_uri, sc4s, uf, splunk_events_cleanup)
}
thread_count = int(request.config.getoption("thread_count"))
store_events = request.config.getoption("store_events")
IngestorHelper.ingest_events(
ingest_meta_data,
addon_path,
config_path,
thread_count,
store_events,
)
sleep(50)
if "PYTEST_XDIST_WORKER" in os.environ:
with open(os.environ.get("PYTEST_XDIST_TESTRUNUID") + "_wait", "w+"):
PYTEST_XDIST_TESTRUNUID = os.environ.get("PYTEST_XDIST_TESTRUNUID")

try:
IngestorHelper.ingest_events(
ingest_meta_data,
addon_path,
config_path,
thread_count,
store_events,
)
sleep(50)
except Exception as e:
raise e
finally:
if "PYTEST_XDIST_WORKER" in os.environ:
with open(os.environ.get("PYTEST_XDIST_TESTRUNUID") + "_wait", "w+"):
PYTEST_XDIST_TESTRUNUID = os.environ.get("PYTEST_XDIST_TESTRUNUID")
else:
while not os.path.exists(os.environ.get("PYTEST_XDIST_TESTRUNUID") + "_wait"):
sleep(1)
Expand Down
61 changes: 53 additions & 8 deletions tests/e2e/test_splunk_addon.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,7 @@ def test_splunk_connection_external(testdir, request):
# fnmatch_lines does an assertion internally
result.assert_outcomes(passed=1, failed=0)

# make sure that that we get a '0' exit code for the testsuite
# make sure that we get a '0' exit code for the testsuite
assert result.ret == 0


Expand Down Expand Up @@ -117,7 +117,7 @@ def test_splunk_connection_docker(testdir, request):
# fnmatch_lines does an assertion internally
result.assert_outcomes(passed=1, failed=0)

# make sure that that we get a '0' exit code for the testsuite
# make sure that we get a '0' exit code for the testsuite
assert result.ret == 0


Expand Down Expand Up @@ -165,7 +165,7 @@ def empty_method():
skipped=len(constants.TA_FICTION_SKIPPED),
)

# make sure that that we get a '0' exit code for the testsuite
# make sure that we get a '0' exit code for the testsuite
assert result.ret == 0


Expand Down Expand Up @@ -328,7 +328,7 @@ def empty_method():
skipped=len(constants.TA_CIM_FICTION_SKIPPED),
)

# make sure that that we get a '0' exit code for the testsuite
# make sure that we get a '0' exit code for the testsuite
assert result.ret == 0


Expand Down Expand Up @@ -439,7 +439,7 @@ def empty_method():
failed=0,
)

# make sure that that we get a '0' exit code for the testsuite
# make sure that we get a '0' exit code for the testsuite
assert result.ret == 0


Expand Down Expand Up @@ -627,7 +627,7 @@ def empty_method():
skipped=len(constants.TA_REQ_TRANSITION_SKIPPED),
)

# make sure that that we get a non '0' exit code for the testsuite as it contains failure
# make sure that we get a '0' exit code for the testsuite
assert result.ret == 0, "result not equal to 0"


Expand Down Expand Up @@ -682,7 +682,7 @@ def empty_method():
skipped=len(constants.TA_REQ_BROKEN_SKIPPED),
)

# make sure that that we get a non '0' exit code for the testsuite as it contains failure
# make sure that we get a non '0' exit code for the testsuite as it contains failure
assert result.ret != 0


Expand Down Expand Up @@ -737,5 +737,50 @@ def empty_method():
skipped=len(constants.TA_REQ_TRANSITION_SKIPPED),
)

# make sure that that we get a non '0' exit code for the testsuite as it contains failure
# make sure that we get a '0' exit code for the testsuite
assert result.ret == 0, "result not equal to 0"


@pytest.mark.test_infinite_loop_fixture
@pytest.mark.external
def test_infinite_loop_in_ingest_data_fixture(testdir, request):
    """Check that the run finishes even when data ingestion fails.

    The inner pytest run is started with two xdist workers and deliberately
    wrong sc4s connection details, to recreate the scenario where the first
    worker raises an exception during ingestion. The test only verifies that
    results come back (at least one test passed), i.e. that the remaining
    workers do not wait forever.
    """

    testdir.makepyfile(
        """
        from pytest_splunk_addon.standard_lib.addon_basic import Basic
        class Test_App(Basic):
            def empty_method():
                pass
    """
    )

    shutil.copytree(
        os.path.join(testdir.request.fspath.dirname, "addons/TA_fiction_indextime"),
        os.path.join(testdir.tmpdir, "package"),
    )

    shutil.copytree(
        os.path.join(testdir.request.fspath.dirname, "test_data_models"),
        os.path.join(testdir.tmpdir, "tests/data_models"),
    )

    setup_test_dir(testdir)
    SampleGenerator.clean_samples()
    Rule.clean_rules()

    # run pytest with the following cmd args
    # we are providing wrong sc4s service details here so that we can recreate
    # the scenario where the first worker raises an exception and the other
    # workers get stuck
    result = testdir.runpytest(
        "--splunk-app=addons/TA_fiction_indextime",
        "--splunk-type=external",
        "--splunk-host=splunk",
        "--splunk-data-generator=tests/addons/TA_fiction_indextime/default",
        "--sc4s-host=splunk",
        "--sc4s-port=100",
        "-n 2",
    )

    # Here we are not interested in the failures or errors,
    # we are basically checking that we get results and test execution does not get stuck.
    # Default to 0 so a run with no passed tests fails this assertion cleanly
    # instead of raising TypeError on `None > 0`.
    passed_count = result.parseoutcomes().get("passed", 0)
    assert passed_count > 0

0 comments on commit d3111a6

Please sign in to comment.