Skip to content

Commit

Permalink
bugfix: handle relative path problem in tests
Browse files (browse the repository at this point in the history)
  • Loading branch information
cyruszhang committed Feb 14, 2025
1 parent 1d65a3a commit 96a4997
Show file tree
Hide file tree
Showing 4 changed files with 19 additions and 10 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number | Diff line number | Diff line change
Expand Up @@ -20,3 +20,4 @@ venv/

# dup files created by tests
tests/ops/data/*dup*
tests/tools/tmp_*/
2 changes: 1 addition & 1 deletion tests/core/data/test_config_ray.yaml
Original file line number | Diff line number | Diff line change
Expand Up @@ -4,7 +4,7 @@ project_name: 'ray-demo-new-config'
dataset:
configs:
- type: local
path: ../../demos/process_on_ray/data/demo-dataset.jsonl # path to your dataset directory or file
path: ./data/sample.json # path to your dataset directory or file
weight: 1.0

export_path: './outputs/demo/demo-processed'
Expand Down
4 changes: 2 additions & 2 deletions tests/core/test_dataset_builder.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -474,7 +474,7 @@ def test_invalid_max_sample_num(self):

def test_builder_ray_config(self):
"""Test loading Ray configuration from YAML"""
test_config_file = os.path.join(WORK_DIR, 'data/test_config_ray.yaml')
test_config_file = os.path.join(WORK_DIR, 'data', 'test_config_ray.yaml')
out = StringIO()
with redirect_stdout(out):
cfg = init_configs(args=f'--config {test_config_file}'.split())
Expand All @@ -489,7 +489,7 @@ def test_builder_ray_config(self):
self.assertEqual(cfg.dataset, {
'configs': [{
'type': 'local',
'path': '../../demos/process_on_ray/data/demo-dataset.jsonl',
'path': './data/sample.json',
'weight': 1.0
}]
})
Expand Down
22 changes: 15 additions & 7 deletions tests/tools/test_process_data.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -62,8 +62,10 @@ def _test_status_code(self, yaml_file, output_path, text_keys):
with open(yaml_file, 'w') as file:
yaml.dump(yaml_config, file)

script_path = osp.join(osp.dirname(osp.dirname(osp.dirname(osp.realpath(__file__)))),
"tools", "process_data.py")
status_code = subprocess.call(
f'python tools/process_data.py --config {yaml_file}', shell=True)
f'python {script_path} --config {yaml_file}', shell=True)

return status_code

Expand Down Expand Up @@ -95,6 +97,8 @@ def setUp(self):

cur_dir = osp.dirname(osp.abspath(__file__))
self.tmp_dir = osp.join(cur_dir, f'tmp_{uuid.uuid4().hex}')
self.script_path = osp.join(osp.dirname(osp.dirname(osp.dirname(osp.realpath(__file__)))),
"tools", "process_data.py")
os.makedirs(self.tmp_dir, exist_ok=True)

def tearDown(self):
Expand All @@ -113,7 +117,7 @@ def test_ray_image(self):
text_keys = 'text'

data_path = osp.join(osp.dirname(osp.dirname(osp.dirname(osp.realpath(__file__)))),
'demos', 'data', 'demo-dataset-images.jsonl')
'demos', 'data', 'demo-dataset-images.jsonl')
yaml_config = {
'dataset_path': data_path,
'executor_type': 'ray',
Expand Down Expand Up @@ -141,7 +145,8 @@ def test_ray_image(self):
with open(tmp_yaml_file, 'w') as file:
yaml.dump(yaml_config, file)

run_in_subprocess(f'python tools/process_data.py --config {tmp_yaml_file}')
print(f"Is the config file present? {os.path.exists(tmp_yaml_file)}")
run_in_subprocess(f'python {self.script_path} --config {tmp_yaml_file}')

self.assertTrue(osp.exists(tmp_out_path))

Expand Down Expand Up @@ -184,7 +189,9 @@ def test_ray_precise_dedup(self):
with open(tmp_yaml_file, 'w') as file:
yaml.dump(yaml_config, file)

run_in_subprocess(f'python tools/process_data.py --config {tmp_yaml_file}')
script_path = osp.join(osp.dirname(osp.dirname(osp.dirname(osp.realpath(__file__)))),
"tools", "process_data.py")
run_in_subprocess(f'python {script_path} --config {tmp_yaml_file}')

self.assertTrue(osp.exists(tmp_out_path))

Expand Down Expand Up @@ -227,7 +234,7 @@ def test_ray_minhash_dedup(self):
with open(tmp_yaml_file, 'w') as file:
yaml.dump(yaml_config, file)

run_in_subprocess(f'python tools/process_data.py --config {tmp_yaml_file}')
run_in_subprocess(f'python {self.script_path} --config {tmp_yaml_file}')

self.assertTrue(osp.exists(tmp_out_path))

Expand Down Expand Up @@ -282,7 +289,7 @@ def test_ray_compute_stats_single_filter(self):
with open(tmp_yaml_file, 'w') as file:
yaml.dump(yaml_config, file)

run_in_subprocess(f'python tools/process_data.py --config {tmp_yaml_file}')
run_in_subprocess(f'python {self.script_path} --config {tmp_yaml_file}')

self.assertTrue(osp.exists(tmp_out_path))

Expand Down Expand Up @@ -344,7 +351,8 @@ def test_ray_compute_stats_batched_filter(self):
with open(tmp_yaml_file, 'w') as file:
yaml.dump(yaml_config, file)

run_in_subprocess(f'python tools/process_data.py --config {tmp_yaml_file}')

run_in_subprocess(f'python {self.script_path} --config {tmp_yaml_file}')

self.assertTrue(osp.exists(tmp_out_path))

Expand Down

0 comments on commit 96a4997

Please sign in to comment.