-
Notifications
You must be signed in to change notification settings - Fork 6
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #56 from JHU-CLSP/mainaug15
[WIP]
- Loading branch information
Showing
107 changed files
with
7,005 additions
and
7,019 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,26 @@ | ||
Associate countries and languages with Ethnologue | ||
atomic_event2event-effects 4 | ||
Author In-Group Analysis Phrase Classification 2 | ||
Compile list of area chairs | ||
Elicitation obj | ||
Elicitation subj | ||
Full sentence style annotations | ||
Gun violence structured extraction | ||
Lattice | ||
NER - Task scruples 26,200 - 30,922 | ||
neural-pop (PLAN evaluation) t5-human-test b | ||
Paraphrase Clustering with Merge | ||
Photo Collection GVDB | ||
Radiology Report Sentence Classification | ||
Reddit In-group Analysis Comment annotation 3 | ||
ROT Details [m=50] rocstories - 0 - 99 | ||
Scalar Adjectives Identification | ||
Script KD eval LONG V2 - disc result eval 1 | ||
Sherlock IMG 2 TXT Eval 15 | ||
Spanish Word Alignment | ||
wikiHow step-goal linking pilot cleanse-url | ||
winogrande validation (grammar) additional_ph | ||
sandbox_audio_quality | ||
sandbox_figure_descriptions | ||
sandbox_lamecows | ||
sandbox_scambaiting |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
ANES 2008 open-ended survey | ||
Recreation of the Dan Johnson | ||
Congressional Bills 5 point |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,8 +1,14 @@ | ||
GitPython | ||
selenium==4.8.2 | ||
beautifulsoup4 | ||
requests | ||
pandas | ||
rouge-score | ||
python-dateutil | ||
colorama | ||
GitPython==3.1.31 | ||
beautifulsoup4==4.11.2 | ||
requests==2.28.2 | ||
pandas==1.5.3 | ||
rouge-score==0.1.2 | ||
python-dateutil==2.8.2 | ||
colorama==0.4.6 | ||
Pillow==9.4.0 | ||
transformers==4.26.1 | ||
tqdm==4.64.1 | ||
numpy==1.24.1 | ||
boto3==1.28.34 | ||
jsonlines==3.1.0 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,27 +1,42 @@ | ||
import pandas as pd | ||
import os | ||
|
||
""" | ||
Script for removing unnecessary data from csv files | ||
""" | ||
|
||
|
||
def create_input(csv_file): | ||
df = pd.read_csv(csv_file, low_memory=False) | ||
df = df.loc[:, ~df.columns.str.startswith('Answer.')] | ||
df.drop_duplicates(inplace=True) | ||
df.to_csv(csv_file.replace('batch.csv', 'input.csv') , encoding='utf-8-sig', index=False) | ||
df.to_csv(csv_file.replace('batch.csv', 'input.csv'), encoding='utf-8-sig', index=False) | ||
|
||
|
||
ROOT = '../tasks' | ||
|
||
if __name__ == '__main__': | ||
# ensure that ../tasks is available | ||
if not os.path.exists('../tasks'): | ||
|
||
if not os.path.exists(ROOT): | ||
raise Exception("No directory named `tasks` found. Make sure that you run this script in the `src/` directory") | ||
|
||
for root, dirs, files in os.walk('../tasks'): | ||
# if files is empty then show an error | ||
if not files or len(files) == 0: | ||
raise Exception(f"No files in the specified directory: {dirs}") | ||
items = os.listdir(ROOT) | ||
# if files is empty then show an error | ||
if len(items) == 0: | ||
raise Exception(f"No files in the specified directory `{items}`: {ROOT}") | ||
|
||
for item in items: | ||
|
||
# skip if not a dir | ||
if not os.path.isdir(os.path.join(ROOT, item)): | ||
continue | ||
|
||
file = os.path.join(ROOT, item, 'batch.csv') | ||
|
||
# make sure the file exists | ||
if not os.path.exists(file): | ||
raise Exception(f"File `{file}` does not exist") | ||
|
||
for file in files: | ||
if file.endswith('batch.csv'): | ||
path = os.path.join(root, file) | ||
print(' ** Reading: ' + path) | ||
create_input(path) | ||
print(' ** Reading: ' + file) | ||
create_input(file) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.