Skip to content

Commit

Permalink
Merge pull request #493 from dice-group/retrieval_eval_fixes
Browse files Browse the repository at this point in the history
fixes memory issue by writing csv to disk and sample ratio issue for …
  • Loading branch information
Demirrr authored Nov 16, 2024
2 parents 77fae24 + 718a1b8 commit b2782e4
Show file tree
Hide file tree
Showing 2 changed files with 33 additions and 24 deletions.
53 changes: 31 additions & 22 deletions examples/retrieval_eval.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,12 @@
import random
import itertools
import ast

# Set pandas options to ensure full output
pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)
pd.set_option('display.width', None)
pd.set_option('display.colheader_justify', 'left')
pd.set_option('display.expand_frame_repr', False)

def execute(args):
# (1) Initialize knowledge base.
Expand All @@ -86,9 +91,9 @@ def execute(args):
object_properties = sorted({i for i in symbolic_kb.get_object_properties()})

# (3.1) Subsample if required.
if args.ratio_sample_object_prop:
if args.ratio_sample_object_prop and len(object_properties) > 0:
object_properties = {i for i in random.sample(population=list(object_properties),
k=max(0, int(len(object_properties) * args.ratio_sample_object_prop)))}
k=max(1, int(len(object_properties) * args.ratio_sample_object_prop)))}

object_properties = set(object_properties)

Expand All @@ -103,9 +108,9 @@ def execute(args):



if args.ratio_sample_nc:
if args.ratio_sample_nc and len(nc) > 0:
# (6.1) Subsample if required.
nc = {i for i in random.sample(population=list(nc), k=max(0, int(len(nc) * args.ratio_sample_nc)))}
nc = {i for i in random.sample(population=list(nc), k=max(1, int(len(nc) * args.ratio_sample_nc)))}

nc = set(nc) # return to a set
# (7) NC⁻: Complement of NC.
Expand Down Expand Up @@ -211,7 +216,10 @@ def concept_retrieval(retriever_func, c) -> Tuple[Set[str], float]:
# () Shuffle the data so that the progress bar is not influenced by the order of concepts.

random.shuffle(concepts)

# Check whether the CSV already exists and delete it, because we want to overwrite it.
if os.path.exists(args.path_report):
os.remove(args.path_report)
file_exists = False
# () Iterate over single OWL Class Expressions in ALCQIHO
for expression in (tqdm_bar := tqdm(concepts, position=0, leave=True)):
retrieval_y: Set[str]
Expand All @@ -225,8 +233,8 @@ def concept_retrieval(retriever_func, c) -> Tuple[Set[str], float]:
# () Compute the F1-score.
f1_sim = f1_set_similarity(retrieval_y, retrieval_neural_y)
# () Store the data.
data.append(
{
df_row = pd.DataFrame(
[{
"Expression": owl_expression_to_dl(expression),
"Type": type(expression).__name__,
"Jaccard Similarity": jaccard_sim,
Expand All @@ -235,34 +243,35 @@ def concept_retrieval(retriever_func, c) -> Tuple[Set[str], float]:
"Runtime Neural": runtime_neural_y,
"Symbolic_Retrieval": retrieval_y,
"Symbolic_Retrieval_Neural": retrieval_neural_y,
}
)
}])
# Append the row to the CSV file
df_row.to_csv(args.path_report, mode='a', header=not file_exists, index=False)
file_exists = True
# () Update the progress bar.
tqdm_bar.set_description_str(
f"Expression: {owl_expression_to_dl(expression)} | Jaccard Similarity:{jaccard_sim:.4f} | F1 :{f1_sim:.4f} | Runtime Benefits:{runtime_y - runtime_neural_y:.3f}"
)
# () Read the data into pandas dataframe
df = pd.DataFrame(data)
assert df["Jaccard Similarity"].mean() >= args.min_jaccard_similarity
# () Save the experimental results into csv file.
df.to_csv(args.path_report)
del df
# () Load the saved CSV file.
df = pd.read_csv(args.path_report, index_col=0, converters={'Symbolic_Retrieval': lambda x: ast.literal_eval(x),
'Symbolic_Retrieval_Neural': lambda x: ast.literal_eval(
x)})
# () A retrieval result can be parsed into set of instances to python object.
'Symbolic_Retrieval_Neural': lambda x: ast.literal_eval(x)})
# () Assert that the mean Jaccard Similarity meets the threshold
assert df["Jaccard Similarity"].mean() >= args.min_jaccard_similarity

# () Ensure 'Symbolic_Retrieval_Neural' contains sets
x = df["Symbolic_Retrieval_Neural"].iloc[0]
assert isinstance(x, set)
# () Extract the numerical features.

# () Extract numerical features
numerical_df = df.select_dtypes(include=["number"])
# () Extract the type of owl concepts

# () Group by the type of OWL concepts
df_g = df.groupby(by="Type")
print(df_g["Type"].count())

# () Compute mean of numerical columns per group
mean_df = df_g[numerical_df.columns].mean()
print(mean_df)


def get_default_arguments():
parser = ArgumentParser()
parser.add_argument("--path_kg", type=str, default="KGs/Family/father.owl")
Expand Down
4 changes: 2 additions & 2 deletions examples/retrieval_eval_under_incomplete.py
Original file line number Diff line number Diff line change
Expand Up @@ -113,7 +113,7 @@ def execute(args):
data = []

if args.sample == "Yes":
subprocess.run(['python', 'examples/retrieval_eval.py', "--path_kg", path, "--ratio_sample_nc","0.1", "--ratio_sample_object_prob", "0.2", "--path_report", path_report])
subprocess.run(['python', 'examples/retrieval_eval.py', "--path_kg", path, "--ratio_sample_nc","0.1", "--ratio_sample_object_prop", "0.2", "--path_report", path_report])
else:
subprocess.run(['python', 'examples/retrieval_eval.py', "--path_kg", path, "--path_report", path_report])

Expand Down Expand Up @@ -235,7 +235,7 @@ def get_default_arguments():
parser.add_argument("--path_kg", type=str, default="KGs/Family/family-benchmark_rich_background.owl")
parser.add_argument("--seed", type=int, default=1)
parser.add_argument("--ratio_sample_nc", type=float, default=None, help="To sample OWL Classes.")
parser.add_argument("--ratio_sample_object_prob", type=float, default=None, help="To sample OWL Object Properties.")
parser.add_argument("--ratio_sample_object_prop", type=float, default=None, help="To sample OWL Object Properties.")
parser.add_argument("--path_report", type=str, default="ALCQHI_Retrieval_Incomplete_Results.csv")
parser.add_argument("--number_of_subgraphs", type=int, default=1)
parser.add_argument("--ratio", type=float, default=0.1, \
Expand Down

0 comments on commit b2782e4

Please sign in to comment.