-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathtropes.mk
155 lines (130 loc) · 7.17 KB
/
tropes.mk
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
#
# Make file for trope related tasks
#
# Other than top level tasks, tasks are split into three categories:
#
# Extract — extraction of data from the raw sparql query files and basic filtering
# Download - Downloading/scraping data from remote sources
# Analyse - Running analysis like NLP tasks, log likelihood etc.
#
# Top level tasks summarize the main steps of each section
# as tasks we may want to call directly without specifying the destination file(s).
#
#####################
# Shared variables
#####################
# Output locations shared by the rules in this file:
#   RESULTS_DIR  - extracted / pre-processed trope data and corpora
#   ANALYSIS_DIR - log-likelihood and clustering output
RESULTS_DIR  := data/results
ANALYSIS_DIR := data/analysis
#####################
# Top Level tasks
#####################
# Convenience entry points, one per section of this file.
#   extract  - builds the female-only and male-only trope lists
#   download - fetches the images for both lists
#   analyse  - builds the corpora and the log-likelihood results
# None of these names is a file the build produces, so they are declared phony:
# a stray file or directory called e.g. `download` can then never make the
# task look "up to date".
.PHONY: extract download analyse
extract: \
	$(RESULTS_DIR)/only_tropes-female.json \
	$(RESULTS_DIR)/only_tropes-male.json
download: images_female images_male
analyse: corpus log_likelyhood
#####################
# Extract
#####################
# Trope extraction from raw sparql files
# Female tropes: run process_tropes' extract_tropes command on the raw query dump.
# The output directory is created first; `touch -c` then refreshes the target's
# timestamp without ever creating the file itself.
$(RESULTS_DIR)/tropes-female.json: data/raw/TropesWithDescription-Female.json
	mkdir -p $(@D)
	python -m src.preprocess.process_tropes \
		--command extract_tropes \
		--source $< \
		--dest $@
	touch -c $@
# Male tropes: run process_tropes' extract_tropes command on the raw query dump.
# The output directory is created here as well, so this target can be built on
# its own (or first under `make -j`) without relying on another rule having
# created it already.
$(RESULTS_DIR)/tropes-male.json: data/raw/TropesWithDescription-Male.json
	mkdir -p $(@D)
	python -m src.preprocess.process_tropes --command extract_tropes --source $< --dest $@
	touch -c $@
# Unisex tropes: run process_tropes' extract_tropes command on the raw query dump.
# The output directory is created here as well, so this target can be built on
# its own (or first under `make -j`) without relying on another rule having
# created it already.
$(RESULTS_DIR)/tropes-unisex.json: data/raw/TropesWithDescription-Unisex.json
	mkdir -p $(@D)
	python -m src.preprocess.process_tropes --command extract_tropes --source $< --dest $@
	touch -c $@
# Category Filtering, removes unisex tropes from the female and male lists of tropes.
# Female-only list: filter_tropes receives the female list followed by the
# unisex list ($^ keeps the prerequisites in the order written here).
$(RESULTS_DIR)/only_tropes-female.json: \
		$(RESULTS_DIR)/tropes-female.json \
		$(RESULTS_DIR)/tropes-unisex.json
	python -m src.preprocess.process_tropes \
		--command filter_tropes \
		--source $^ \
		--dest $@
	touch -c $@
# Male-only list: filter_tropes receives the male list followed by the
# unisex list ($^ keeps the prerequisites in the order written here).
$(RESULTS_DIR)/only_tropes-male.json: \
		$(RESULTS_DIR)/tropes-male.json \
		$(RESULTS_DIR)/tropes-unisex.json
	python -m src.preprocess.process_tropes \
		--command filter_tropes \
		--source $^ \
		--dest $@
	touch -c $@
#####################
# Download
#
# Note: these tasks do not declare any prerequisites, so they run every time they are called.
# They read the only_tropes-*.json files, so run `extract` first.
#####################
# Fetch the images for the female-only tropes into $(RESULTS_DIR)/images/female.
# `images_female` is a task name, not a file this recipe creates, so it is
# declared phony; otherwise a stray file or directory with that name would stop
# the recipe from ever running.
# The recipe reads $(RESULTS_DIR)/only_tropes-female.json, which is not listed
# as a prerequisite and must already exist.
.PHONY: images_female
images_female:
	mkdir -p $(RESULTS_DIR)/images/female
	python -m src.preprocess.process_tropes --command get_images --source $(RESULTS_DIR)/only_tropes-female.json --dest $(RESULTS_DIR)/images/female
# Fetch the images for the male-only tropes into $(RESULTS_DIR)/images/male.
# `images_male` is a task name, not a file this recipe creates, so it is
# declared phony; otherwise a stray file or directory with that name would stop
# the recipe from ever running.
# The recipe reads $(RESULTS_DIR)/only_tropes-male.json, which is not listed
# as a prerequisite and must already exist.
.PHONY: images_male
images_male:
	mkdir -p $(RESULTS_DIR)/images/male
	python -m src.preprocess.process_tropes --command get_images --source $(RESULTS_DIR)/only_tropes-male.json --dest $(RESULTS_DIR)/images/male
#####################
# Analyse
#####################
# Aggregate tasks for the analysis section. Each one only groups file targets
# and has no recipe of its own, so all three are declared phony: a file or
# directory that happens to share one of these names cannot mask the task.
#   corpus         - the base corpus plus the female and male corpora
#   log_likelyhood - corpus-level and per-trope log-likelihood results
#   cluster        - the three clustering outputs (not part of `analyse`)
.PHONY: corpus log_likelyhood cluster
corpus: \
	$(RESULTS_DIR)/base_corpus.json \
	$(RESULTS_DIR)/corpus-female.json \
	$(RESULTS_DIR)/corpus-male.json
log_likelyhood: \
	$(ANALYSIS_DIR)/ll-male.json \
	$(ANALYSIS_DIR)/ll-female.json \
	$(ANALYSIS_DIR)/trope_ll-male.json \
	$(ANALYSIS_DIR)/trope_ll-female.json
cluster: \
	$(ANALYSIS_DIR)/trope_clusters-female.json \
	$(ANALYSIS_DIR)/male_trope_clusters.json \
	$(ANALYSIS_DIR)/all_trope_clusters.json
#
# Trope NLP tagging
#
# Tag the female-only tropes with process_tropes' tag_tropes command.
$(RESULTS_DIR)/tropes_tagged-female.json: $(RESULTS_DIR)/only_tropes-female.json
	mkdir -p $(@D)
	python -m src.preprocess.process_tropes \
		--command tag_tropes \
		--source $< \
		--dest $@
	touch -c $@
# Tag the male-only tropes with process_tropes' tag_tropes command.
$(RESULTS_DIR)/tropes_tagged-male.json: $(RESULTS_DIR)/only_tropes-male.json
	python -m src.preprocess.process_tropes \
		--command tag_tropes \
		--source $< \
		--dest $@
	touch -c $@
#
# Adjective extraction
#
# Extract adjectives from the tagged female tropes.
# $< is the tagged trope file; $(word 2,$^) is the handmade exclusion list,
# so editing that list also triggers a rebuild.
$(RESULTS_DIR)/tropes_adjectives-female.json: \
		$(RESULTS_DIR)/tropes_tagged-female.json \
		data/handmade/exclude_adjectives.json
	python -m src.preprocess.process_tropes \
		--command extract_adjectives \
		--source $< \
		--dest $@ \
		--exclude_adj $(word 2,$^)
	touch -c $@
# Extract adjectives from the tagged male tropes.
# $< is the tagged trope file; $(word 2,$^) is the handmade exclusion list,
# so editing that list also triggers a rebuild.
$(RESULTS_DIR)/tropes_adjectives-male.json: \
		$(RESULTS_DIR)/tropes_tagged-male.json \
		data/handmade/exclude_adjectives.json
	python -m src.preprocess.process_tropes \
		--command extract_adjectives \
		--source $< \
		--dest $@ \
		--exclude_adj $(word 2,$^)
	touch -c $@
#
# Make corpora for 3 main large groups. All adjectives, all female and all male.
#
# Base corpus: make_base_corpus over the female and then the male adjective
# files ($^ expands to both prerequisites in that order).
$(RESULTS_DIR)/base_corpus.json: \
		$(RESULTS_DIR)/tropes_adjectives-female.json \
		$(RESULTS_DIR)/tropes_adjectives-male.json
	python -m src.preprocess.process_tropes \
		--command make_base_corpus \
		--source $^ \
		--dest $@
	touch -c $@
# Female corpus: make_base_corpus over the female adjective file only.
$(RESULTS_DIR)/corpus-female.json: $(RESULTS_DIR)/tropes_adjectives-female.json
	python -m src.preprocess.process_tropes \
		--command make_base_corpus \
		--source $< \
		--dest $@
	touch -c $@
# Male corpus: make_base_corpus over the male adjective file only.
$(RESULTS_DIR)/corpus-male.json: $(RESULTS_DIR)/tropes_adjectives-male.json
	python -m src.preprocess.process_tropes \
		--command make_base_corpus \
		--source $< \
		--dest $@
	touch -c $@
#
# Calculate adjective log-likelihood for the two large corpora (all female adjectives and all male adjectives).
#
# Log-likelihood of the male corpus against the base corpus.
# The command takes the male corpus first and the base corpus second, i.e. the
# prerequisites in reverse order: $(word 2,$^) followed by $<.
$(ANALYSIS_DIR)/ll-male.json: \
		$(RESULTS_DIR)/base_corpus.json \
		$(RESULTS_DIR)/corpus-male.json
	mkdir -p $(@D)
	python -m src.preprocess.analyse_data \
		--command log_likelyhood \
		--source $(word 2,$^) $< \
		--dest $@
	touch -c $@
# Log-likelihood of the female corpus against the base corpus.
# The command takes the female corpus first and the base corpus second, i.e.
# the prerequisites in reverse order: $(word 2,$^) followed by $<.
$(ANALYSIS_DIR)/ll-female.json: \
		$(RESULTS_DIR)/base_corpus.json \
		$(RESULTS_DIR)/corpus-female.json
	mkdir -p $(@D)
	python -m src.preprocess.analyse_data \
		--command log_likelyhood \
		--source $(word 2,$^) $< \
		--dest $@
	touch -c $@
#
# Calculate adjective log-likelihood for each trope as compared to the base corpus of all adjectives.
#
# Per-trope log-likelihood for the male tropes against the base corpus.
# The command takes the male adjective file first and the base corpus second,
# i.e. the prerequisites in reverse order: $(word 2,$^) followed by $<.
$(ANALYSIS_DIR)/trope_ll-male.json: \
		$(RESULTS_DIR)/base_corpus.json \
		$(RESULTS_DIR)/tropes_adjectives-male.json
	mkdir -p $(@D)
	python -m src.preprocess.analyse_data \
		--command trope_log_likelyhood \
		--source $(word 2,$^) $< \
		--dest $@
	touch -c $@
# Per-trope log-likelihood for the female tropes against the base corpus.
# The command takes the female adjective file first and the base corpus second,
# i.e. the prerequisites in reverse order: $(word 2,$^) followed by $<.
$(ANALYSIS_DIR)/trope_ll-female.json: \
		$(RESULTS_DIR)/base_corpus.json \
		$(RESULTS_DIR)/tropes_adjectives-female.json
	mkdir -p $(@D)
	python -m src.preprocess.analyse_data \
		--command trope_log_likelyhood \
		--source $(word 2,$^) $< \
		--dest $@
	touch -c $@
#
# Cluster the tropes by adjective use
# (note: we do not currently use this analysis, so it is not run by the top level tasks)
#
# Cluster the female tropes' adjective data, requesting 40 clusters.
$(ANALYSIS_DIR)/trope_clusters-female.json: $(RESULTS_DIR)/tropes_adjectives-female.json
	mkdir -p $(@D)
	python -m src.preprocess.analyse_data \
		--command cluster \
		--source $< \
		--dest $@ \
		--num_clusters 40
	touch -c $@
# Cluster the male tropes' adjective data, requesting 40 clusters.
# NOTE(review): this output is named male_trope_clusters.json while the female
# one is trope_clusters-female.json; the name is kept as-is here.
$(ANALYSIS_DIR)/male_trope_clusters.json: $(RESULTS_DIR)/tropes_adjectives-male.json
	mkdir -p $(@D)
	python -m src.preprocess.analyse_data \
		--command cluster \
		--source $< \
		--dest $@ \
		--num_clusters 40
	touch -c $@
# Cluster the male and female adjective data together, requesting 40 clusters.
# $^ passes the male file first and the female file second, matching the
# prerequisite order.
$(ANALYSIS_DIR)/all_trope_clusters.json: \
		$(RESULTS_DIR)/tropes_adjectives-male.json \
		$(RESULTS_DIR)/tropes_adjectives-female.json
	mkdir -p $(@D)
	python -m src.preprocess.analyse_data \
		--command cluster \
		--source $^ \
		--dest $@ \
		--num_clusters 40
	touch -c $@