-
Notifications
You must be signed in to change notification settings - Fork 0
/
rutte_may2024
241 lines (213 loc) · 11 KB
/
rutte_may2024
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
# Script for metaphor analysis of the March 2020 speech by Rutte introducing COVID, using Ollama
# Import necessary libraries
import requests # Used for making HTTP requests
import json # Used for working with JSON data
import os # Used for interacting with the operating system
import re # Used for regular expressions, for text manipulation
import logging # Used for logging information for debugging
from nltk.tokenize import sent_tokenize # Used for sentence tokenization
import csv # Used for writing CSV files
# Configure the root logger to append to 'processing_log.txt', one
# "timestamp:LEVEL:message" line per record. The threshold is INFO, so the
# logging.debug(...) calls in this file are not written unless the level is
# lowered to logging.DEBUG.
logging.basicConfig(filename='processing_log.txt', level=logging.INFO, format='%(asctime)s:%(levelname)s:%(message)s')
# Function to extract text from a text file with different encoding handling
def extract_text_from_txt(file_path, encoding='utf-8'):
    """Read a text file and return its content with whitespace normalized.

    The file is first decoded with ``encoding``. If that raises
    ``UnicodeDecodeError`` the error is logged and the file is re-read as
    'latin-1', which can decode any byte sequence.

    Args:
        file_path: Path of the text file to read.
        encoding: Encoding tried first (default 'utf-8').

    Returns:
        The file content with every run of whitespace (spaces, tabs,
        newlines) collapsed into a single space. Leading/trailing whitespace
        is collapsed too, not stripped.

    Raises:
        OSError: If the file cannot be opened.
    """
    logging.info(f"Extracting text from {file_path} with encoding {encoding}")
    try:
        with open(file_path, 'r', encoding=encoding) as file:
            text = file.read()
    except UnicodeDecodeError as e:
        logging.error(f"UnicodeDecodeError: {e}. Trying with 'latin-1' encoding.")
        with open(file_path, 'r', encoding='latin-1') as file:
            text = file.read()
    # Raw string: '\s' in a normal string literal is an invalid escape sequence.
    return re.sub(r'\s+', ' ', text)
# Split text into sentence-sized chunks
def chunk_text(text):
    """Return the list of sentences that ``sent_tokenize`` finds in ``text``."""
    return sent_tokenize(text)
# Shared preamble placed at the start of every prompt
def get_preamble():
    """Return the metaphor definition and worked example used to open each prompt.

    The text starts and ends with a newline so it can be embedded between
    other prompt lines.
    """
    preamble_text = """
Definition of a metaphor:
Metaphors are instances in text in which a word or phrase is applied to an object or action to which it is not literally applicable. The word or a phrase is called a 'vehicle' and the object or action to which it is applied is called a 'target'. Metaphors work when the vehicle, which is literally appropriate for another context, brings aspects, or connotations, from that source context with it. In a text an author may draw many metaphors from the same context or from a few contexts. These become ‘families’. Authors may use one family of metaphors more in some parts of the document than they do in others. The shift in use of families matters because it changes the nature of connotations that are associated with the text in which they are found. For example, when a politician shifts from conflict to growth metaphors it becomes more natural to think about working together to produce something better.
Example of a metaphor:
'She is fighting cancer'... In this example, the target is her interaction with cancer, and the vehicle is 'fighting', which is literally meaningful in a context where there is actual combat taking place in which people intentionally pound on each other. This use may bring the connotation that she is actually doing something active to push back, to 'defeat' cancer. In this case the metaphor belongs to the family ‘conflict’.
"""
    return preamble_text
# Stage-1 prompt: ask the model whether a sentence contains a metaphor
def create_metaphor_check_prompt(sentence):
    """Build the yes/no prompt asking whether ``sentence`` contains a metaphor.

    The prompt is the shared preamble followed by the instruction and the
    sentence itself.
    """
    return f"""
{get_preamble()}
Instructions:
Determine if the following sentence contains a metaphor. Respond with "yes" or "no" only.
Sentence: {sentence}
"""
# Create detailed prompt for model to analyze metaphors
def create_metaphor_analysis_prompt(sentence):
    """Build the prompt asking the model to analyze the metaphor in ``sentence``.

    The prompt consists of the shared preamble, the analysis instructions and
    a JSON template the model is asked to fill in (keys: "sentence",
    "target", "vehicle", "source context", "plausible connotations").

    Args:
        sentence: The sentence to analyze.

    Returns:
        The complete prompt string.
    """
    preamble = get_preamble()
    # json.dumps supplies the surrounding double quotes and escapes embedded
    # quotes/backslashes, so the template stays valid JSON even when the
    # sentence contains a quotation. ensure_ascii=False keeps accented
    # characters readable; for plain sentences the output is unchanged.
    quoted_sentence = json.dumps(sentence, ensure_ascii=False)
    prompt = f"""
{preamble}
Instructions:
Identify the target and the vehicle in the sentence.
Determine the context (or contexts) from which the vehicle is derived.
From each context, identify plausible connotations that may be carried by the vehicle.
Report in JSON format:
{{
"sentence": {quoted_sentence},
"target": "The target of the metaphor",
"vehicle": "The vehicle of the metaphor",
"source context": "The context from which the vehicle is derived",
"plausible connotations": "The connotations carried by the vehicle"
}}
"""
    return prompt
# Function to call the model
def call_model(prompt):
    """Send ``prompt`` to the local Ollama server and return the generated text.

    Args:
        prompt: The full prompt text.

    Returns:
        The 'response' field of the server's JSON reply ('' if absent), or the
        literal string "Error in fetching response." when the server answers
        with a non-200 status (the failure is logged).

    Raises:
        requests.RequestException: If the HTTP request itself fails (e.g. the
            server is not running). No timeout is set, so a slow generation
            blocks until the server answers.
    """
    url = "http://127.0.0.1:11434/api/generate"
    data = {
        "model": "llama3:70b-instruct-q8_0",
        "prompt": prompt,
        "stream": False,
        # Ollama reads sampling/runtime parameters from "options"; as
        # top-level fields they are ignored. The context-window key is
        # "num_ctx" (not "n_ctx").
        "options": {
            "temperature": 0,
            "num_ctx": 8000,
            "seed": 123,
        },
    }
    headers = {'Content-Type': 'application/json'}
    response = requests.post(url, json=data, headers=headers)
    if response.status_code != 200:
        logging.error(f"Failed to get valid response from the model: {response.text}")
        return "Error in fetching response."
    return response.json().get('response', '')
# Prompt asking the model to group source contexts into metaphor families
def create_family_identification_prompt(source_contexts):
    """Build the prompt that asks the model to group ``source_contexts`` into families.

    ``source_contexts`` is interpolated using its string form, followed by the
    JSON layout the model should answer in.
    """
    # Expected answer layout, kept as a plain string so braces need no escaping.
    response_shape = """{
"families": [
{
"family": "Family name",
"contexts": ["context1", "context2", ...]
},
...
]
}"""
    return f"""
{get_preamble()}
Analyze the following source contexts of metaphors and group them into families. Each family should represent a common theme or domain from which the metaphors are drawn, such as growth, conflict, or ecology.
Source Contexts:
{source_contexts}
Provide the families and the contexts that belong to each family in JSON format.
{response_shape}
"""
# Prompt asking the model to assign each metaphor to the identified families
def create_classification_prompt(metaphors, families):
    """Build the prompt that asks the model to classify ``metaphors`` into ``families``.

    Both arguments are interpolated using their string form, followed by the
    JSON layout the model should answer in.
    """
    # Expected answer layout, kept as a plain string so braces need no escaping.
    response_shape = """{
"classifications": [
{
"metaphor": "Metaphor sentence",
"family": "Family name",
"probability": "Probability score"
},
...
]
}"""
    return f"""
{get_preamble()}
Classify the following metaphors into the identified families. Provide the classification with a probability score (high, medium, low, not) indicating the likelihood of each metaphor belonging to each family.
Metaphors:
{metaphors}
Families:
{families}
Provide the classification in JSON format.
{response_shape}
"""
# Function to extract JSON from model response
def extract_json_from_response(response):
    """Extract the JSON object embedded in a model response.

    Parsing starts at the first '{' and stops where that JSON object ends, so
    commentary after the object (even commentary containing braces) does not
    break parsing.

    Args:
        response: The raw text returned by the model.

    Returns:
        A tuple ``(parsed, extra_text)``. On success ``parsed`` is the decoded
        JSON object and ``extra_text`` is the response with the JSON removed
        and surrounding whitespace stripped. When no '{' is present, or the
        text starting there is not valid JSON, the failure is logged and
        ``(None, response)`` is returned.
    """
    start = response.find('{')
    if start == -1:
        logging.error(f"No JSON found in response: {response}")
        return None, response
    try:
        # raw_decode parses one JSON value and reports where it ends,
        # ignoring whatever follows it.
        parsed, end = json.JSONDecoder().raw_decode(response, start)
    except json.JSONDecodeError as e:
        logging.error(f"JSONDecodeError: {e}. Response was: {response}")
        return None, response
    json_text = response[start:end]
    extra_text = (response[:start] + response[end:]).strip()
    logging.debug(f"Extracted JSON: {json_text}")
    return parsed, extra_text
def _is_affirmative(answer):
    """Return True when the model's yes/no answer begins with the word "yes"."""
    # Models frequently answer 'Yes.' or '"yes"'; tolerate case, quotes and
    # trailing punctuation instead of requiring the bare token.
    return re.match(r'\W*yes\b', answer, re.IGNORECASE) is not None


def _collect_metaphors(folder_path):
    """Run the check/analysis stages over every .txt file in ``folder_path``.

    Returns a tuple ``(data_rows, metaphors)``: the CSV rows built from the
    model's analyses, and the sentence/source-context pairs used later for
    family identification.
    """
    data_rows = []
    metaphors = []
    # Sorted so the output order does not depend on directory listing order.
    for filename in sorted(os.listdir(folder_path)):
        if not filename.endswith('.txt'):
            continue
        txt_path = os.path.join(folder_path, filename)
        for chunk in chunk_text(extract_text_from_txt(txt_path)):
            # Stage 1: check if the sentence contains a metaphor
            check_response = call_model(create_metaphor_check_prompt(chunk))
            if not _is_affirmative(check_response):
                continue
            # Stage 2: detailed analysis of the metaphor
            analysis_response = call_model(create_metaphor_analysis_prompt(chunk))
            analysis_result, extra_text = extract_json_from_response(analysis_response)
            if not analysis_result:
                logging.debug(f"Extra text stored: {extra_text}")
                continue
            # The model may omit keys; fall back instead of aborting the run.
            sentence = analysis_result.get("sentence", chunk)
            source_context = analysis_result.get("source context", "")
            data_rows.append({
                "Filename": filename,
                "Sentence": sentence,
                "Target": analysis_result.get("target", ""),
                "Vehicle": analysis_result.get("vehicle", ""),
                "Source Context": source_context,
                "Plausible Connotations": analysis_result.get("plausible connotations", ""),
                "Extra": extra_text,
            })
            metaphors.append({"sentence": sentence, "source context": source_context})
    return data_rows, metaphors


def _assign_families(data_rows, metaphors):
    """Ask the model for metaphor families and tag ``data_rows`` in place."""
    # Step 3: identify families of metaphors
    source_contexts = [m["source context"] for m in metaphors]
    family_response = call_model(create_family_identification_prompt(source_contexts))
    family_result, extra_text = extract_json_from_response(family_response)
    if family_result:
        families = family_result.get("families", [])
        logging.debug("Metaphor families identified")
    else:
        families = []
        logging.debug(f"Extra text stored: {extra_text}")

    # Step 4: classify metaphors by family
    classification_response = call_model(create_classification_prompt(metaphors, families))
    classification_result, extra_text = extract_json_from_response(classification_response)
    if not classification_result:
        logging.debug(f"Extra text stored: {extra_text}")
        return
    for classification in classification_result.get("classifications") or []:
        if not isinstance(classification, dict):
            continue
        # NOTE(review): if the model returns several entries for the same
        # sentence (one per family), the last entry wins — confirm intended.
        for row in data_rows:
            if row["Sentence"] == classification.get("metaphor"):
                row["Family"] = classification.get("family", "")
                row["Probability"] = classification.get("probability", "")
    logging.debug("Metaphor classification completed")


# Main function to process text files and identify metaphors, then classify them
def process_text_files_for_metaphors(folder_path, output_file):
    """Analyze every .txt file in ``folder_path`` and write the results to CSV.

    Each file is split into sentences. For every sentence the model is asked
    whether it contains a metaphor; if so, a detailed analysis is requested.
    The collected source contexts are then grouped into families and each
    metaphor is classified into a family. Rows that receive no classification
    have empty "Family" and "Probability" cells.

    Args:
        folder_path: Directory containing the input .txt files.
        output_file: Path of the CSV file to write (overwritten if present).
    """
    data_rows, metaphors = _collect_metaphors(folder_path)
    if metaphors:
        _assign_families(data_rows, metaphors)
    else:
        # Nothing to group or classify; avoid two pointless model calls.
        logging.info("No metaphors found; skipping family identification and classification.")

    # Output the results to CSV
    csv_columns = ["Filename", "Sentence", "Target", "Vehicle", "Source Context", "Plausible Connotations", "Family", "Probability", "Extra"]
    # newline='' is required by the csv module to avoid blank rows on Windows;
    # explicit UTF-8 keeps non-ASCII text independent of the platform default.
    with open(output_file, 'w', newline='', encoding='utf-8') as csv_file:
        writer = csv.DictWriter(csv_file, fieldnames=csv_columns)
        writer.writeheader()
        writer.writerows(data_rows)
    logging.info("Metaphor identification and analysis complete and saved to CSV.")
# Specify the paths and process the text files
txt_folder = '/Users/peter/ai-computing-assistant/jupyter/metaphor/data' # Path to the folder containing text files
output_csv = '/Users/peter/ai-computing-assistant/jupyter/metaphor/output.csv' # Path to the output CSV file

# Run the pipeline only when executed as a script (or notebook cell), so that
# importing this module does not start a long series of model calls.
if __name__ == "__main__":
    process_text_files_for_metaphors(txt_folder, output_csv) # Call the function to process the files