
Merge pull request #27 from googleinterns/test_and_format
Test and format
naveenram00 authored Jul 30, 2021
2 parents e899042 + 76962d1 commit 346f3cc
Showing 7 changed files with 240 additions and 49 deletions.
17 changes: 13 additions & 4 deletions data/build_probe_1_data.py
@@ -35,6 +35,9 @@
flags.DEFINE_integer("probe_min_pop", 30, "minimum popularity to be in probe")
flags.DEFINE_integer("popular_min_pop", 138, "minimum popularity to be"
+ " considered a popular movie")
flags.DEFINE_enum("format", "normal", ["normal", "sequences"],
"specify the probe format: normal for pairs in dialogue, "
+ "sequences for movie only probes for sequences task")


def create_pmi(co_matrix, movie_ids):
@@ -242,10 +245,16 @@ def parse_sequence(sequence_str):
random_list = random.sample(popular_movies, k=10)

for related, rand in zip(related_list, random_list):
prompt = f"[User] Can you recommend me a movie like @ {movie} @"
probes.append(f"{prompt}\tSure, have you seen @ {related} @?")
probes.append(f"{prompt}\tSure, have you seen @ {rand} @?")
probe_1_path = constants.PROBE_1_TSV_PATH["validation"]
if FLAGS.format == "sequences":
probes.append(f"@ {movie} @\t{related}")
probes.append(f"@ {movie} @\t{rand}")
path, extension = constants.PROBE_1_TSV_PATH["validation"].split(".")
probe_1_path = path + "_sequences" + "." + extension
else:
prompt = f"[User] Can you recommend me a movie like @ {movie} @"
probes.append(f"{prompt}\tSure, have you seen @ {related} @?")
probes.append(f"{prompt}\tSure, have you seen @ {rand} @?")
probe_1_path = constants.PROBE_1_TSV_PATH["validation"]

logging.info("%d pairs generated", len(probes))
with tf.io.gfile.GFile(probe_1_path, "w") as f:
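For reference, the new --format flag toggles between two TSV row layouts for probe 1. A minimal sketch of the two layouts is below; the movie titles are made up purely for illustration and are not from the dataset.

movie, related, rand = "Heat (1995)", "Ronin (1998)", "Babe (1995)"

# "normal" format: dialogue prompt plus response, one (related, random) pair per probe
normal_rows = [
    f"[User] Can you recommend me a movie like @ {movie} @\tSure, have you seen @ {related} @?",
    f"[User] Can you recommend me a movie like @ {movie} @\tSure, have you seen @ {rand} @?",
]
# "sequences" format: movie-only pairs for the sequences task
sequences_rows = [
    f"@ {movie} @\t{related}",
    f"@ {movie} @\t{rand}",
]
for row in normal_rows + sequences_rows:
  print(row)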
144 changes: 144 additions & 0 deletions data/visualize_popularities.py
@@ -0,0 +1,144 @@
# Copyright 2020 Google LLC

# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at

# https://www.apache.org/licenses/LICENSE-2.0

# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Visualize the popularity bias for a given model's Probe 1."""

import json
import os

from absl import app
from absl import flags
from absl import logging
import numpy as np
import plotly.graph_objects as go
import tensorflow.compat.v1 as tf
from trainer import constants


FLAGS = flags.FLAGS

flags.DEFINE_enum("size", "base", ["small", "base", "large", "3B", "11B"],
"model size")
flags.DEFINE_string("name", "default", "name/description of model version")
flags.DEFINE_string("subfolder", None, ("subfolder under size folder to put ",
"model in. if None, the model folder",
" will be in bucket/models/size"))


def tf_load_txt(filepath):
  """Load newline-separated text from gs:// using tf.io.

  Args:
    filepath: path of the file to be read

  Returns:
    a list of strings containing the lines of the file
  """
  with tf.io.gfile.GFile(filepath, "r") as txt_file:
    data = []
    for row in list(txt_file):
      data.append(str(row.replace("\n", "")))
  return data


def load_probe_data(model_dir, probe):
  """Load the probe data of a given model.

  Args:
    model_dir: the directory of a given model
    probe: the name of the probe

  Returns:
    a tuple containing the inputs, targets, predictions and steps
  """
  eval_path = os.path.join(model_dir, "validation_eval")
  inputs = [x[2:-1] for x in tf_load_txt(os.path.join(eval_path,
                                                      f"{probe}_inputs"))]
  targets = tf_load_txt(os.path.join(eval_path, f"{probe}_targets"))
  prediction_path = os.path.join(eval_path, f"{probe}*_predictions")
  prediction_files = sorted(tf.io.gfile.glob(prediction_path),
                            key=lambda x: int(x.split("_")[-2]))
  predictions = []
  steps = []

  for pred_file in prediction_files:
    ckpt_step = int(pred_file.split("_")[-2])
    steps.append(ckpt_step)
    predictions.append(tf_load_txt(pred_file))

  return inputs, targets, predictions, steps


def main(_):

  # set the model dir
  model_dir = os.path.join(constants.MODELS_DIR, FLAGS.size)
  if FLAGS.subfolder is not None:
    model_dir = os.path.join(model_dir, FLAGS.subfolder)
  model_dir = os.path.join(model_dir, FLAGS.name)

  # load the popularity data
  with tf.io.gfile.GFile(constants.MATRIX_PATHS["movie_ids"], "r") as f:
    movie_ids = json.load(f)

  # load the probe 1 data for the given model
  inputs, targets, predictions, steps = load_probe_data(model_dir, "probe_1")
  predictions = predictions[-1]
  steps = steps[-1]
  movie_ids["popularity"] = {k.lower(): v for k, v
                             in movie_ids["popularity"].items()}

  # keep track of the correctly and incorrectly classified pairs
  correct = []
  incorrect = []

  pairs = [(i, i+1) for i in range(0, len(predictions), 2)]
  for i1, i2 in pairs:
    query = inputs[i1].split("@")[1].strip()
    related = targets[i1].split("@")[1].strip()
    random = targets[i2].split("@")[1].strip()
    if (related in movie_ids["popularity"] and random in movie_ids["popularity"]
        and query in movie_ids["popularity"]):
      if float(predictions[i1]) >= float(predictions[i2]):
        correct.append((query, related, random))
      else:
        incorrect.append((query, related, random))

  correct_popularities = [movie_ids["popularity"][x[0]] for x in correct]
  incorrect_popularities = [movie_ids["popularity"][x[0]] for x in incorrect]

  # plot the correctly and incorrectly classified pairs on a histogram
  fig = go.Figure()
  fig.add_trace(go.Histogram(x=correct_popularities, name="correct"))
  fig.add_trace(go.Histogram(x=incorrect_popularities, name="incorrect"))

  fig.update_layout(barmode="overlay")
  fig.update_traces(opacity=0.5)
  fig.update_layout(
      title="Correct vs Incorrect Popularity Distributions",
      xaxis_title="Popularity",
      yaxis_title="Frequency"
  )
  fig.show()

  # log mean/median differences
  logging.info("Correct ----------")
  logging.info("mean: %d median %d", np.mean(correct_popularities),
               np.median(correct_popularities))
  logging.info("Incorrect ----------")
  logging.info("mean: %d median %d", np.mean(incorrect_popularities),
               np.median(incorrect_popularities))

if __name__ == "__main__":
  app.run(main)
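As a quick sanity check of the pairing logic in main(), the sketch below walks through the correct/incorrect split on toy data. The scores and query titles are made up, and it assumes (as the script does) that each prediction file holds one numeric score per line, with the related and random probes on consecutive lines.

# Toy illustration of the correct/incorrect split; values are invented.
predictions = ["-1.2", "-3.4", "-2.0", "-0.5"]  # (related, random) scores per pair
queries = ["heat (1995)", "babe (1995)"]

correct, incorrect = [], []
for pair_index, (i1, i2) in enumerate((i, i + 1) for i in range(0, len(predictions), 2)):
  bucket = correct if float(predictions[i1]) >= float(predictions[i2]) else incorrect
  bucket.append(queries[pair_index])

print(correct)    # ['heat (1995)']
print(incorrect)  # ['babe (1995)']

Assuming the repository layout above, the script would presumably be run as python -m data.visualize_popularities --size=base --name=default, though the exact invocation depends on how the environment and bucket paths are configured.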
75 changes: 49 additions & 26 deletions test/test_build_movielens.py
@@ -12,33 +12,56 @@
# See the License for the specific language governing permissions and
# limitations under the License.

"""Unit Tests for E2E Convrec modules"""
from data import build_movielens
"""Unit Tests for E2E Convrec modules."""
import unittest

from data import build_movielens


class TestBuildMovielens(unittest.TestCase):

def test_flip_titles(self):
test_inputs = [
"Green Mile, The (1999) @ Good, the Bad and the Ugly, The (Buono, il brutto, il cattivo, Il) (1966) @ Devil's Advocate, The (1997) ",
"King's Speech, The (2010) @ Social Network, The (2010) @ Catch Me If You Can (2002)",
"Brady Bunch Movie, The (1995) @ Shining, The (1980) @ Cool Hand Luke (1967)",
"House Bunny, The (2008)",
"Ten Commandments, The (1956)",
"Fake Movie, the (subtitle) weirdness, (0000)"
]

test_outputs = [
"The Green Mile (1999) @ The Good, the Bad and the Ugly (Buono, il brutto, il cattivo, Il) (1966) @ The Devil's Advocate (1997)",
"The King's Speech (2010) @ The Social Network (2010) @ Catch Me If You Can (2002)",
"The Brady Bunch Movie (1995) @ The Shining (1980) @ Cool Hand Luke (1967)",
"The House Bunny (2008)",
"The Ten Commandments (1956)",
"the Fake Movie (subtitle) weirdness, (0000)"
]
for test_input, test_output in zip(test_inputs, test_outputs):
print(build_movielens.flip_titles(test_input), test_output)
self.assertEqual(build_movielens.flip_titles(test_input), test_output, "should put title in order")

if __name__ == '__main__':
unittest.main()
def test_flip_titles(self):
test_inputs = [
"Green Mile, The (1999) @ Good, the Bad and the Ugly, The (Buono, il brut"
+ "to, il cattivo, Il) (1966) @ Devil's Advocate, The (1997) ",
"King's Speech, The (2010) @ Social Network, The (2010) @ Catch Me If You"
+ " Can (2002)",
"Brady Bunch Movie, The (1995) @ Shining, The (1980) @ Cool Hand Luke (19"
+ "67)", "House Bunny, The (2008)", "Ten Commandments, The (1956)",
"Fake Movie, the (subtitle) weirdness, (0000)"
]

test_outputs = [
"The Green Mile (1999) @ The Good, the Bad and the Ugly (Buono, il brutto"
+ ", il cattivo, Il) (1966) @ The Devil's Advocate (1997)",
"The King's Speech (2010) @ The Social Network (2010) @ Catch Me If You "
+ "Can (2002)",
"The Brady Bunch Movie (1995) @ The Shining (1980) @ Cool Hand Luke "
+ "(1967)",
"The House Bunny (2008)",
"The Ten Commandments (1956)",
"the Fake Movie (subtitle) weirdness, (0000)"
]
for test_input, test_output in zip(test_inputs, test_outputs):
self.assertEqual(build_movielens.flip_titles(test_input), test_output,
"should put title in order")

def test_parse_sequence(self):
test_sequences = [
"(1, [1, 2])",
"(2, [3, 4]",
"(3, []",
"(4, [1, 1])"
]
expected_parsed = [
[1, 2],
[3, 4],
[],
[1, 1]
]
for test_seq, ex_parsed in zip(test_sequences, expected_parsed):
self.assertEqual(build_movielens.parse_user_seq(test_seq),
ex_parsed, "incorrect string -> list parsing")

if __name__ == "__main__":
unittest.main()
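The parse_user_seq implementation itself is not shown in this diff. Purely to illustrate the behaviour the new test expects, including tolerating a missing closing bracket, a minimal regex-based stand-in might look like the sketch below; the name parse_user_seq_sketch is invented and this is not the repository's code.

import re

def parse_user_seq_sketch(sequence_str):
  """Toy stand-in matching the test cases above, not build_movielens.parse_user_seq."""
  # Capture everything between "[" and the next "]" (or the end of the string).
  match = re.search(r"\[([^\]]*)", sequence_str)
  body = match.group(1) if match else ""
  return [int(x) for x in body.split(",") if x.strip()]

assert parse_user_seq_sketch("(1, [1, 2])") == [1, 2]
assert parse_user_seq_sketch("(3, []") == []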
26 changes: 26 additions & 0 deletions test/test_build_probes.py
@@ -95,5 +95,31 @@ def calc_pmi2(co_ab, pop_a, pop_b, num_seq):
self.assertTrue(np.allclose(np.exp(pmi2), np.exp(expected_pmi2)),
"pmi2 calculation incorrect")

def test_get_related_movies(self):
sequences = [
["a", "b", "c"],
["b", "c", "d"],
["c"]
]
movie_ids = build_probe_1_data.create_movie_ids(sequences)
co = build_probe_1_data.create_cooccurrence(sequences, movie_ids)
pmi2 = build_probe_1_data.create_pmi(co, movie_ids)
all_movies = movie_ids["all_movies"]
movie_ids["id_to_movie"] = dict(zip([str(x) for x in
range(len(all_movies))], all_movies))
filtered_set = set(movie_ids["all_movies"])

expected_related = {
"a": ["b", "c", "d"],
"b": ["c", "d", "a"],
"c": ["b", "d", "a"],
"d": ["b", "c", "a"]
}

for movie, ex_related in expected_related.items():
related = build_probe_1_data.get_related_movies(movie, movie_ids, pmi2,
filtered_set, k=3)
self.assertEqual(", ".join(ex_related), ", ".join(related))

if __name__ == "__main__":
unittest.main()
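For context on the PMI-based tests above: one common definition scores a pair as PMI2(a, b) = log(p(a, b)^2 / (p(a) * p(b))), with probabilities estimated from sequence counts. The repository's calc_pmi2 may differ in details, but under that assumed formula the toy corpus in the test (pair ("a", "b") co-occurs once, "a" appears once, "b" twice, three sequences total) works out as in this sketch.

import numpy as np

# Toy counts drawn from the sequences used in the test.
co_ab, pop_a, pop_b, num_seq = 1, 1, 2, 3
p_ab, p_a, p_b = co_ab / num_seq, pop_a / num_seq, pop_b / num_seq
pmi2 = np.log(p_ab**2 / (p_a * p_b))
print(pmi2)  # log(0.5), roughly -0.69 under this assumed formula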
2 changes: 1 addition & 1 deletion trainer/constants.py
@@ -53,7 +53,7 @@
PROBE_1_TSV_PATH = {
"validation": os.path.join(PROBE_DIR, "probe_1.tsv")
}
PROBE_1_SEQ_PATH = {
PROBE_1_SEQ_TSV_PATH = {
"validation": os.path.join(PROBE_DIR, "probe_1_sequences.tsv")
}
PROBE_2_TSV_PATH = {
23 changes: 6 additions & 17 deletions trainer/finetune.py
@@ -50,6 +50,7 @@
"a step number or -1 for latest"))
flags.DEFINE_enum("tags_version", "normal", ["normal", "reversed", "masked"],
"version of the tags dataset: normal, reversed, or masked")
flags.DEFINE_integer("eval_start", 999900, "step at which to start eval")
flags.DEFINE_integer("beam_size", 1, "beam size for saved model")
flags.DEFINE_float("temperature", 1.0, "temperature for saved model")
flags.DEFINE_float("learning_rate", .003, "learning rate for finetuning")
@@ -107,8 +108,6 @@ def main(_):
# Supply a function which preprocesses text from the tf.data.Dataset.
text_preprocessor=[
preprocessing.preprocessor_wrapper("rd_recommendations")],
# Use the same vocabulary that we used for pre-training.
# sentencepiece_model_path=t5.data.DEFAULT_SPM_PATH,
# Lowercase targets before computing metrics.
postprocess_fn=t5.data.postprocessors.lower_text,
# We'll use bleu, bleu no titles, and recall as our evaluation metrics.
@@ -124,11 +123,9 @@
splits=["train", "validation"],
# Supply a function which preprocesses text from the tf.data.Dataset.
text_preprocessor=[preprocessing.preprocessor_wrapper("ml_sequences")],
# Use the same vocabulary that we used for pre-training.
# sentencepiece_model_path=t5.data.DEFAULT_SPM_PATH,
# Lowercase targets before computing metrics.
postprocess_fn=t5.data.postprocessors.lower_text,
# We'll use accuracy/recall as our evaluation metric.
# We'll use accuracy as our evaluation metric.
metric_fns=[t5.evaluation.metrics.accuracy])

# set up the ml-tags task (training on movielens tags and genres)
@@ -141,11 +138,9 @@
splits=["train", "validation"],
# Supply a function which preprocesses text from the tf.data.Dataset.
text_preprocessor=[preprocessing.preprocessor_wrapper("ml_tags")],
# Use the same vocabulary that we used for pre-training.
# sentencepiece_model_path=t5.data.DEFAULT_SPM_PATH,
# Lowercase targets before computing metrics.
postprocess_fn=t5.data.postprocessors.lower_text,
# We'll use accuracy/recall and bleu as our evaluation metrics.
# We'll use accuracy as our evaluation metric.
metric_fns=[t5.evaluation.metrics.accuracy])

# set up the ml-reviews task (training on movielens movies with imdb reviews)
@@ -157,31 +152,25 @@
splits=["train", "validation"],
# Supply a function which preprocesses text from the tf.data.Dataset.
text_preprocessor=[preprocessing.preprocessor_wrapper("ml_reviews")],
# Use the same vocabulary that we used for pre-training.
# sentencepiece_model_path=t5.data.DEFAULT_SPM_PATH,
# Lowercase targets before computing metrics.
postprocess_fn=t5.data.postprocessors.lower_text,
# We'll use accuracy/recall and bleu as our evaluation metrics.
# We'll use bleu as our evaluation metric.
metric_fns=[metrics.t2t_bleu])

if "probe" in FLAGS.mode:
if "sequences" in FLAGS.mode:
t5.data.TaskRegistry.add(
FLAGS.mode,
# Supply a function which returns a tf.data.Dataset.
dataset_fn=preprocessing.dataset_fn_wrapper(FLAGS.mode),
splits=["validation"],
# Supply a function which preprocesses text from the tf.data.Dataset.
text_preprocessor=[
preprocessing.preprocessor_wrapper("ml_sequences")],
metric_fns=[metrics.probe_pair_accuracy])
else:
t5.data.TaskRegistry.add(
FLAGS.mode,
# Supply a function which returns a tf.data.Dataset.
dataset_fn=preprocessing.dataset_fn_wrapper(FLAGS.mode),
splits=["validation"],
# Supply a function which preprocesses text from the tf.data.Dataset.
text_preprocessor=[
preprocessing.preprocessor_wrapper("rd_recommendations")],
metric_fns=[metrics.probe_pair_accuracy])
@@ -239,14 +228,14 @@ def main(_):
model.batch_size = train_batch_size * 8
model.eval(
mixture_or_task_name=FLAGS.task,
checkpoint_steps=list(range(999900, 999901+FLAGS.steps, 2000)),
checkpoint_steps=list(range(FLAGS.eval_start, 999901 + FLAGS.steps, 2000)),
compute_sequence_length=False
)

if "probe" in FLAGS.mode:
model.batch_size = train_batch_size * 8

for steps in range(999900, 999901+FLAGS.steps, 2000):
for steps in range(FLAGS.eval_start, 999901 + FLAGS.steps, 2000):
model.eval(
mixture_or_task_name=FLAGS.mode,
checkpoint_steps=steps,
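The effect of the new eval_start flag is easiest to see on the checkpoint schedule itself. A quick illustration with arbitrarily chosen flag values (previously the start step was hard-coded at 999900):

eval_start, steps = 999900, 10000
checkpoint_steps = list(range(eval_start, 999901 + steps, 2000))
print(checkpoint_steps)
# [999900, 1001900, 1003900, 1005900, 1007900, 1009900]

Raising eval_start simply skips evaluation of the earlier checkpoints.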
