From ff2a0010806ecadc1aaab0d4367871b54ad02c7d Mon Sep 17 00:00:00 2001 From: Manav Shah Date: Tue, 29 Oct 2024 02:38:02 -0400 Subject: [PATCH 1/5] Adding Streaming platforms for suggested movies. --- requirements.txt | 58 ++++++++++++++++++++++++----- src/recommenderapp/app.py | 12 +++++- src/recommenderapp/static/script.js | 7 ++++ src/recommenderapp/utils.py | 29 +++++++++++++++ 4 files changed, 95 insertions(+), 11 deletions(-) diff --git a/requirements.txt b/requirements.txt index 174a692f5..be22a765c 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,14 +1,54 @@ -numpy==1.25.2 -pandas==2.1.0 +astroid==3.0.3 +autopep8==2.0.4 +bcrypt==4.0.1 +black==23.7.0 +blinker==1.8.2 +click==8.1.7 +coverage==7.6.4 +dill==0.3.9 +distlib==0.3.9 +dnspython==2.7.0 +exceptiongroup==1.2.2 +filelock==3.16.1 Flask==2.3.3 Flask-Cors==4.0.0 -autopep8==2.0.4 +Flask-PyMongo==2.3.0 +importlib_metadata==8.5.0 +iniconfig==2.0.0 +isort==5.13.2 +itsdangerous==2.2.0 +Jinja2==3.1.4 +joblib==1.4.2 +MarkupSafe==3.0.2 +mccabe==0.7.0 +mypy-extensions==1.0.0 +numpy==1.25.2 +packaging==24.1 +pandas==2.1.0 +pathspec==0.12.1 +pbr==6.1.0 +platformdirs==4.3.6 +pluggy==1.5.0 +protobuf==4.21.12 +pycodestyle==2.12.1 +pylint==3.0.1 +pymongo==4.10.1 pytest==7.4.2 pytest-cov==4.1.0 -pylint==3.0.1 +python-dateutil==2.9.0.post0 python-dotenv==1.0.0 -mysql==0.0.3 -mysqlclient==2.2.0 -mysql-connector-python==8.2.0 -black===23.7.0 -bcrypt===4.0.1 +pytz==2024.2 +scikit-surprise==1.1.4 +scipy==1.13.1 +six==1.16.0 +stevedore==5.3.0 +surprise==0.1 +tomli==2.0.2 +tomlkit==0.13.2 +typing_extensions==4.12.2 +tzdata==2024.2 +virtualenv==20.27.0 +virtualenv-clone==0.5.7 +virtualenvwrapper==6.1.1 +Werkzeug==3.0.4 +zipp==3.20.2 diff --git a/src/recommenderapp/app.py b/src/recommenderapp/app.py index f8de60b27..c108682e5 100644 --- a/src/recommenderapp/app.py +++ b/src/recommenderapp/app.py @@ -12,7 +12,6 @@ import os from flask import Flask, jsonify, render_template, request, g, redirect, url_for from flask_cors import CORS -import mysql.connector from pymongo import MongoClient from bson.objectid import ObjectId from dotenv import load_dotenv @@ -29,8 +28,11 @@ get_friends, get_recent_friend_movies, get_user_history, + fetch_streaming_link ) from search import Search +from flask import Flask, request, jsonify +import requests sys.path.append("../../") from src.prediction_scripts.item_based import ( @@ -132,7 +134,13 @@ def predict(): user_rating, user[1], client ) - resp = {"recommendations": recommendations, "genres": genres, "imdb_id": imdb_id} + web_url = [] + for element in imdb_id: + web_url.append(fetch_streaming_link(element)) + + resp = {"recommendations": recommendations, "genres": genres, "imdb_id": imdb_id, "web_url": new_list} + + print(resp, end="\n") return resp diff --git a/src/recommenderapp/static/script.js b/src/recommenderapp/static/script.js index b7fed335a..bf72ed109 100644 --- a/src/recommenderapp/static/script.js +++ b/src/recommenderapp/static/script.js @@ -78,15 +78,21 @@ $(document).ready(function () { var i = 0 var recommendations = response["recommendations"] var imdbIds = response["imdb_id"] + var webUrls = response["web_url"] for (var i = 0; i < recommendations.length; i++) { var element = recommendations[i] var imdbID = imdbIds[i] + var web_url = webUrls[i] var diventry = $("
") var fieldset = $("
", { id: i }).css("border", "0") var link = $("") .text("IMDb🔗") .css({ "text-decoration": "none" }) .attr("href", "https://www.imdb.com/title/" + imdbID) + var streaming_link = $("") + .text(" Stream Here!🍿") + .css({ "text-decoration": "none" }) + .attr("href", web_url) var li = $("
  • ").text(element) var radios = $(` @@ -112,6 +118,7 @@ $(document).ready(function () { diventry.append(li) diventry.append(link) + diventry.append(streaming_link) diventry.append(radios) fieldset.append(diventry) ulList.append(fieldset) diff --git a/src/recommenderapp/utils.py b/src/recommenderapp/utils.py index 048503993..1efe176f7 100644 --- a/src/recommenderapp/utils.py +++ b/src/recommenderapp/utils.py @@ -20,6 +20,8 @@ import json import pandas as pd import os +from flask import Flask, request, jsonify +import requests def create_colored_tags(genres): @@ -464,3 +466,30 @@ def get_user_history(client, user_id): except Exception as e: print(f"Error retrieving user history: {str(e)}") raise + +def fetch_streaming_link(imdb_id): + + if not imdb_id: + return jsonify({'error': 'Please provide imdb_id'}), 400 + + url = f'https://api.watchmode.com/v1/title/{imdb_id}/sources/' + api_key = 'fh04Ehayqo4Rdn7RJ0vaGttCD8QYbmWRgZsB4DYy' + + headers = { + 'Authorization': f'Bearer {api_key}' + } + + params = { + 'apiKey': api_key, + 'regions': 'US' + } + + response = requests.get(url, headers=headers, params=params) + + sources = {item["name"]: {"platform": item["name"], "url": item["web_url"]} for item in response.json()} + res = sorted(sources.values(), key=lambda x: x["platform"]) + + if res: # Check if res is not empty + return res[0]["url"] # Returns the first URL + else: + return None \ No newline at end of file From c54487328ec2246053b20e0844aa51488eac0279 Mon Sep 17 00:00:00 2001 From: Manav Shah Date: Tue, 29 Oct 2024 02:41:56 -0400 Subject: [PATCH 2/5] formatted code. --- src/recommenderapp/app.py | 13 +++++++++---- src/recommenderapp/utils.py | 28 +++++++++++++--------------- test/test_predict.py | 10 +++++++--- 3 files changed, 29 insertions(+), 22 deletions(-) diff --git a/src/recommenderapp/app.py b/src/recommenderapp/app.py index c108682e5..0eddc81b3 100644 --- a/src/recommenderapp/app.py +++ b/src/recommenderapp/app.py @@ -28,7 +28,7 @@ get_friends, get_recent_friend_movies, get_user_history, - fetch_streaming_link + fetch_streaming_link, ) from search import Search from flask import Flask, request, jsonify @@ -137,10 +137,15 @@ def predict(): web_url = [] for element in imdb_id: web_url.append(fetch_streaming_link(element)) - - resp = {"recommendations": recommendations, "genres": genres, "imdb_id": imdb_id, "web_url": new_list} - print(resp, end="\n") + resp = { + "recommendations": recommendations, + "genres": genres, + "imdb_id": imdb_id, + "web_url": new_list, + } + + print(resp, end="\n") return resp diff --git a/src/recommenderapp/utils.py b/src/recommenderapp/utils.py index 1efe176f7..5610d3fb2 100644 --- a/src/recommenderapp/utils.py +++ b/src/recommenderapp/utils.py @@ -467,29 +467,27 @@ def get_user_history(client, user_id): print(f"Error retrieving user history: {str(e)}") raise + def fetch_streaming_link(imdb_id): - if not imdb_id: - return jsonify({'error': 'Please provide imdb_id'}), 400 + return jsonify({"error": "Please provide imdb_id"}), 400 - url = f'https://api.watchmode.com/v1/title/{imdb_id}/sources/' - api_key = 'fh04Ehayqo4Rdn7RJ0vaGttCD8QYbmWRgZsB4DYy' - - headers = { - 'Authorization': f'Bearer {api_key}' - } - - params = { - 'apiKey': api_key, - 'regions': 'US' - } + url = f"https://api.watchmode.com/v1/title/{imdb_id}/sources/" + api_key = "fh04Ehayqo4Rdn7RJ0vaGttCD8QYbmWRgZsB4DYy" + + headers = {"Authorization": f"Bearer {api_key}"} + + params = {"apiKey": api_key, "regions": "US"} response = requests.get(url, headers=headers, params=params) - sources = {item["name"]: {"platform": item["name"], "url": item["web_url"]} for item in response.json()} + sources = { + item["name"]: {"platform": item["name"], "url": item["web_url"]} + for item in response.json() + } res = sorted(sources.values(), key=lambda x: x["platform"]) if res: # Check if res is not empty return res[0]["url"] # Returns the first URL else: - return None \ No newline at end of file + return None diff --git a/test/test_predict.py b/test/test_predict.py index 5b3c722d3..a2739ab79 100644 --- a/test/test_predict.py +++ b/test/test_predict.py @@ -71,15 +71,19 @@ def test_large_history_input(self): ts = [{"title": f"Movie {i}", "rating": 10.0} for i in range(500)] recommendations, _, _ = recommend_for_new_user(ts, user_id, client) self.assertTrue(len(recommendations) <= 10) - + def test_genre_diversity_in_recommendations(self): ts = [ {"title": "Mortal Kombat (1995)", "rating": 8.0}, {"title": "Les Miserables (1995)", "rating": 7.0}, - {"title": "Jurassic Park (1993)", "rating": 9.0} + {"title": "Jurassic Park (1993)", "rating": 9.0}, ] _, genres, _ = recommend_for_new_user(ts, user_id, client) unique_genres = set(g for genre in genres for g in genre.split("|")) - self.assertTrue({"Action", "History", "Science Fiction"}.issubset(unique_genres)) + self.assertTrue( + {"Action", "History", "Science Fiction"}.issubset(unique_genres) + ) + + if __name__ == "__main__": unittest.main() From 6404bf231083664f10a1a09fdb6c441bcfe9f860 Mon Sep 17 00:00:00 2001 From: Manav Shah Date: Wed, 30 Oct 2024 23:41:44 -0400 Subject: [PATCH 3/5] Fixing pylint issues. --- src/recommenderapp/app.py | 15 ++++----------- src/recommenderapp/item_based.py | 2 +- src/recommenderapp/utils.py | 8 +++++--- 3 files changed, 10 insertions(+), 15 deletions(-) diff --git a/src/recommenderapp/app.py b/src/recommenderapp/app.py index 40bc14442..88f34af0d 100644 --- a/src/recommenderapp/app.py +++ b/src/recommenderapp/app.py @@ -11,14 +11,14 @@ import json from flask import Flask, jsonify, render_template, request from flask_cors import CORS -from pymongo import MongoClient from pymongo.errors import ( OperationFailure, DuplicateKeyError, ) +from search import Search from bson.objectid import ObjectId -from src.recommenderapp.client import client -from src.recommenderapp.utils import ( +from client import client +from utils import ( beautify_feedback_data, send_email_to_user, create_account, @@ -29,16 +29,10 @@ add_friend, get_friends, get_recent_friend_movies, - get_user_history, fetch_streaming_link, ) -from search import Search -from flask import Flask, request, jsonify -import requests -) -from src.recommenderapp.search import Search -from src.recommenderapp.item_based import ( +from item_based import ( recommend_for_new_user, ) @@ -130,7 +124,6 @@ def predict(): "recommendations": recommendations, "genres": genres, "imdb_id": imdb_id, - "web_url": new_list, } print(resp, end="\n") diff --git a/src/recommenderapp/item_based.py b/src/recommenderapp/item_based.py index 29bd39a3c..eb188d50f 100644 --- a/src/recommenderapp/item_based.py +++ b/src/recommenderapp/item_based.py @@ -8,7 +8,7 @@ import os import pandas as pd from surprise import Dataset, Reader, SVD -from src.recommenderapp.utils import get_user_ratings +from utils import get_user_ratings app_dir = os.path.dirname(os.path.abspath(__file__)) code_dir = os.path.dirname(app_dir) diff --git a/src/recommenderapp/utils.py b/src/recommenderapp/utils.py index 3a274ba2f..645a77f09 100644 --- a/src/recommenderapp/utils.py +++ b/src/recommenderapp/utils.py @@ -22,7 +22,6 @@ import json import pandas as pd import os -from flask import Flask, request, jsonify import requests @@ -475,6 +474,9 @@ def get_user_history(client, user_id): def fetch_streaming_link(imdb_id): + """ + Fetches the streaming links of movies. + """ if not imdb_id: return jsonify({"error": "Please provide imdb_id"}), 400 @@ -495,5 +497,5 @@ def fetch_streaming_link(imdb_id): if res: # Check if res is not empty return res[0]["url"] # Returns the first URL - else: - return None + + return None From 02a57f94bf52067144f27b7723537a73bb6a638b Mon Sep 17 00:00:00 2001 From: Manav Shah Date: Wed, 30 Oct 2024 23:55:38 -0400 Subject: [PATCH 4/5] Pylint Fix. --- src/prediction_scripts/item_based.py | 119 +++++++++++++++++++++++++++ src/recommenderapp/app.py | 8 +- src/recommenderapp/item_based.py | 2 +- 3 files changed, 124 insertions(+), 5 deletions(-) create mode 100644 src/prediction_scripts/item_based.py diff --git a/src/prediction_scripts/item_based.py b/src/prediction_scripts/item_based.py new file mode 100644 index 000000000..88ebd1f53 --- /dev/null +++ b/src/prediction_scripts/item_based.py @@ -0,0 +1,119 @@ +""" +Copyright (c) 2024 Srimadh Vasuki Rao, Manav Shah, Akul Devali +This code is licensed under MIT license (see LICENSE for details) + +@author: PopcornPicks +""" + +import os +import pandas as pd +from surprise import Dataset, Reader, SVD +import sys + +sys.path.append("../") +from src.recommenderapp.utils import get_user_ratings + +sys.path.remove("../") +import json + +app_dir = os.path.dirname(os.path.abspath(__file__)) +code_dir = os.path.dirname(app_dir) +project_dir = os.path.dirname(code_dir) + + +from surprise import Dataset, Reader, SVD +import pandas as pd +import numpy as np +import os + + +def recommend_for_new_user(user_rating, user_id, client): + """ + Generates a list of recommended movie titles for a new user using a hybrid approach: + collaborative filtering based on user history combined with metadata matching with current selection. + """ + if not user_rating: + return [], None, None + movies = pd.read_csv(os.path.join(project_dir, "data", "movies.csv")) + + all_ratings = get_user_ratings(client) + + ratings = pd.DataFrame(all_ratings) + ratings["user_id"] = ratings["user_id"].astype(str) + ratings["movie_id"] = ratings["movie_id"].astype(str) + surprise_df = ratings[["user_id", "movie_id", "score"]].copy() + surprise_df.columns = ["user", "item", "rating"] + + surprise_df["user"] = surprise_df["user"].apply( + lambda x: int(x, 16) if pd.notnull(x) else None + ) + surprise_df["item"] = surprise_df["item"].astype(str).astype(int) + + reader = Reader(rating_scale=(0, 10)) + data = Dataset.load_from_df(surprise_df, reader) + trainset = data.build_full_trainset() + svd_model = SVD() + svd_model.fit(trainset) + + user_rated_movies = ratings[ratings["user_id"] == user_id]["movie_id"].tolist() + all_movie_ids = movies["movieId"].unique() + + recommendations = [] + for movie_id in all_movie_ids: + if movie_id not in user_rated_movies: + pred = svd_model.predict(int(user_id, 16), movie_id) + recommendations.append((movie_id, pred.est)) + + recommendations_df = pd.DataFrame( + recommendations, columns=["movieId", "predicted_rating"] + ) + enriched_movies = pd.merge(recommendations_df, movies, on="movieId") + + selected_movies = movies[ + movies["title"].isin([movie["title"] for movie in user_rating]) + ] + + if selected_movies.empty: + return [], None, None + + avg_genre_vector = selected_movies["genres"].str.get_dummies(sep="|").mean() + + enriched_movies_genres = enriched_movies["genres"].str.get_dummies(sep="|") + enriched_movies_genres = enriched_movies_genres.reindex( + columns=avg_genre_vector.index, fill_value=0 + ) + + enriched_movies["genre_similarity"] = enriched_movies_genres.dot(avg_genre_vector) + + avg_runtime = selected_movies["runtime"].mean() + + enriched_movies["runtime_similarity"] = ( + 1 + - abs(enriched_movies["runtime"] - avg_runtime) + / enriched_movies["runtime"].max() + ) + + if len(ratings[ratings["user_id"] == user_id]) < 10: + user_rating_weight = 1 + collaborative_weight = 0 + genre_weight = 0 + else: + user_rating_weight = 0.5 + collaborative_weight = 0.4 + genre_weight = 0.1 + + enriched_movies["hybrid_score"] = ( + user_rating_weight * enriched_movies["genre_similarity"] + + collaborative_weight * enriched_movies["predicted_rating"] + + genre_weight * enriched_movies["runtime_similarity"] + ) + + enriched_movies.sort_values(by="hybrid_score", ascending=False, inplace=True) + + top_movies = enriched_movies.head(10) + + return ( + list(top_movies["title"]), + list(top_movies["genres"]), + list(top_movies["imdb_id"]), + ) diff --git a/src/recommenderapp/app.py b/src/recommenderapp/app.py index 88f34af0d..95a580538 100644 --- a/src/recommenderapp/app.py +++ b/src/recommenderapp/app.py @@ -15,10 +15,10 @@ OperationFailure, DuplicateKeyError, ) -from search import Search +from src.recommenderapp.search import Search from bson.objectid import ObjectId -from client import client -from utils import ( +from src.recommenderapp.client import client +from src.recommenderapp.utils import ( beautify_feedback_data, send_email_to_user, create_account, @@ -32,7 +32,7 @@ fetch_streaming_link, ) -from item_based import ( +from src.recommenderapp.item_based import ( recommend_for_new_user, ) diff --git a/src/recommenderapp/item_based.py b/src/recommenderapp/item_based.py index eb188d50f..29bd39a3c 100644 --- a/src/recommenderapp/item_based.py +++ b/src/recommenderapp/item_based.py @@ -8,7 +8,7 @@ import os import pandas as pd from surprise import Dataset, Reader, SVD -from utils import get_user_ratings +from src.recommenderapp.utils import get_user_ratings app_dir = os.path.dirname(os.path.abspath(__file__)) code_dir = os.path.dirname(app_dir) From cb31fa418d046c3d1fee5ae4ba3ab874240bac37 Mon Sep 17 00:00:00 2001 From: Manav Shah Date: Thu, 31 Oct 2024 01:36:27 -0400 Subject: [PATCH 5/5] Updating it to make it compliant with main branch. --- src/prediction_scripts/item_based.py | 119 --------------------------- 1 file changed, 119 deletions(-) delete mode 100644 src/prediction_scripts/item_based.py diff --git a/src/prediction_scripts/item_based.py b/src/prediction_scripts/item_based.py deleted file mode 100644 index 88ebd1f53..000000000 --- a/src/prediction_scripts/item_based.py +++ /dev/null @@ -1,119 +0,0 @@ -""" -Copyright (c) 2024 Srimadh Vasuki Rao, Manav Shah, Akul Devali -This code is licensed under MIT license (see LICENSE for details) - -@author: PopcornPicks -""" - -import os -import pandas as pd -from surprise import Dataset, Reader, SVD -import sys - -sys.path.append("../") -from src.recommenderapp.utils import get_user_ratings - -sys.path.remove("../") -import json - -app_dir = os.path.dirname(os.path.abspath(__file__)) -code_dir = os.path.dirname(app_dir) -project_dir = os.path.dirname(code_dir) - - -from surprise import Dataset, Reader, SVD -import pandas as pd -import numpy as np -import os - - -def recommend_for_new_user(user_rating, user_id, client): - """ - Generates a list of recommended movie titles for a new user using a hybrid approach: - collaborative filtering based on user history combined with metadata matching with current selection. - """ - if not user_rating: - return [], None, None - movies = pd.read_csv(os.path.join(project_dir, "data", "movies.csv")) - - all_ratings = get_user_ratings(client) - - ratings = pd.DataFrame(all_ratings) - ratings["user_id"] = ratings["user_id"].astype(str) - ratings["movie_id"] = ratings["movie_id"].astype(str) - surprise_df = ratings[["user_id", "movie_id", "score"]].copy() - surprise_df.columns = ["user", "item", "rating"] - - surprise_df["user"] = surprise_df["user"].apply( - lambda x: int(x, 16) if pd.notnull(x) else None - ) - surprise_df["item"] = surprise_df["item"].astype(str).astype(int) - - reader = Reader(rating_scale=(0, 10)) - data = Dataset.load_from_df(surprise_df, reader) - trainset = data.build_full_trainset() - svd_model = SVD() - svd_model.fit(trainset) - - user_rated_movies = ratings[ratings["user_id"] == user_id]["movie_id"].tolist() - all_movie_ids = movies["movieId"].unique() - - recommendations = [] - for movie_id in all_movie_ids: - if movie_id not in user_rated_movies: - pred = svd_model.predict(int(user_id, 16), movie_id) - recommendations.append((movie_id, pred.est)) - - recommendations_df = pd.DataFrame( - recommendations, columns=["movieId", "predicted_rating"] - ) - enriched_movies = pd.merge(recommendations_df, movies, on="movieId") - - selected_movies = movies[ - movies["title"].isin([movie["title"] for movie in user_rating]) - ] - - if selected_movies.empty: - return [], None, None - - avg_genre_vector = selected_movies["genres"].str.get_dummies(sep="|").mean() - - enriched_movies_genres = enriched_movies["genres"].str.get_dummies(sep="|") - enriched_movies_genres = enriched_movies_genres.reindex( - columns=avg_genre_vector.index, fill_value=0 - ) - - enriched_movies["genre_similarity"] = enriched_movies_genres.dot(avg_genre_vector) - - avg_runtime = selected_movies["runtime"].mean() - - enriched_movies["runtime_similarity"] = ( - 1 - - abs(enriched_movies["runtime"] - avg_runtime) - / enriched_movies["runtime"].max() - ) - - if len(ratings[ratings["user_id"] == user_id]) < 10: - user_rating_weight = 1 - collaborative_weight = 0 - genre_weight = 0 - else: - user_rating_weight = 0.5 - collaborative_weight = 0.4 - genre_weight = 0.1 - - enriched_movies["hybrid_score"] = ( - user_rating_weight * enriched_movies["genre_similarity"] - + collaborative_weight * enriched_movies["predicted_rating"] - + genre_weight * enriched_movies["runtime_similarity"] - ) - - enriched_movies.sort_values(by="hybrid_score", ascending=False, inplace=True) - - top_movies = enriched_movies.head(10) - - return ( - list(top_movies["title"]), - list(top_movies["genres"]), - list(top_movies["imdb_id"]), - )