From 7bcab561937022c3d3e22d462887161d917e452d Mon Sep 17 00:00:00 2001 From: darrylong Date: Fri, 23 Feb 2024 17:18:40 +0800 Subject: [PATCH 01/12] Include metric_user_results in evaluation response, added eval json endpoint --- cornac/serving/app.py | 114 +++++++++++++++++++++++++++++++++--------- 1 file changed, 89 insertions(+), 25 deletions(-) diff --git a/cornac/serving/app.py b/cornac/serving/app.py index 5e6d0484..35e0548d 100644 --- a/cornac/serving/app.py +++ b/cornac/serving/app.py @@ -26,7 +26,7 @@ from cornac.metrics import * try: - from flask import Flask, jsonify, request + from flask import Flask, jsonify, request, abort except ImportError: exit("Flask is required in order to serve models.\n" + "Run: pip3 install Flask") @@ -197,34 +197,11 @@ def evaluate(): return "Unable to evaluate. 'train_set' is not provided", 400 query = request.json + validate_query(query) - query_metrics = query.get("metrics") - rating_threshold = query.get("rating_threshold", 1.0) exclude_unknowns = ( query.get("exclude_unknowns", "true").lower() == "true" ) # exclude unknown users/items by default, otherwise specified - user_based = ( - query.get("user_based", "true").lower() == "true" - ) # user_based evaluation by default, otherwise specified - - if query_metrics is None: - return "metrics is required", 400 - elif not isinstance(query_metrics, list): - return "metrics must be an array of metrics", 400 - - # organize metrics - metrics = [] - for metric in query_metrics: - try: - metrics.append(_safe_eval(metric)) - except: - return ( - f"Invalid metric initiation: {metric}.\n" - + "Please input correct metrics (e.g., 'RMSE()', 'Recall(k=10)')", - 400, - ) - - rating_metrics, ranking_metrics = BaseMethod.organize_metrics(metrics) # read data data = [] @@ -244,6 +221,85 @@ def evaluate(): exclude_unknowns=exclude_unknowns, ) + return process_evaluation(test_set, query, exclude_unknowns) + + +def validate_query(query): + query_metrics = query.get("metrics") + + if query_metrics is None: + abort(400, "metrics is required") + elif not isinstance(query_metrics, list): + abort(400, "metrics must be an array of metrics") + + +@app.route("/evaluate-json", methods=["POST"]) +def evaluate_json(): + global model, train_set, metric_classnames + + # Input validation + if model is None: + abort(400, "Model is not yet loaded. Please try again later.") + + if train_set is None: + abort(400, "Unable to evaluate. 'train_set' is not provided") + + query = request.get_json() + + validate_query(query) + + if "data" not in query: + abort(400, "Evaluation data is not provided. 'data' is required in the form of a list of tuples (uid, iid, rating).") + + exclude_unknowns = ( + query.get("exclude_unknowns", "true").lower() == "true" + ) # exclude unknown users/items by default, otherwise specified + + # read data + data = query.get("data") + + if not len(data): + raise ValueError("No data available to evaluate the model.") + + # convert rows of data to tuples + for i, row in enumerate(data): + data[i] = tuple(row) + + test_set = Dataset.build( + data, + fmt="UIR", + global_uid_map=train_set.uid_map, + global_iid_map=train_set.iid_map, + exclude_unknowns=exclude_unknowns, + ) + + return process_evaluation(test_set, query, exclude_unknowns) + + +def process_evaluation(test_set, query, exclude_unknowns): + global model, train_set + + rating_threshold = query.get("rating_threshold", 1.0) + user_based = ( + query.get("user_based", "true").lower() == "true" + ) # user_based evaluation by default, otherwise specified + + query_metrics = query.get("metrics") + + # organize metrics + metrics = [] + for metric in query_metrics: + try: + metrics.append(_safe_eval(metric)) + except: + return ( + f"Invalid metric initiation: {metric}.\n" + + "Please input correct metrics (e.g., 'RMSE()', 'Recall(k=10)')", + 400, + ) + + rating_metrics, ranking_metrics = BaseMethod.organize_metrics(metrics) + # evaluation result = BaseMethod.eval( model=model, @@ -257,10 +313,18 @@ def evaluate(): user_based=user_based, verbose=False, ) + + # change metric_user_results inner keys to string + metric_user_results = {} + for metric, user_results in result.metric_user_results.items(): + metric_user_results[metric] = { + str(k): v for k, v in user_results.items() + } # response response = { "result": result.metric_avg_results, + "user_result": metric_user_results, "query": query, } From 12aa123eb0e69520c6c4716cf9e5d399c669abf8 Mon Sep 17 00:00:00 2001 From: darrylong Date: Mon, 26 Feb 2024 14:56:45 +0800 Subject: [PATCH 02/12] Remove query from response --- cornac/serving/app.py | 1 - 1 file changed, 1 deletion(-) diff --git a/cornac/serving/app.py b/cornac/serving/app.py index 35e0548d..c28f1160 100644 --- a/cornac/serving/app.py +++ b/cornac/serving/app.py @@ -325,7 +325,6 @@ def process_evaluation(test_set, query, exclude_unknowns): response = { "result": result.metric_avg_results, "user_result": metric_user_results, - "query": query, } return jsonify(response), 200 From 1422161d5f051014e2cc3374c712094acca3e49d Mon Sep 17 00:00:00 2001 From: darrylong Date: Tue, 27 Feb 2024 17:13:06 +0800 Subject: [PATCH 03/12] Utilize mapped inversed user id map to get original id in response --- cornac/serving/app.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/cornac/serving/app.py b/cornac/serving/app.py index c28f1160..b7e97367 100644 --- a/cornac/serving/app.py +++ b/cornac/serving/app.py @@ -313,12 +313,15 @@ def process_evaluation(test_set, query, exclude_unknowns): user_based=user_based, verbose=False, ) - + + uid_map = train_set.uid_map + inversed_uid_map = {str(v): k for k, v in uid_map.items()} + # change metric_user_results inner keys to string metric_user_results = {} for metric, user_results in result.metric_user_results.items(): metric_user_results[metric] = { - str(k): v for k, v in user_results.items() + inversed_uid_map[str(k)]: v for k, v in user_results.items() } # response From 1f5d0ee7466f1e67364aee335d1c083c4f47046f Mon Sep 17 00:00:00 2001 From: darrylong Date: Tue, 27 Feb 2024 17:24:46 +0800 Subject: [PATCH 04/12] Update serving test case to remove 'query' and add 'user_result' in response --- tests/cornac/serving/test_app.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/cornac/serving/test_app.py b/tests/cornac/serving/test_app.py index b33d6e63..ccb9f3d6 100644 --- a/tests/cornac/serving/test_app.py +++ b/tests/cornac/serving/test_app.py @@ -96,9 +96,10 @@ def test_evaluate_json(client): response = client.post('/evaluate', json=json_data) # assert response.content_type == 'application/json' assert response.status_code == 200 - assert len(response.json['query']['metrics']) == 2 assert 'RMSE' in response.json['result'] assert 'Recall@5' in response.json['result'] + assert 'RMSE' in response.json['user_result'] + assert 'Recall@5' in response.json['user_result'] def test_evalulate_incorrect_get(client): From 6b86d62f48392bb0f39a6c729ccc4e463333bedc Mon Sep 17 00:00:00 2001 From: Quoc-Tuan Truong Date: Tue, 27 Feb 2024 10:39:27 -0800 Subject: [PATCH 05/12] simplify user ID mapping --- cornac/serving/app.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/cornac/serving/app.py b/cornac/serving/app.py index b7e97367..febc1cc8 100644 --- a/cornac/serving/app.py +++ b/cornac/serving/app.py @@ -314,14 +314,11 @@ def process_evaluation(test_set, query, exclude_unknowns): verbose=False, ) - uid_map = train_set.uid_map - inversed_uid_map = {str(v): k for k, v in uid_map.items()} - - # change metric_user_results inner keys to string + # map user index back into the original user ID metric_user_results = {} for metric, user_results in result.metric_user_results.items(): metric_user_results[metric] = { - inversed_uid_map[str(k)]: v for k, v in user_results.items() + train_set.user_ids[int(k)]: v for k, v in user_results.items() } # response From 6267719c3e0d301dc9b7a0d50563a30de9defb81 Mon Sep 17 00:00:00 2001 From: darrylong Date: Thu, 29 Feb 2024 17:33:09 +0800 Subject: [PATCH 06/12] Combined evaluation and evaluation_json endpoints --- cornac/serving/app.py | 35 ++++++++--------------------------- 1 file changed, 8 insertions(+), 27 deletions(-) diff --git a/cornac/serving/app.py b/cornac/serving/app.py index febc1cc8..79507fc1 100644 --- a/cornac/serving/app.py +++ b/cornac/serving/app.py @@ -203,6 +203,10 @@ def evaluate(): query.get("exclude_unknowns", "true").lower() == "true" ) # exclude unknown users/items by default, otherwise specified + if "use_data" in query: + data = query.get("use_data") + return evaluate_json(exclude_unknowns, query, data) + # read data data = [] data_fpath = "data/feedback.csv" @@ -210,7 +214,7 @@ def evaluate(): reader = Reader() data = reader.read(data_fpath, fmt="UIR", sep=",") - if not len(data): + if not data: raise ValueError("No data available to evaluate the model.") test_set = Dataset.build( @@ -233,32 +237,9 @@ def validate_query(query): abort(400, "metrics must be an array of metrics") -@app.route("/evaluate-json", methods=["POST"]) -def evaluate_json(): - global model, train_set, metric_classnames - - # Input validation - if model is None: - abort(400, "Model is not yet loaded. Please try again later.") - - if train_set is None: - abort(400, "Unable to evaluate. 'train_set' is not provided") - - query = request.get_json() - - validate_query(query) - - if "data" not in query: - abort(400, "Evaluation data is not provided. 'data' is required in the form of a list of tuples (uid, iid, rating).") - - exclude_unknowns = ( - query.get("exclude_unknowns", "true").lower() == "true" - ) # exclude unknown users/items by default, otherwise specified - +def evaluate_json(exclude_unknowns, query, data): # read data - data = query.get("data") - - if not len(data): + if not data: raise ValueError("No data available to evaluate the model.") # convert rows of data to tuples @@ -285,7 +266,7 @@ def process_evaluation(test_set, query, exclude_unknowns): ) # user_based evaluation by default, otherwise specified query_metrics = query.get("metrics") - + # organize metrics metrics = [] for metric in query_metrics: From 2d299ef236587b7c795eebced0a88c50ced33045 Mon Sep 17 00:00:00 2001 From: darrylong Date: Thu, 29 Feb 2024 18:43:49 +0800 Subject: [PATCH 07/12] Updated abort responses to show plaintext instead of html --- cornac/serving/app.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/cornac/serving/app.py b/cornac/serving/app.py index 79507fc1..67d3d664 100644 --- a/cornac/serving/app.py +++ b/cornac/serving/app.py @@ -26,7 +26,7 @@ from cornac.metrics import * try: - from flask import Flask, jsonify, request, abort + from flask import Flask, jsonify, request, abort, make_response except ImportError: exit("Flask is required in order to serve models.\n" + "Run: pip3 install Flask") @@ -231,10 +231,14 @@ def evaluate(): def validate_query(query): query_metrics = query.get("metrics") - if query_metrics is None: - abort(400, "metrics is required") + if not query_metrics: + response = make_response("metrics is required") + response.status_code = 400 + abort(response) elif not isinstance(query_metrics, list): - abort(400, "metrics must be an array of metrics") + response = make_response("metrics must be an array of metrics") + response.status_code = 400 + abort(response) def evaluate_json(exclude_unknowns, query, data): From 43257dd2c7929a92446cfe7b0ff7d4cab31ab707 Mon Sep 17 00:00:00 2001 From: darrylong Date: Thu, 29 Feb 2024 18:45:33 +0800 Subject: [PATCH 08/12] Added unit test cases --- tests/cornac/serving/test_app.py | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/tests/cornac/serving/test_app.py b/tests/cornac/serving/test_app.py index ccb9f3d6..f9fde529 100644 --- a/tests/cornac/serving/test_app.py +++ b/tests/cornac/serving/test_app.py @@ -111,3 +111,26 @@ def test_evalulate_incorrect_post(client): response = client.post('/evaluate') assert response.status_code == 415 # bad request, expect json + +def test_evaluate_missing_metrics(client): + json_data = { + 'metrics': [] + } + response = client.post('/evaluate', json=json_data) + assert response.status_code == 400 + assert response.data == b'metrics is required' + + +def test_evaluate_not_list_metrics(client): + json_data = { + 'metrics': 'RMSE()' + } + response = client.post('/evaluate', json=json_data) + assert response.status_code == 400 + assert response.data == b'metrics must be an array of metrics' + + +def test_recommend_missing_uid(client): + response = client.get('/recommend?k=5') + assert response.status_code == 400 + assert response.data == b'uid is required' From f56314a88756a2e3f2d2a77c3422b855f29f628b Mon Sep 17 00:00:00 2001 From: darrylong Date: Thu, 29 Feb 2024 23:52:05 +0800 Subject: [PATCH 09/12] Updated error response for empty data --- cornac/serving/app.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/cornac/serving/app.py b/cornac/serving/app.py index 67d3d664..cefa0802 100644 --- a/cornac/serving/app.py +++ b/cornac/serving/app.py @@ -215,7 +215,9 @@ def evaluate(): data = reader.read(data_fpath, fmt="UIR", sep=",") if not data: - raise ValueError("No data available to evaluate the model.") + response = make_response("No feedback has been provided so far. No data available to evaluate the model.") + response.status_code = 400 + abort(response) test_set = Dataset.build( data, @@ -244,7 +246,9 @@ def validate_query(query): def evaluate_json(exclude_unknowns, query, data): # read data if not data: - raise ValueError("No data available to evaluate the model.") + response = make_response("'use_data' is empty. No data available to evaluate the model.") + response.status_code = 400 + abort(response) # convert rows of data to tuples for i, row in enumerate(data): From bf7b7666d9316e30f20684777838b39984ecbd4e Mon Sep 17 00:00:00 2001 From: darrylong Date: Thu, 29 Feb 2024 23:52:34 +0800 Subject: [PATCH 10/12] Added unit tests for provided data evaluation --- tests/cornac/serving/test_app.py | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/tests/cornac/serving/test_app.py b/tests/cornac/serving/test_app.py index f9fde529..7db13d20 100644 --- a/tests/cornac/serving/test_app.py +++ b/tests/cornac/serving/test_app.py @@ -134,3 +134,29 @@ def test_recommend_missing_uid(client): response = client.get('/recommend?k=5') assert response.status_code == 400 assert response.data == b'uid is required' + + +def test_evaluate_use_data(client): + json_data = { + 'metrics': ['RMSE()', 'Recall(k=5)'], + 'use_data': [['930', '795', 5], ['195', '795', 3]] + } + response = client.post('/evaluate', json=json_data) + # assert response.content_type == 'application/json' + assert response.status_code == 200 + assert 'RMSE' in response.json['result'] + assert 'Recall@5' in response.json['result'] + assert 'RMSE' in response.json['user_result'] + assert 'Recall@5' in response.json['user_result'] + + +def test_evaluate_use_data_empty(client): + json_data = { + 'metrics': ['RMSE()', 'Recall(k=5)'], + 'use_data': [] + } + response = client.post('/evaluate', json=json_data) + assert response.status_code == 400 + assert response.data == b"'use_data' is empty. No data available to evaluate the model." + + From b61adc0dc9b4c9d72aadd186513cd8f8b9d9a643 Mon Sep 17 00:00:00 2001 From: Quoc-Tuan Truong Date: Thu, 29 Feb 2024 13:56:08 -0800 Subject: [PATCH 11/12] Update app.py --- cornac/serving/app.py | 41 ++++++++--------------------------------- 1 file changed, 8 insertions(+), 33 deletions(-) diff --git a/cornac/serving/app.py b/cornac/serving/app.py index cefa0802..41e9225b 100644 --- a/cornac/serving/app.py +++ b/cornac/serving/app.py @@ -185,7 +185,6 @@ def add_feedback(): return jsonify(data), 200 -# curl -X POST -H "Content-Type: application/json" -d '{"metrics": ["RMSE()", "NDCG(k=10)"]}' "http://localhost:8080/evaluate" @app.route("/evaluate", methods=["POST"]) def evaluate(): global model, train_set, metric_classnames @@ -203,16 +202,14 @@ def evaluate(): query.get("exclude_unknowns", "true").lower() == "true" ) # exclude unknown users/items by default, otherwise specified - if "use_data" in query: - data = query.get("use_data") - return evaluate_json(exclude_unknowns, query, data) - - # read data - data = [] - data_fpath = "data/feedback.csv" - if os.path.exists(data_fpath): - reader = Reader() - data = reader.read(data_fpath, fmt="UIR", sep=",") + if "data" in query: + data = query.get("data") + else: + data = [] + data_fpath = "data/feedback.csv" + if os.path.exists(data_fpath): + reader = Reader() + data = reader.read(data_fpath, fmt="UIR", sep=",") if not data: response = make_response("No feedback has been provided so far. No data available to evaluate the model.") @@ -243,28 +240,6 @@ def validate_query(query): abort(response) -def evaluate_json(exclude_unknowns, query, data): - # read data - if not data: - response = make_response("'use_data' is empty. No data available to evaluate the model.") - response.status_code = 400 - abort(response) - - # convert rows of data to tuples - for i, row in enumerate(data): - data[i] = tuple(row) - - test_set = Dataset.build( - data, - fmt="UIR", - global_uid_map=train_set.uid_map, - global_iid_map=train_set.iid_map, - exclude_unknowns=exclude_unknowns, - ) - - return process_evaluation(test_set, query, exclude_unknowns) - - def process_evaluation(test_set, query, exclude_unknowns): global model, train_set From bab99d6b04c8eb0057537f089fde322070b725a6 Mon Sep 17 00:00:00 2001 From: Quoc-Tuan Truong Date: Thu, 29 Feb 2024 14:05:27 -0800 Subject: [PATCH 12/12] Update test_app.py --- tests/cornac/serving/test_app.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/cornac/serving/test_app.py b/tests/cornac/serving/test_app.py index 7db13d20..6bfebe79 100644 --- a/tests/cornac/serving/test_app.py +++ b/tests/cornac/serving/test_app.py @@ -139,7 +139,7 @@ def test_recommend_missing_uid(client): def test_evaluate_use_data(client): json_data = { 'metrics': ['RMSE()', 'Recall(k=5)'], - 'use_data': [['930', '795', 5], ['195', '795', 3]] + 'data': [['930', '795', 5], ['195', '795', 3]] } response = client.post('/evaluate', json=json_data) # assert response.content_type == 'application/json' @@ -153,10 +153,10 @@ def test_evaluate_use_data(client): def test_evaluate_use_data_empty(client): json_data = { 'metrics': ['RMSE()', 'Recall(k=5)'], - 'use_data': [] + 'data': [] } response = client.post('/evaluate', json=json_data) assert response.status_code == 400 - assert response.data == b"'use_data' is empty. No data available to evaluate the model." + assert response.data == b"No feedback has been provided so far. No data available to evaluate the model."