Skip to content

Commit

Permalink
Make confusion_matrix and roc generic
Browse files: browse the repository at this point in the history
  • Loading branch information
Jeffwan committed May 5, 2019
1 parent bcca4ed commit b5d9b76
Show file tree
Hide file tree
Showing 2 changed files with 13 additions and 7 deletions.
9 changes: 6 additions & 3 deletions components/local/confusion_matrix/src/confusion_matrix.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
import argparse
import json
import os
import urlparse
import pandas as pd
from sklearn.metrics import confusion_matrix, accuracy_score
from tensorflow.python.lib.io import file_io
Expand All @@ -40,7 +41,9 @@ def main(argv=None):
'If not set, the input must include a "target" column.')
args = parser.parse_args()

on_cloud = args.output.startswith('gs://')
storage_service_dict = {'minio': 'minio', 'gs': 'gcs', 's3': 's3', '': 'gcs'}
storage_service_scheme = urlparse.urlparse(args.output).scheme
on_cloud = True if storage_service_scheme else False
if not on_cloud and not os.path.exists(args.output):
os.makedirs(args.output)

Expand All @@ -52,7 +55,7 @@ def main(argv=None):
for file in files:
with file_io.FileIO(file, 'r') as f:
dfs.append(pd.read_csv(f, names=names))

df = pd.concat(dfs)
if args.target_lambda:
df['target'] = df.apply(eval(args.target_lambda), axis=1)
Expand All @@ -72,7 +75,7 @@ def main(argv=None):
metadata = {
'outputs' : [{
'type': 'confusion_matrix',
'storage': 'gcs',
'storage': storage_service_dict[storage_service_scheme],
'format': 'csv',
'schema': [
{'name': 'target', 'type': 'CATEGORY'},
Expand Down
11 changes: 7 additions & 4 deletions components/local/roc/src/roc.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
import argparse
import json
import os
import urlparse
import pandas as pd
from sklearn.metrics import roc_curve, roc_auc_score
from tensorflow.python.lib.io import file_io
Expand All @@ -45,7 +46,9 @@ def main(argv=None):
parser.add_argument('--output', type=str, help='GCS path of the output directory.')
args = parser.parse_args()

on_cloud = args.output.startswith('gs://')
storage_service_dict = {'minio': 'minio', 'gs': 'gcs', 's3': 's3', '': 'gcs'}
storage_service_scheme = urlparse.urlparse(args.output).scheme
on_cloud = True if storage_service_scheme else False
if not on_cloud and not os.path.exists(args.output):
os.makedirs(args.output)

Expand All @@ -64,7 +67,7 @@ def main(argv=None):
for file in files:
with file_io.FileIO(file, 'r') as f:
dfs.append(pd.read_csv(f, names=names))

df = pd.concat(dfs)
if args.target_lambda:
df['target'] = df.apply(eval(args.target_lambda), axis=1)
Expand All @@ -76,11 +79,11 @@ def main(argv=None):
roc_file = os.path.join(args.output, 'roc.csv')
with file_io.FileIO(roc_file, 'w') as f:
df_roc.to_csv(f, columns=['fpr', 'tpr', 'thresholds'], header=False, index=False)

metadata = {
'outputs': [{
'type': 'roc',
'storage': 'gcs',
'storage': storage_service_dict[storage_service_scheme],
'format': 'csv',
'schema': [
{'name': 'fpr', 'type': 'NUMBER'},
Expand Down

0 comments on commit b5d9b76

Please sign in to comment.