From 2dce84fafd8e0b81be00cd097abb07b0088bb74f Mon Sep 17 00:00:00 2001 From: Stefan Suwelack Date: Mon, 18 Sep 2023 17:36:33 +0200 Subject: [PATCH 1/7] pre-defined layouts for model debug and comparison --- renumics/spotlight/layouts/__init__.py | 2 + renumics/spotlight/layouts/model_compare.py | 69 +++++++++++++++++++ renumics/spotlight/layouts/model_debug.py | 74 +++++++++++++++++++++ 3 files changed, 145 insertions(+) create mode 100644 renumics/spotlight/layouts/__init__.py create mode 100644 renumics/spotlight/layouts/model_compare.py create mode 100644 renumics/spotlight/layouts/model_debug.py diff --git a/renumics/spotlight/layouts/__init__.py b/renumics/spotlight/layouts/__init__.py new file mode 100644 index 00000000..9ea84248 --- /dev/null +++ b/renumics/spotlight/layouts/__init__.py @@ -0,0 +1,2 @@ +from .model_debug import model_debug_classification +from .model_compare import model_compare_classification \ No newline at end of file diff --git a/renumics/spotlight/layouts/model_compare.py b/renumics/spotlight/layouts/model_compare.py new file mode 100644 index 00000000..e7fb00e6 --- /dev/null +++ b/renumics/spotlight/layouts/model_compare.py @@ -0,0 +1,69 @@ +from renumics.spotlight import layout +from renumics.spotlight.layout import lenses, table, similaritymap, inspector, split, tab, metric, issues, confusion_matrix, histogram +from typing import Optional, List +from renumics.spotlight.analysis.typing import DataIssue +from renumics.spotlight import Audio, Image + + +def model_compare_classification(label: str='label', model1_prediction: str='m1_prediction', model1_embedding: str=None, model1_correct:str=None, model2_prediction: str='m2_prediction', model2_embedding: str=None, model2_correct:str=None , inspect: Optional[dict]=None, features: Optional[list]=None): + + + # first column: table + issues + metrics = split([tab(metric(name="Accuracy model 1", metric='accuracy', columns=[label, model1_prediction])), tab(metric(name="Accuracy model 2", 
metric='accuracy', columns=[label, model2_prediction]))], orientation="vertical", weight=15) + column1 = split([ metrics, tab(table(), weight=65)], weight=80, orientation="horizontal") + column1 = split([ column1, tab(issues(), weight=40)], weight=80, orientation="horizontal") + + + column2 = tab(confusion_matrix(name='Model 1 confusion matrix',x_column=label, y_column=model1_prediction), confusion_matrix(name='Model 2 confusion matrix',x_column=label, y_column=model2_prediction), weight=40) + + # third column: similarity maps + if model1_correct is not None: + if model2_correct is not None: + row2 = tab(confusion_matrix(name='Model1 vs. Model2 - binned scatterplot',x_column=model1_correct, y_column=model2_correct), weight=40) + column2 = split([column2, row2], weight=80, orientation='horizontal') + + + + + if model1_embedding is not None: + if model2_embedding is not None: + row3 = tab(similaritymap(name='Model 1 embedding', columns=[model1_embedding], color_by_column=label), similaritymap(name='Model 2 embedding', columns=[model2_embedding], color_by_column=label), weight=40) + column2 = split([column2, row3], orientation="horizontal") + + + + # fourth column: inspector + inspector_fields = [] + if inspect: + for item, _type in inspect.items(): + if _type == Audio: + inspector_fields.append(lenses.audio(item)) + elif _type == Image: + inspector_fields.append(lenses.image(item)) + else: + print('Type {} not supported by this layout.'.format(_type)) + + inspector_fields.append(lenses.scalar(label)) + inspector_fields.append(lenses.scalar(model1_prediction)) + inspector_fields.append(lenses.scalar(model2_prediction)) + + inspector_view=inspector("Inspector", lenses=inspector_fields, num_columns=4) + + else: + inspector_view = inspector("Inspector", num_columns=4) + + + #build everything together + column2.weight=40 + half1 = split([column1, column2], weight=80, orientation="vertical") + half2 = tab(inspector_view, weight=40) + + + nodes = [ + half1, half2 + ] + + 
the_layout = layout.layout(nodes) + + + return the_layout \ No newline at end of file diff --git a/renumics/spotlight/layouts/model_debug.py b/renumics/spotlight/layouts/model_debug.py new file mode 100644 index 00000000..435eb200 --- /dev/null +++ b/renumics/spotlight/layouts/model_debug.py @@ -0,0 +1,74 @@ +from renumics.spotlight import layout +from renumics.spotlight.layout import lenses, table, similaritymap, inspector, split, tab, metric, issues, confusion_matrix, histogram +from typing import Optional, List +from renumics.spotlight.analysis.typing import DataIssue +from renumics.spotlight import Audio, Image + + +def model_debug_classification(label: str='label', prediction: str='prediction', embedding: str=None, inspect: Optional[dict]=None, features: Optional[list]=None): + + + # first column: table + issues + metrics = tab(metric(name="Accuracy", metric='accuracy', columns=[label, prediction]), weight=15) + column1 = split([ metrics, tab(table(), weight=65)], weight=80, orientation="horizontal") + column1 = split([ column1, tab(issues(), weight=40)], weight=80, orientation="horizontal") + + + column2 = tab(confusion_matrix(name='Confusion matrix',x_column=label, y_column=prediction), weight=40) + + # third column: confusion matric, feature histograms (optional), embedding (optional) + if features is not None: + histogram_list = [] + for idx, feature in enumerate(features): + if idx >2: + break + h = histogram(name="Histogram {}".format(feature), column=feature, stack_by_column=label) + histogram_list.append(h) + + row2 = tab(*histogram_list, weight=40) + column2 = split([column2, row2], weight=80, orientation='horizontal') + + + if embedding is not None: + + row3 = tab(similaritymap(name='Embedding', columns=[embedding], color_by_column=label), weight=40) + + column2 = split([column2, row3], orientation="horizontal") + + + + # fourth column: inspector + inspector_fields = [] + if inspect: + for item, _type in inspect.items(): + if _type == Audio: + 
inspector_fields.append(lenses.audio(item)) + elif _type == Image: + inspector_fields.append(lenses.image(item)) + else: + print('Type {} not supported by this layout.'.format(_type)) + + inspector_fields.append(lenses.scalar(label)) + inspector_fields.append(lenses.scalar(prediction)) + + inspector_view=inspector("Inspector", lenses=inspector_fields, num_columns=4) + + else: + inspector_view = inspector("Inspector", num_columns=4) + + + #build everything together + column2.weight=40 + half1 = split([column1, column2], weight=80, orientation="vertical") + half2 = tab(inspector_view, weight=40) + + + nodes = [ + half1, half2 + ] + + the_layout = layout.layout(nodes) + + + return the_layout + From 38913183acd04dce40600ee8088b24897adb32e6 Mon Sep 17 00:00:00 2001 From: Stefan Suwelack Date: Tue, 19 Sep 2023 09:27:20 +0200 Subject: [PATCH 2/7] code linting --- renumics/spotlight/layouts/model_compare.py | 132 ++++++++++++++------ renumics/spotlight/layouts/model_debug.py | 93 ++++++++------ 2 files changed, 151 insertions(+), 74 deletions(-) diff --git a/renumics/spotlight/layouts/model_compare.py b/renumics/spotlight/layouts/model_compare.py index e7fb00e6..6cef6aaa 100644 --- a/renumics/spotlight/layouts/model_compare.py +++ b/renumics/spotlight/layouts/model_compare.py @@ -1,38 +1,99 @@ from renumics.spotlight import layout -from renumics.spotlight.layout import lenses, table, similaritymap, inspector, split, tab, metric, issues, confusion_matrix, histogram -from typing import Optional, List -from renumics.spotlight.analysis.typing import DataIssue +from renumics.spotlight.layout import ( + Layout, + lenses, + table, + similaritymap, + inspector, + split, + tab, + metric, + issues, + confusion_matrix, +) +from typing import Optional from renumics.spotlight import Audio, Image -def model_compare_classification(label: str='label', model1_prediction: str='m1_prediction', model1_embedding: str=None, model1_correct:str=None, model2_prediction: str='m2_prediction', 
model2_embedding: str=None, model2_correct:str=None , inspect: Optional[dict]=None, features: Optional[list]=None): - +def model_compare_classification( + label: str = "label", + model1_prediction: str = "m1_prediction", + model1_embedding: str = "", + model1_correct: str = "", + model2_prediction: str = "m2_prediction", + model2_embedding: str = "", + model2_correct: str = "", + inspect: Optional[dict] = None +) -> Layout: + # first column: table + issues + metrics = split( + [ + tab( + metric( + name="Accuracy model 1", + metric="accuracy", + columns=[label, model1_prediction], + ) + ), + tab( + metric( + name="Accuracy model 2", + metric="accuracy", + columns=[label, model2_prediction], + ) + ), + ], + orientation="vertical", + weight=15, + ) + column1 = split( + [metrics, tab(table(), weight=65)], weight=80, orientation="horizontal" + ) + column1 = split( + [column1, tab(issues(), weight=40)], weight=80, orientation="horizontal" + ) + + column2 = tab( + confusion_matrix( + name="Model 1 confusion matrix", x_column=label, y_column=model1_prediction + ), + confusion_matrix( + name="Model 2 confusion matrix", x_column=label, y_column=model2_prediction + ), + weight=40, + ) - # first column: table + issues - metrics = split([tab(metric(name="Accuracy model 1", metric='accuracy', columns=[label, model1_prediction])), tab(metric(name="Accuracy model 2", metric='accuracy', columns=[label, model2_prediction]))], orientation="vertical", weight=15) - column1 = split([ metrics, tab(table(), weight=65)], weight=80, orientation="horizontal") - column1 = split([ column1, tab(issues(), weight=40)], weight=80, orientation="horizontal") - - - column2 = tab(confusion_matrix(name='Model 1 confusion matrix',x_column=label, y_column=model1_prediction), confusion_matrix(name='Model 2 confusion matrix',x_column=label, y_column=model2_prediction), weight=40) - # third column: similarity maps - if model1_correct is not None: - if model2_correct is not None: - row2 = 
tab(confusion_matrix(name='Model1 vs. Model2 - binned scatterplot',x_column=model1_correct, y_column=model2_correct), weight=40) - column2 = split([column2, row2], weight=80, orientation='horizontal') - - - - - if model1_embedding is not None: - if model2_embedding is not None: - row3 = tab(similaritymap(name='Model 1 embedding', columns=[model1_embedding], color_by_column=label), similaritymap(name='Model 2 embedding', columns=[model2_embedding], color_by_column=label), weight=40) + if model1_correct != "": + if model2_correct != "": + row2 = tab( + confusion_matrix( + name="Model1 vs. Model2 - binned scatterplot", + x_column=model1_correct, + y_column=model2_correct, + ), + weight=40, + ) + column2 = split([column2, row2], weight=80, orientation="horizontal") + + if model1_embedding != "": + if model2_embedding != "": + row3 = tab( + similaritymap( + name="Model 1 embedding", + columns=[model1_embedding], + color_by_column=label, + ), + similaritymap( + name="Model 2 embedding", + columns=[model2_embedding], + color_by_column=label, + ), + weight=40, + ) column2 = split([column2, row3], orientation="horizontal") - - - # fourth column: inspector + # fourth column: inspector inspector_fields = [] if inspect: for item, _type in inspect.items(): @@ -41,29 +102,24 @@ def model_compare_classification(label: str='label', model1_prediction: str='m1_ elif _type == Image: inspector_fields.append(lenses.image(item)) else: - print('Type {} not supported by this layout.'.format(_type)) + print("Type {} not supported by this layout.".format(_type)) inspector_fields.append(lenses.scalar(label)) inspector_fields.append(lenses.scalar(model1_prediction)) inspector_fields.append(lenses.scalar(model2_prediction)) - inspector_view=inspector("Inspector", lenses=inspector_fields, num_columns=4) + inspector_view = inspector("Inspector", lenses=inspector_fields, num_columns=4) else: inspector_view = inspector("Inspector", num_columns=4) - - #build everything together - column2.weight=40 
+ # build everything together + column2.weight = 40 half1 = split([column1, column2], weight=80, orientation="vertical") half2 = tab(inspector_view, weight=40) - - nodes = [ - half1, half2 - ] + nodes = [half1, half2] the_layout = layout.layout(nodes) - - return the_layout \ No newline at end of file + return the_layout diff --git a/renumics/spotlight/layouts/model_debug.py b/renumics/spotlight/layouts/model_debug.py index 435eb200..fe8910ff 100644 --- a/renumics/spotlight/layouts/model_debug.py +++ b/renumics/spotlight/layouts/model_debug.py @@ -1,43 +1,70 @@ from renumics.spotlight import layout -from renumics.spotlight.layout import lenses, table, similaritymap, inspector, split, tab, metric, issues, confusion_matrix, histogram -from typing import Optional, List -from renumics.spotlight.analysis.typing import DataIssue +from renumics.spotlight.layout import ( + Layout, + lenses, + table, + similaritymap, + inspector, + split, + tab, + metric, + issues, + confusion_matrix, + histogram, +) +from typing import Optional from renumics.spotlight import Audio, Image -def model_debug_classification(label: str='label', prediction: str='prediction', embedding: str=None, inspect: Optional[dict]=None, features: Optional[list]=None): - +def model_debug_classification( + label: str = "label", + prediction: str = "prediction", + embedding: str = "", + inspect: Optional[dict] = None, + features: Optional[list] = None, +) -> Layout: + # first column: table + issues + metrics = tab( + metric(name="Accuracy", metric="accuracy", columns=[label, prediction]), + weight=15, + ) + column1 = split( + [metrics, tab(table(), weight=65)], weight=80, orientation="horizontal" + ) + column1 = split( + [column1, tab(issues(), weight=40)], weight=80, orientation="horizontal" + ) + + column2 = tab( + confusion_matrix(name="Confusion matrix", x_column=label, y_column=prediction), + weight=40, + ) - # first column: table + issues - metrics = tab(metric(name="Accuracy", metric='accuracy', 
columns=[label, prediction]), weight=15) - column1 = split([ metrics, tab(table(), weight=65)], weight=80, orientation="horizontal") - column1 = split([ column1, tab(issues(), weight=40)], weight=80, orientation="horizontal") - - - column2 = tab(confusion_matrix(name='Confusion matrix',x_column=label, y_column=prediction), weight=40) - # third column: confusion matric, feature histograms (optional), embedding (optional) - if features is not None: + if features is not None: histogram_list = [] for idx, feature in enumerate(features): - if idx >2: + if idx > 2: break - h = histogram(name="Histogram {}".format(feature), column=feature, stack_by_column=label) + h = histogram( + name="Histogram {}".format(feature), + column=feature, + stack_by_column=label, + ) histogram_list.append(h) - row2 = tab(*histogram_list, weight=40) - column2 = split([column2, row2], weight=80, orientation='horizontal') - - - if embedding is not None: + row2 = tab(*histogram_list, weight=40) + column2 = split([column2, row2], weight=80, orientation="horizontal") - row3 = tab(similaritymap(name='Embedding', columns=[embedding], color_by_column=label), weight=40) + if embedding != "": + row3 = tab( + similaritymap(name="Embedding", columns=[embedding], color_by_column=label), + weight=40, + ) column2 = split([column2, row3], orientation="horizontal") - - - # fourth column: inspector + # fourth column: inspector inspector_fields = [] if inspect: for item, _type in inspect.items(): @@ -46,29 +73,23 @@ def model_debug_classification(label: str='label', prediction: str='prediction', elif _type == Image: inspector_fields.append(lenses.image(item)) else: - print('Type {} not supported by this layout.'.format(_type)) + print("Type {} not supported by this layout.".format(_type)) inspector_fields.append(lenses.scalar(label)) inspector_fields.append(lenses.scalar(prediction)) - inspector_view=inspector("Inspector", lenses=inspector_fields, num_columns=4) + inspector_view = inspector("Inspector", 
lenses=inspector_fields, num_columns=4) else: inspector_view = inspector("Inspector", num_columns=4) - - #build everything together - column2.weight=40 + # build everything together + column2.weight = 40 half1 = split([column1, column2], weight=80, orientation="vertical") half2 = tab(inspector_view, weight=40) - - nodes = [ - half1, half2 - ] + nodes = [half1, half2] the_layout = layout.layout(nodes) - return the_layout - From 2cb1440fdf77a2405252586252c24c2af63274b1 Mon Sep 17 00:00:00 2001 From: Stefan Suwelack Date: Tue, 19 Sep 2023 10:03:28 +0200 Subject: [PATCH 3/7] code linting --- renumics/spotlight/layouts/__init__.py | 2 +- renumics/spotlight/layouts/model_compare.py | 39 +++++++++++++++------ renumics/spotlight/layouts/model_debug.py | 24 ++++++++++--- 3 files changed, 48 insertions(+), 17 deletions(-) diff --git a/renumics/spotlight/layouts/__init__.py b/renumics/spotlight/layouts/__init__.py index 9ea84248..c73f5fab 100644 --- a/renumics/spotlight/layouts/__init__.py +++ b/renumics/spotlight/layouts/__init__.py @@ -1,2 +1,2 @@ from .model_debug import model_debug_classification -from .model_compare import model_compare_classification \ No newline at end of file +from .model_compare import model_compare_classification diff --git a/renumics/spotlight/layouts/model_compare.py b/renumics/spotlight/layouts/model_compare.py index 6cef6aaa..29394942 100644 --- a/renumics/spotlight/layouts/model_compare.py +++ b/renumics/spotlight/layouts/model_compare.py @@ -23,7 +23,7 @@ def model_compare_classification( model2_prediction: str = "m2_prediction", model2_embedding: str = "", model2_correct: str = "", - inspect: Optional[dict] = None + inspect: Optional[dict] = None, ) -> Layout: # first column: table + issues metrics = split( @@ -53,14 +53,21 @@ def model_compare_classification( [column1, tab(issues(), weight=40)], weight=80, orientation="horizontal" ) - column2 = tab( - confusion_matrix( - name="Model 1 confusion matrix", x_column=label, 
y_column=model1_prediction - ), - confusion_matrix( - name="Model 2 confusion matrix", x_column=label, y_column=model2_prediction - ), - weight=40, + column2_list = [] + column2_list.append( + tab( + confusion_matrix( + name="Model 1 confusion matrix", + x_column=label, + y_column=model1_prediction, + ), + confusion_matrix( + name="Model 2 confusion matrix", + x_column=label, + y_column=model2_prediction, + ), + weight=40, + ) ) # third column: similarity maps @@ -74,7 +81,7 @@ def model_compare_classification( ), weight=40, ) - column2 = split([column2, row2], weight=80, orientation="horizontal") + column2_list.append(row2) if model1_embedding != "": if model2_embedding != "": @@ -91,7 +98,17 @@ def model_compare_classification( ), weight=40, ) - column2 = split([column2, row3], orientation="horizontal") + column2_list.append(row3) + + if len(column2_list) == 1: + column2 = column2_list[0] + elif len(column2_list) == 2: + column2 = split(column2_list, orientation="horizontal") + else: + column2 = split( + [column2_list[0], column2_list[1]], weight=80, orientation="horizontal" + ) + column2 = split([column2, column2_list[2]], orientation="horizontal") # fourth column: inspector inspector_fields = [] diff --git a/renumics/spotlight/layouts/model_debug.py b/renumics/spotlight/layouts/model_debug.py index fe8910ff..576b02a1 100644 --- a/renumics/spotlight/layouts/model_debug.py +++ b/renumics/spotlight/layouts/model_debug.py @@ -35,9 +35,14 @@ def model_debug_classification( [column1, tab(issues(), weight=40)], weight=80, orientation="horizontal" ) - column2 = tab( - confusion_matrix(name="Confusion matrix", x_column=label, y_column=prediction), - weight=40, + column2_list = [] + column2_list.append( + tab( + confusion_matrix( + name="Confusion matrix", x_column=label, y_column=prediction + ), + weight=40, + ) ) # third column: confusion matric, feature histograms (optional), embedding (optional) @@ -54,15 +59,24 @@ def model_debug_classification( 
histogram_list.append(h) row2 = tab(*histogram_list, weight=40) - column2 = split([column2, row2], weight=80, orientation="horizontal") + column2_list.append(row2) if embedding != "": row3 = tab( similaritymap(name="Embedding", columns=[embedding], color_by_column=label), weight=40, ) + column2_list.append(row3) - column2 = split([column2, row3], orientation="horizontal") + if len(column2_list) == 1: + column2 = column2_list[0] + elif len(column2_list) == 2: + column2 = split(column2_list, orientation="horizontal") + else: + column2 = split( + [column2_list[0], column2_list[1]], weight=80, orientation="horizontal" + ) + column2 = split([column2, column2_list[2]], orientation="horizontal") # fourth column: inspector inspector_fields = [] From a52219ae7403f6dad968f57930a1cb125c58e53c Mon Sep 17 00:00:00 2001 From: Stefan Suwelack Date: Tue, 19 Sep 2023 13:27:01 +0200 Subject: [PATCH 4/7] docstring added --- renumics/spotlight/layouts/__init__.py | 6 ++++-- renumics/spotlight/layouts/model_compare.py | 24 +++++++++++++++++++-- renumics/spotlight/layouts/model_debug.py | 18 ++++++++++++++-- 3 files changed, 42 insertions(+), 6 deletions(-) diff --git a/renumics/spotlight/layouts/__init__.py b/renumics/spotlight/layouts/__init__.py index c73f5fab..740fa53c 100644 --- a/renumics/spotlight/layouts/__init__.py +++ b/renumics/spotlight/layouts/__init__.py @@ -1,2 +1,4 @@ -from .model_debug import model_debug_classification -from .model_compare import model_compare_classification +from .model_debug import debug_classification +from .model_compare import compare_classification + +__all__ = ["debug_classification", "compare_classification"] diff --git a/renumics/spotlight/layouts/model_compare.py b/renumics/spotlight/layouts/model_compare.py index 29394942..75fb5ad1 100644 --- a/renumics/spotlight/layouts/model_compare.py +++ b/renumics/spotlight/layouts/model_compare.py @@ -1,6 +1,8 @@ from renumics.spotlight import layout from renumics.spotlight.layout import ( Layout, + 
Tab, + Split, lenses, table, similaritymap, @@ -11,11 +13,11 @@ issues, confusion_matrix, ) -from typing import Optional +from typing import Optional, Union from renumics.spotlight import Audio, Image -def model_compare_classification( +def compare_classification( label: str = "label", model1_prediction: str = "m1_prediction", model1_embedding: str = "", @@ -25,6 +27,22 @@ def model_compare_classification( model2_correct: str = "", inspect: Optional[dict] = None, ) -> Layout: + """This function generates a Spotlight layout for comparing two different machine learning classification models. + + Args: + label (str, optional): Name of the dataframe column that contains the label. Defaults to "label". + model1_prediction (str, optional): Name of the dataframe column that contains the prediction for model 1. Defaults to "m1_prediction". + model1_embedding (str, optional): Name of the dataframe column that contains thee embedding for model 1. Defaults to "". + model1_correct (str, optional): Name of the dataframe column that contains a flag if the data sample is predicted correctly by model 1. + model2_prediction (str, optional): Name of the dataframe column that contains the prediction for model 2. Defaults to "m2_prediction". + model2_embedding (str, optional): Name of the dataframe column that contains thee embedding for model 2. Defaults to "". + model2_correct (str, optional): Name and type of the dataframe columns that are displayed in the inspector, e.g. {'audio': spotlight.Audio}. Defaults to None. + inspect (Optional[dict], optional): Name of the dataframe column that contains a flag if the data sample is predicted correctly by model 1. 
+ + Returns: + Layout: _description_ + """ + # first column: table + issues metrics = split( [ @@ -100,6 +118,8 @@ def model_compare_classification( ) column2_list.append(row3) + column2:Union[Tab, Split] + if len(column2_list) == 1: column2 = column2_list[0] elif len(column2_list) == 2: diff --git a/renumics/spotlight/layouts/model_debug.py b/renumics/spotlight/layouts/model_debug.py index 576b02a1..b5281979 100644 --- a/renumics/spotlight/layouts/model_debug.py +++ b/renumics/spotlight/layouts/model_debug.py @@ -16,13 +16,27 @@ from renumics.spotlight import Audio, Image -def model_debug_classification( + +def debug_classification( label: str = "label", prediction: str = "prediction", embedding: str = "", inspect: Optional[dict] = None, features: Optional[list] = None, ) -> Layout: + """This function generates a Spotlight layout for debugging a machine learning classification model. + + Args: + label (str, optional): Name of the dataframe column that contains the label. Defaults to "label". + prediction (str, optional): Name of the dataframe column that contains the prediction. Defaults to "prediction". + embedding (str, optional): Name of the dataframe column that contains the embedding. Defaults to "". + inspect (Optional[dict], optional): Name and type of the dataframe columns that are displayed in the inspector, e.g. {'audio': spotlight.Audio}. Defaults to None. + features (Optional[list], optional): Name of the dataframe columns that contain useful metadata and features. Defaults to None. + + Returns: + Layout: Layout to be displayed with Spotlight. 
+ """ + # first column: table + issues metrics = tab( metric(name="Accuracy", metric="accuracy", columns=[label, prediction]), @@ -45,7 +59,7 @@ def model_debug_classification( ) ) - # third column: confusion matric, feature histograms (optional), embedding (optional) + # second column: confusion matrix, feature histograms (optional), embedding (optional) if features is not None: histogram_list = [] for idx, feature in enumerate(features): From d93a64599c29aa84959deb4cd204f73908785b49 Mon Sep 17 00:00:00 2001 From: Stefan Suwelack Date: Tue, 19 Sep 2023 13:32:30 +0200 Subject: [PATCH 5/7] code linting --- renumics/spotlight/layouts/model_compare.py | 4 ++-- renumics/spotlight/layouts/model_debug.py | 7 +++++-- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/renumics/spotlight/layouts/model_compare.py b/renumics/spotlight/layouts/model_compare.py index 75fb5ad1..019b1d94 100644 --- a/renumics/spotlight/layouts/model_compare.py +++ b/renumics/spotlight/layouts/model_compare.py @@ -42,7 +42,7 @@ def compare_classification( Returns: Layout: _description_ """ - + # first column: table + issues metrics = split( [ @@ -118,7 +118,7 @@ def compare_classification( ) column2_list.append(row3) - column2:Union[Tab, Split] + column2: Union[Tab, Split] if len(column2_list) == 1: column2 = column2_list[0] diff --git a/renumics/spotlight/layouts/model_debug.py b/renumics/spotlight/layouts/model_debug.py index b5281979..ddb95473 100644 --- a/renumics/spotlight/layouts/model_debug.py +++ b/renumics/spotlight/layouts/model_debug.py @@ -1,6 +1,8 @@ from renumics.spotlight import layout from renumics.spotlight.layout import ( Layout, + Tab, + Split, lenses, table, similaritymap, @@ -12,11 +14,10 @@ confusion_matrix, histogram, ) -from typing import Optional +from typing import Optional, Union from renumics.spotlight import Audio, Image - def debug_classification( label: str = "label", prediction: str = "prediction", @@ -82,6 +83,8 @@ def debug_classification( ) 
column2_list.append(row3) + column2: Union[Tab, Split] + if len(column2_list) == 1: column2 = column2_list[0] elif len(column2_list) == 2: From 111e8bc9dc57e210f4ccffee391afff1287e62ca Mon Sep 17 00:00:00 2001 From: Dominik Haentsch Date: Wed, 20 Sep 2023 09:19:17 +0200 Subject: [PATCH 6/7] cleanup docstrings and dtype usage --- renumics/spotlight/layouts/model_compare.py | 33 +++++++++++---------- renumics/spotlight/layouts/model_debug.py | 29 +++++++++--------- 2 files changed, 32 insertions(+), 30 deletions(-) diff --git a/renumics/spotlight/layouts/model_compare.py b/renumics/spotlight/layouts/model_compare.py index 019b1d94..a1a8f81d 100644 --- a/renumics/spotlight/layouts/model_compare.py +++ b/renumics/spotlight/layouts/model_compare.py @@ -1,3 +1,5 @@ +from typing import Optional, Union, Dict, Any +from renumics.spotlight import dtypes from renumics.spotlight import layout from renumics.spotlight.layout import ( Layout, @@ -13,8 +15,6 @@ issues, confusion_matrix, ) -from typing import Optional, Union -from renumics.spotlight import Audio, Image def compare_classification( @@ -25,22 +25,22 @@ def compare_classification( model2_prediction: str = "m2_prediction", model2_embedding: str = "", model2_correct: str = "", - inspect: Optional[dict] = None, + inspect: Optional[Dict[str, Any]] = None, ) -> Layout: """This function generates a Spotlight layout for comparing two different machine learning classification models. Args: - label (str, optional): Name of the dataframe column that contains the label. Defaults to "label". - model1_prediction (str, optional): Name of the dataframe column that contains the prediction for model 1. Defaults to "m1_prediction". - model1_embedding (str, optional): Name of the dataframe column that contains thee embedding for model 1. Defaults to "". - model1_correct (str, optional): Name of the dataframe column that contains a flag if the data sample is predicted correctly by model 1. 
- model2_prediction (str, optional): Name of the dataframe column that contains the prediction for model 2. Defaults to "m2_prediction". - model2_embedding (str, optional): Name of the dataframe column that contains thee embedding for model 2. Defaults to "". - model2_correct (str, optional): Name and type of the dataframe columns that are displayed in the inspector, e.g. {'audio': spotlight.Audio}. Defaults to None. - inspect (Optional[dict], optional): Name of the dataframe column that contains a flag if the data sample is predicted correctly by model 1. + label: Name of the column that contains the label. + model1_prediction: Name of the column that contains the prediction for model 1. + model1_embedding: Name of the column that contains the embedding for model 1. + model1_correct: Name of the column that contains a flag if the data sample is predicted correctly by model 1. + model2_prediction: Name of the column that contains the prediction for model 2. + model2_embedding: Name of the column that contains the embedding for model 2. + model2_correct: Name of the column that contains a flag if the data sample is predicted correctly by model 2. + inspect: Name and type of the columns that are displayed in the inspector, e.g. {'audio': spotlight.dtypes.audio_dtype}. Returns: - Layout: _description_ + The configured layout for `spotlight.show`. 
""" # first column: table + issues @@ -133,13 +133,14 @@ def compare_classification( # fourth column: inspector inspector_fields = [] if inspect: - for item, _type in inspect.items(): - if _type == Audio: + for item, dtype_like in inspect.items(): + dtype = dtypes.create_dtype(dtype_like) + if dtypes.is_audio_dtype(dtype): inspector_fields.append(lenses.audio(item)) - elif _type == Image: + elif dtypes.is_image_dtype(dtype): inspector_fields.append(lenses.image(item)) else: - print("Type {} not supported by this layout.".format(_type)) + print(f"Type {dtype} not supported by this layout.") inspector_fields.append(lenses.scalar(label)) inspector_fields.append(lenses.scalar(model1_prediction)) diff --git a/renumics/spotlight/layouts/model_debug.py b/renumics/spotlight/layouts/model_debug.py index ddb95473..d4a74d57 100644 --- a/renumics/spotlight/layouts/model_debug.py +++ b/renumics/spotlight/layouts/model_debug.py @@ -1,3 +1,4 @@ +from typing import Optional, Union, Dict, List, Any from renumics.spotlight import layout from renumics.spotlight.layout import ( Layout, @@ -14,28 +15,27 @@ confusion_matrix, histogram, ) -from typing import Optional, Union -from renumics.spotlight import Audio, Image +from renumics.spotlight.dtypes import create_dtype, is_audio_dtype, is_image_dtype def debug_classification( label: str = "label", prediction: str = "prediction", embedding: str = "", - inspect: Optional[dict] = None, - features: Optional[list] = None, + inspect: Optional[Dict[str, Any]] = None, + features: Optional[List[str]] = None, ) -> Layout: """This function generates a Spotlight layout for debugging a machine learning classification model. Args: - label (str, optional): Name of the dataframe column that contains the label. Defaults to "label". - prediction (str, optional): Name of the dataframe column that contains the prediction. Defaults to "prediction". - embedding (str, optional): Name of the dataframe column that contains the embedding. Defaults to "". 
- inspect (Optional[dict], optional): Name and type of the dataframe columns that are displayed in the inspector, e.g. {'audio': spotlight.Audio}. Defaults to None. - features (Optional[list], optional): Name of the dataframe columns that contain useful metadata and features. Defaults to None. + label: Name of the column that contains the label. + prediction: Name of the column that contains the prediction. + embedding: Name of the column that contains the embedding. + inspect: Name and type of the columns that are displayed in the inspector, e.g. {'audio': spotlight.dtypes.audio_dtype}. + features: Names of the columns that contain useful metadata and features. Returns: - Layout: Layout to be displayed with Spotlight. + The configured layout for `spotlight.show`. """ # first column: table + issues @@ -98,13 +98,14 @@ def debug_classification( # fourth column: inspector inspector_fields = [] if inspect: - for item, _type in inspect.items(): - if _type == Audio: + for item, dtype_like in inspect.items(): + dtype = create_dtype(dtype_like) + if is_audio_dtype(dtype): inspector_fields.append(lenses.audio(item)) - elif _type == Image: + elif is_image_dtype(dtype): inspector_fields.append(lenses.image(item)) else: - print("Type {} not supported by this layout.".format(_type)) + print("Type {} not supported by this layout.".format(dtype)) inspector_fields.append(lenses.scalar(label)) inspector_fields.append(lenses.scalar(prediction)) From 327849c99cd113fb2db32c5c6859a753278c6042 Mon Sep 17 00:00:00 2001 From: Dominik Haentsch Date: Wed, 20 Sep 2023 09:25:49 +0200 Subject: [PATCH 7/7] refactor: move default layout to layouts module --- renumics/spotlight/app.py | 10 +++++----- renumics/spotlight/layout/default.py | 15 --------------- renumics/spotlight/layouts/__init__.py | 3 ++- renumics/spotlight/layouts/default.py | 26 ++++++++++++++++++++++++++ 4 files changed, 33 insertions(+), 21 deletions(-) delete mode 100644 renumics/spotlight/layout/default.py create mode 100644
renumics/spotlight/layouts/default.py diff --git a/renumics/spotlight/app.py b/renumics/spotlight/app.py index 377c4424..c2c6758d 100644 --- a/renumics/spotlight/app.py +++ b/renumics/spotlight/app.py @@ -50,7 +50,7 @@ from renumics.spotlight.backend.middlewares.timing import add_timing_middleware from renumics.spotlight.app_config import AppConfig from renumics.spotlight.data_source import DataSource, create_datasource -from renumics.spotlight.layout.default import DEFAULT_LAYOUT +from renumics.spotlight import layouts from renumics.spotlight.data_store import DataStore @@ -86,7 +86,7 @@ class SpotlightApp(FastAPI): task_manager: TaskManager websocket_manager: Optional[WebsocketManager] - _layout: Optional[Layout] + _layout: Layout config: Config username: str filebrowsing_allowed: bool @@ -106,7 +106,7 @@ def __init__(self) -> None: self.task_manager = TaskManager() self.websocket_manager = None self.config = Config() - self._layout = None + self._layout = layouts.default() self.project_root = Path.cwd() self.vite_url = None self.username = "" @@ -368,11 +368,11 @@ def layout(self) -> Layout: """ Frontend layout """ - return self._layout or DEFAULT_LAYOUT + return self._layout @layout.setter def layout(self, layout: Optional[Layout]) -> None: - self._layout = layout + self._layout = layout or layouts.default() self._broadcast(ResetLayoutMessage()) async def get_current_layout_dict(self, user_id: str) -> Optional[Dict]: diff --git a/renumics/spotlight/layout/default.py b/renumics/spotlight/layout/default.py deleted file mode 100644 index ff590d32..00000000 --- a/renumics/spotlight/layout/default.py +++ /dev/null @@ -1,15 +0,0 @@ -""" -All-purpose default layout. -""" - -from . import split, tab -from . 
import table, histogram, inspector, layout, scatterplot, similaritymap - -DEFAULT_LAYOUT = layout( - split( - tab(table(), weight=60), - tab(similaritymap(), scatterplot(), histogram(), weight=40), - weight=60, - ), - tab(inspector(), weight=40), -) diff --git a/renumics/spotlight/layouts/__init__.py b/renumics/spotlight/layouts/__init__.py index 740fa53c..e350b792 100644 --- a/renumics/spotlight/layouts/__init__.py +++ b/renumics/spotlight/layouts/__init__.py @@ -1,4 +1,5 @@ +from .default import default from .model_debug import debug_classification from .model_compare import compare_classification -__all__ = ["debug_classification", "compare_classification"] +__all__ = ["default", "debug_classification", "compare_classification"] diff --git a/renumics/spotlight/layouts/default.py b/renumics/spotlight/layouts/default.py new file mode 100644 index 00000000..a4c7e0f6 --- /dev/null +++ b/renumics/spotlight/layouts/default.py @@ -0,0 +1,26 @@ +from renumics.spotlight.layout import ( + histogram, + inspector, + layout, + scatterplot, + similaritymap, + split, + tab, + table, +) +from renumics.spotlight.layout.nodes import Layout + + +def default() -> Layout: + """ + Default layout for spotlight. + """ + + return layout( + split( + tab(table(), weight=60), + tab(similaritymap(), scatterplot(), histogram(), weight=40), + weight=60, + ), + tab(inspector(), weight=40), + )