red-hat-data-services · FedeAlonso · Jul 5, 2024 · diegolovison · Jul 8, 2024
diff --git a/ods_ci/tests/Resources/Files/pipeline-samples/v2/metrics/random_metrics_visualization.py b/ods_ci/tests/Resources/Files/pipeline-samples/v2/metrics/random_metrics_visualization.py
@@ -0,0 +1,165 @@
+# Copyright 2021 The Kubeflow Authors
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import os
+
+from kfp import dsl, compiler
+from kfp.dsl import (component, Output, ClassificationMetrics, Metrics, HTML,
+                     Markdown)
+
+# In tests, we install a KFP package from the PR under test. Users should not
+# normally need to specify `kfp_package_path` in their component definitions.
+_KFP_PACKAGE_PATH = os.getenv('KFP_PACKAGE_PATH')
+
+
+@component(
+    packages_to_install=['scikit-learn'],
+    base_image='registry.redhat.io/ubi8/python-39@sha256:3523b184212e1f2243e76d8094ab52b01ea3015471471290d011625e1763af61',
+    kfp_package_path=_KFP_PACKAGE_PATH,
+)
+def digit_classification(metrics: Output[Metrics]):
+    import random
+    from sklearn import model_selection
+    from sklearn.linear_model import LogisticRegression
+    from sklearn import datasets
+    from sklearn.metrics import accuracy_score
+
+    # Load digits dataset
+    iris = datasets.load_iris()
+
+    # # Create feature matrix
+    X = iris.data
+
+    # Create target vector
+    y = iris.target
+
+    #test size
+    test_size = 0.33
+
+    seed = 7
+    #cross-validation settings
+    kfold = model_selection.KFold(n_splits=10, random_state=seed, shuffle=True)
+
+    #Model instance
+    model = LogisticRegression()
+    scoring = 'accuracy'
+    results = model_selection.cross_val_score(
+        model, X, y, cv=kfold, scoring=scoring)
+
+    #split data
+    X_train, X_test, y_train, y_test = model_selection.train_test_split(
+        X, y, test_size=test_size, random_state=seed)
+    #fit model
+    model.fit(X_train, y_train)
+
+    #accuracy on test set
+    result = model.score(X_test, y_test)
+    metrics.log_metric('accuracy', (result * 100.0))
+    metrics.log_metric('metric2', random.randint(20, 40))
+    metrics.log_metric('metric3', random.randint(20, 40))
+    metrics.log_metric('metric4', random.randint(20, 40))
+    metrics.log_metric('metric5', random.randint(20, 60))
+    metrics.log_metric('metric6', random.randint(200, 300))
+    metrics.log_metric('metric7', random.randint(1000, 3000))
+    metrics.log_metric('metric8', random.randint(20, 50))
+    metrics.log_metric('metric9', random.randint(0, 20))
+    metrics.log_metric('metric10', random.randint(20, 30))
+    metrics.log_metric('metric11', random.randint(30, 40))
+    metrics.log_metric('metric12', random.randint(40, 50))
+
+
+@component(
+    packages_to_install=['scikit-learn'],
+    base_image='python:3.9',
+    kfp_package_path=_KFP_PACKAGE_PATH,
+)
+def wine_classification(metrics: Output[ClassificationMetrics]):
+    from sklearn.ensemble import RandomForestClassifier
+    from sklearn.metrics import roc_curve
+    from sklearn.datasets import load_wine
+    from sklearn.model_selection import train_test_split, cross_val_predict
+
+    X, y = load_wine(return_X_y=True)
+    # Binary classification problem for label 1.
+    y = y == 1
+
+    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)
+    rfc = RandomForestClassifier(n_estimators=10, random_state=42)
+    rfc.fit(X_train, y_train)
+    y_scores = cross_val_predict(
+        rfc, X_train, y_train, cv=3, method='predict_proba')
+    y_predict = cross_val_predict(rfc, X_train, y_train, cv=3, method='predict')
+    fpr, tpr, thresholds = roc_curve(
+        y_true=y_train, y_score=y_scores[:, 1], pos_label=True)
+
+    # avoid inf thresholds
+    epsilon = 1e-6
+    thresholds = [1 - epsilon if t == float('inf') else t for t in thresholds]
+
+    metrics.log_roc_curve(fpr, tpr, thresholds)
+
+
+@component(
+    packages_to_install=['scikit-learn'],
+    base_image='python:3.9',
+    kfp_package_path=_KFP_PACKAGE_PATH,
+)
+def iris_sgdclassifier(test_samples_fraction: float,
+                       metrics: Output[ClassificationMetrics]):
+    from sklearn import datasets, model_selection
+    from sklearn.linear_model import SGDClassifier
+    from sklearn.metrics import confusion_matrix
+
+    iris_dataset = datasets.load_iris()
+    train_x, test_x, train_y, test_y = model_selection.train_test_split(
+        iris_dataset['data'],
+        iris_dataset['target'],
+        test_size=test_samples_fraction)
+
+    classifier = SGDClassifier()
+    classifier.fit(train_x, train_y)
+    predictions = model_selection.cross_val_predict(
+        classifier, train_x, train_y, cv=3)
+    metrics.log_confusion_matrix(
+        ['Setosa', 'Versicolour', 'Virginica'],
+        confusion_matrix(
+            train_y,
+            predictions).tolist()  # .tolist() to convert np array to list.
+    )
+
+
+@component(
+    kfp_package_path=_KFP_PACKAGE_PATH,)
+def html_visualization(html_artifact: Output[HTML]):
+    html_content = '<!DOCTYPE html><html><body><h1>Hello world</h1></body></html>'
+    with open(html_artifact.path, 'w') as f:
+        f.write(html_content)
+
+
+@component(
+    kfp_package_path=_KFP_PACKAGE_PATH,)
+def markdown_visualization(markdown_artifact: Output[Markdown]):
+    markdown_content = '## Hello world \n\n Markdown content'
+    with open(markdown_artifact.path, 'w') as f:
+        f.write(markdown_content)
+
+
+@dsl.pipeline(name='metrics-visualization-pipeline')
+def metrics_visualization_pipeline():
+    wine_classification_op = wine_classification()
+    iris_sgdclassifier_op = iris_sgdclassifier(test_samples_fraction=0.3)
+    digit_classification_op = digit_classification()
+    html_visualization_op = html_visualization()
+    markdown_visualization_op = markdown_visualization()
+
+compiler.Compiler().compile(pipeline_func=metrics_visualization_pipeline, package_path=__file__.replace(".py", "_compiled.yaml"))