update

luochang212 · May 24, 2024 · ecaa98c · ecaa98c
1 parent bac9de6
commit ecaa98c
Show file tree

Hide file tree

Showing 18 changed files with 234 additions and 102 deletions.
diff --git a/.github/workflows/nox.yml b/.github/workflows/nox.yml
@@ -0,0 +1,45 @@
+name: Run nox tests
+
+on:
+  push:
+    branches:
+      - main
+  pull_request:
+    branches:
+      - main
+
+jobs:
+  test:
+    runs-on: ${{ matrix.os }}
+
+    strategy:
+      matrix:
+        os: [
+            ubuntu-22.04,
+            ubuntu-24.04,
+            windows-2019,
+            windows-2022,
+            windows-latest,
+            macos-11,
+            macos-12,
+            macos-13,
+            macos-14,
+          ]
+        python-version: ['3.8', '3.9', '3.10', '3.11', '3.12']
+
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v4
+
+      - name: Set up Python ${{ matrix.python-version }}
+        uses: actions/setup-python@v5
+        with:
+          python-version: ${{ matrix.python-version }}
+
+      - name: Install dependencies
+        run: |
+          python -m pip install --upgrade pip
+          pip install nox
+
+      - name: Run nox
+        run: nox --non-interactive --error-on-missing-interpreter
diff --git a/.gitignore b/.gitignore
@@ -1,3 +1,9 @@
 .DS_Store
-/dist/*
-/src/flameai/__pycache__/*
+.idea/
+.vscode/
+.pytest_cache/
+__pycache__/
+/src/flameai/__pycache__/
+/tests/__pycache__/
+dist/
+.nox/
diff --git a/noxfile.py b/noxfile.py
@@ -0,0 +1,14 @@
+import nox
+
+
+@nox.session(python=['3.8', '3.9', '3.10', '3.11', '3.12'])
+def tests(session):
+    session.install('pytest')
+    session.install('-e', '.')
+    session.run('pytest')
+
+
+@nox.session
+def lint(session):
+    session.install('flake8')
+    session.run('flake8')
diff --git a/pyproject.toml b/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
 
 [project]
 name = "flameai"
-version = "1.0.3"
+version = "1.0.4"
 description = "Deep Learning Toolkit."
 readme = "README.md"
 keywords = [
@@ -16,7 +16,7 @@ authors = [
   { name = "luochang" },
   { email = "luochang212@gmail.com" },
 ]
-requires-python = ">=3.10"
+requires-python = ">=3.8"
 dependencies = [
   "numpy>=1.26.4",
   "pandas>=2.2.0",
@@ -26,7 +26,6 @@ dependencies = [
   "seaborn>=0.13.2",
   "optuna>=3.6.1",
   "click>=8.1.7",
-  "torch>=2.2.2",
 ]
 classifiers = [
   "License :: OSI Approved :: Apache Software License",

diff --git a/src/flameai/__init__.py b/src/flameai/__init__.py
@@ -12,4 +12,4 @@
     'metrics',
     'mining',
     'plot',
-]
+]
diff --git a/src/flameai/__main__.py b/src/flameai/__main__.py
@@ -1,13 +1,14 @@
-# Usage: python -m flameai 
-from ._env import check_hive_env, check_python_env, num_gpus
+# Usage: python -m flameai
+from ._env import check_hive_env, check_python_env, num_gpus, HAS_TORCH
 
 
 def check_env():
     text = lambda e: 'YES' if e == 0 else 'NO'
     print(f'Python: {text(check_python_env())}')
     print(f'Hive:   {text(check_hive_env())}')
-    print(f'GPU:    {"YES" if num_gpus() >= 1 else "NO"}')
+    if HAS_TORCH:
+        print(f'GPU:    {"YES" if num_gpus() >= 1 else "NO"}')
 
 
 if __name__ == "__main__":
-    check_env()
+    check_env()
diff --git a/src/flameai/_env.py b/src/flameai/_env.py
@@ -1,5 +1,19 @@
 import subprocess
-import torch
+
+from .util import set_logger
+
+
+logger = set_logger()
+
+
+HAS_TORCH = None
+try:
+    import torch
+    HAS_TORCH = True
+except ImportError:
+    HAS_TORCH = False
+    logger.warning("PyTorch not found. Please install it using 'pip install torch'")
+    logger.warning("or 'pip install torch -i https://mirrors.aliyun.com/pypi/simple/'")
 
 
 def check_python_env() -> int:
@@ -49,4 +63,5 @@ def try_gpu(i: int = 0):
 if __name__ == '__main__':
     print('check_python_env:', check_python_env())
     print('check_hive_env:', check_hive_env())
-    print('try_gpu:', try_gpu())
+    if HAS_TORCH:
+        print('try_gpu:', try_gpu())
diff --git a/src/flameai/cmd.py b/src/flameai/cmd.py
@@ -31,12 +31,12 @@ def hive_cli(file_name: str) -> None:
         try:
             res = subprocess.run(command, shell=True, text=True)
             if res.returncode != 0:
-                logger.warning(f'Failed to execute query.')
+                logger.warning('Failed to execute query.')
                 logger.error(f'Error: {res.stderr}')
                 logger.error(f'returncode: {res.returncode}')
         except Exception as e:
             logger.error(f'An Error occurred: {e}')
 
 
 if __name__ == "__main__":
-    hive_cli()
+    hive_cli()
diff --git a/src/flameai/metrics.py b/src/flameai/metrics.py
@@ -1,5 +1,3 @@
-import numpy as np
-import pandas as pd
 import sklearn.metrics
 
 from enum import Enum
@@ -21,9 +19,9 @@ def lgb_feature_importance(gbm) -> None:
     :param gbm: The trained LightGBM model.
     """
     items = [(k, v) for k, v in zip(gbm.feature_name(), gbm.feature_importance())]
-    sorted_items = sorted(items, key = lambda e: e[1], reverse = True)
+    sorted_items = sorted(items, key=lambda e: e[1], reverse=True)
     for i, (k, v) in enumerate(sorted_items):
-        print(f'[rank {i+1}] {k}: {v}')
+        print(f'[rank {i + 1}] {k}: {v}')
 
 
 def eval_continuous(y_true, y_pred) -> None:
@@ -43,13 +41,14 @@ def eval_continuous(y_true, y_pred) -> None:
     print(f'r2_score: {r2_score:.5f}')
 
 
-def eval_binary(y_true,
-                y_pred,
-                threshold: Optional[float] = None,
-                metric: Metric = Metric.F1_SCORE,
-                n_trials: int = 200,
-                ret: bool = False
-    ) -> Optional[Tuple[Any, float]]:
+def eval_binary(
+    y_true,
+    y_pred,
+    threshold: Optional[float] = None,
+    metric: Metric = Metric.F1_SCORE,
+    n_trials: int = 200,
+    ret: bool = False
+) -> Optional[Tuple[Any, float]]:
     """
     Evaluate a binary classification task.
 
@@ -66,8 +65,8 @@ def eval_binary(y_true,
     """
 
     # Metrics that can be directly calculated using y_pred
-    auc = sklearn.metrics.roc_auc_score(y_true = y_true, y_score = y_pred)
-    log_loss = sklearn.metrics.log_loss(y_true = y_true, y_pred = y_pred)
+    auc = sklearn.metrics.roc_auc_score(y_true=y_true, y_score=y_pred)
+    log_loss = sklearn.metrics.log_loss(y_true=y_true, y_pred=y_pred)
 
     # If the threshold does not exist, obtain it
     if threshold is None:
@@ -76,11 +75,11 @@ def eval_binary(y_true,
     y_label = [1 if e > threshold else 0 for e in y_pred]
 
     # Metrics that require the predicted labels (y_label)
-    acc = sklearn.metrics.accuracy_score(y_true = y_true, y_pred = y_label)
-    precision = sklearn.metrics.precision_score(y_true = y_true, y_pred = y_label)
-    recall = sklearn.metrics.recall_score(y_true = y_true, y_pred = y_label)
-    f1 = sklearn.metrics.f1_score(y_true = y_true, y_pred = y_label)
-    cm = sklearn.metrics.confusion_matrix(y_true = y_true, y_pred = y_label)
+    acc = sklearn.metrics.accuracy_score(y_true=y_true, y_pred=y_label)
+    precision = sklearn.metrics.precision_score(y_true=y_true, y_pred=y_label)
+    recall = sklearn.metrics.recall_score(y_true=y_true, y_pred=y_label)
+    f1 = sklearn.metrics.f1_score(y_true=y_true, y_pred=y_label)
+    cm = sklearn.metrics.confusion_matrix(y_true=y_true, y_pred=y_label)
     tn, fp, fn, tp = cm.ravel()
 
     print(f'threshold: {threshold:.5f}')
@@ -97,4 +96,4 @@ def eval_binary(y_true,
     print(f'confusion matrix:\n{cm}')
 
     if ret:
-        return y_label, threshold
+        return y_label, threshold
diff --git a/src/flameai/mining.py b/src/flameai/mining.py
@@ -12,4 +12,4 @@ def value_counts(df: pd.DataFrame) -> pd.DataFrame:
     return pd.DataFrame({
         'col_name': df.columns,
         'val_cnt': val_cnt_list
-    })
+    })
diff --git a/src/flameai/plot.py b/src/flameai/plot.py
@@ -11,18 +11,18 @@ def roc_curve(y_true, y_score) -> None:
     :param y_true: An array of true binary labels.
     :param y_score: An array of predicted probabilities.
     """
-    fpr, tpr, thresholds = sklearn.metrics.roc_curve(y_true = y_true, y_score = y_score)
-    auc = sklearn.metrics.roc_auc_score(y_true = y_true, y_score = y_score)
+    fpr, tpr, thresholds = sklearn.metrics.roc_curve(y_true=y_true, y_score=y_score)
+    auc = sklearn.metrics.roc_auc_score(y_true=y_true, y_score=y_score)
     print(f'AUC: {auc:.5f}')
 
-    plt.figure(figsize = (8, 6))
+    plt.figure(figsize=(8, 6))
     plt.plot(fpr, tpr, label='ROC curve (AUC = {:.2f})'.format(auc))
 
     plt.title('Receiver Operating Characteristic (ROC) Curve')
     plt.xlabel('False Positive Rate (FPR)')
     plt.ylabel('True Positive Rate (TPR)')
     plt.legend(loc="lower right")
-    plt.grid(True, linestyle = 'dashed', alpha = 0.5)
+    plt.grid(True, linestyle='dashed', alpha=0.5)
 
     plt.show()
 
@@ -34,8 +34,8 @@ def confusion_matrix(y_true, y_label) -> None:
     :param y_true: An array of true binary labels.
     :param y_label: An array of labels predicted by the model.
     """
-    cm = sklearn.metrics.confusion_matrix(y_true = y_true, y_pred = y_label)
-    cm_matrix = pd.DataFrame(data = cm,
-                             columns = ['Predict Negative:0', 'Predict Positive:1'], 
-                             index = ['Actual Negative:0', 'Actual Positive:1'])
-    sns.heatmap(cm_matrix, annot = True, fmt = 'd', cmap = 'YlGnBu')
+    cm = sklearn.metrics.confusion_matrix(y_true=y_true, y_pred=y_label)
+    cm_matrix = pd.DataFrame(data=cm,
+                             columns=['Predict Negative:0', 'Predict Positive:1'],
+                             index=['Actual Negative:0', 'Actual Positive:1'])
+    sns.heatmap(cm_matrix, annot=True, fmt='d', cmap='YlGnBu')
diff --git a/src/flameai/preprocessing.py b/src/flameai/preprocessing.py
@@ -1,8 +1,6 @@
-import math
 import numpy as np
 import pandas as pd
 import sklearn.preprocessing
-import torch
 
 
 def label_encoder(df: pd.DataFrame) -> pd.DataFrame:
@@ -27,7 +25,7 @@ def gen_scale_pos_weight(y_train) -> float:
 
 
 class DataLoader:
-    def __init__(self, lst: list = []):
+    def __init__(self, lst: list):
         self.i = 0
         self._data = lst
 
@@ -49,16 +47,3 @@ def __next__(self):
             return self._data[self.i - 1]
         else:
             raise StopIteration
-
-
-def data_iter(data: list, batch_size: int) -> DataLoader:
-    """Split the original input data list into batches."""
-    lst = []
-    batch_num = math.floor(len(data) / batch_size)
-    for i in range(batch_num):
-        start, end = batch_size * i, batch_size * (i + 1)
-        X = torch.tensor([e[0] for e in data[start:end]])
-        y = torch.tensor([e[1] for e in data[start:end]])
-        lst.append((X, y))
-
-    return DataLoader(lst)
diff --git a/src/flameai/train.py b/src/flameai/train.py
@@ -1,16 +1,18 @@
 import numpy as np
 import scipy
-import sklearn.metrics
 import optuna
 
+from typing import Optional
+
 
 class AdaptiveLearningRate:
     """Customized learning rate decay"""
+
     def __init__(self,
-                 learning_rate: float = 0.3,
-                 decay_rate: float = 0.9,
-                 patience: int = 10
-        ) -> None:
+        learning_rate: float = 0.3,
+        decay_rate: float = 0.9,
+        patience: int = 10
+    ) -> None:
         self.learning_rate = learning_rate
         self.decay_rate = decay_rate
         self.patience = patience
@@ -52,10 +54,10 @@ def gen_threshold(y_true, y_pred, metric, n_trials: int) -> float:
     def objective(trial):
         t = trial.suggest_float('threshold', 0.0, 1.0)
         y_label = [1 if e > t else 0 for e in y_pred]
-        return metric(y_true = y_true, y_pred = y_label)
+        return metric(y_true=y_true, y_pred=y_label)
 
-    study = optuna.create_study(direction = 'maximize')
-    study.optimize(objective, n_trials = n_trials)
+    study = optuna.create_study(direction='maximize')
+    study.optimize(objective, n_trials=n_trials)
     best_params = study.best_params
 
     # Restore the original logging level
@@ -64,7 +66,7 @@ def objective(trial):
     return best_params['threshold']
 
 
-def gen_threshold_cdf(y_pred, rate: float, interval: int = 100) -> float:
+def gen_threshold_cdf(y_pred, rate: float, interval: int = 100) -> Optional[float]:
     """
     Finds the optimal threshold based on the desired proportion of negative samples (label 0)
 
@@ -81,8 +83,7 @@ def gen_threshold_cdf(y_pred, rate: float, interval: int = 100) -> float:
     px = 0
     for x, y in zip(xx, cdf):
         if y > rate:
-            xa = (px + x) / 2
-            break
+            return (px + x) / 2
         px = x
 
-    return xa
+    return None
-Original file line number
+Diff line change
@@ Expand Up / @@ -12,4 +12,4 @@ @@
         'metrics',
         'mining',
         'plot',
-    ]
+    ]