Skip to content

Commit

Permalink
update
Browse files Browse the repository at this point in the history
  • Loading branch information
luochang212 committed May 24, 2024
1 parent bac9de6 commit ecaa98c
Show file tree
Hide file tree
Showing 18 changed files with 234 additions and 102 deletions.
45 changes: 45 additions & 0 deletions .github/workflows/nox.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
name: Run nox tests

on:
  push:
    branches:
      - main
  pull_request:
    branches:
      - main

jobs:
  test:
    runs-on: ${{ matrix.os }}

    strategy:
      matrix:
        os: [
          ubuntu-22.04,
          ubuntu-24.04,
          windows-2019,
          windows-2022,
          windows-latest,
          macos-11,
          macos-12,
          macos-13,
          macos-14,
        ]
        python-version: ['3.8', '3.9', '3.10', '3.11', '3.12']

    steps:
      - name: Checkout repository
        uses: actions/checkout@v4

      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}

      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install nox

      # Each matrix job installs exactly one interpreter. The `tests` session
      # in noxfile.py is parametrized over every supported Python, so a bare
      # `nox --error-on-missing-interpreter` would fail on the versions this
      # job did not install. Select only the session for the matrix version.
      - name: Run tests
        run: nox --non-interactive --error-on-missing-interpreter -s "tests-${{ matrix.python-version }}"

      # `lint` does not pin a Python version, so it runs on the interpreter
      # that was set up above.
      - name: Run lint
        run: nox --non-interactive -s lint
10 changes: 8 additions & 2 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,3 +1,9 @@
.DS_Store
/dist/*
/src/flameai/__pycache__/*
.idea/
.vscode/
.pytest_cache/
__pycache__/
/src/flameai/__pycache__/
/tests/__pycache__/
dist/
.nox/
14 changes: 14 additions & 0 deletions noxfile.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
import nox


@nox.session(python=['3.8', '3.9', '3.10', '3.11', '3.12'])
def tests(session):
    """Run the pytest suite against an editable install of the package.

    One session is generated per listed Python version (``tests-3.8``,
    ``tests-3.9``, ...). Arguments given after ``--`` on the nox command
    line are forwarded to pytest, e.g. ``nox -s tests-3.12 -- -k metrics -x``.

    :param session: The nox session object supplied by the nox runner.
    """
    # NOTE(review): pyproject.toml requires numpy>=1.26.4 and pandas>=2.2.0,
    # which appear to need Python >= 3.9 -- confirm that the 3.8 session can
    # actually install the package.
    session.install('pytest')
    session.install('-e', '.')
    # With no extra arguments ``session.posargs`` is empty, so this is the
    # same plain ``pytest`` invocation as before.
    session.run('pytest', *session.posargs)


@nox.session
def lint(session):
    """Install flake8 into the session's environment and run it.

    :param session: The nox session object supplied by the nox runner.
    """
    linter = 'flake8'
    session.install(linter)
    session.run(linter)
5 changes: 2 additions & 3 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ build-backend = "hatchling.build"

[project]
name = "flameai"
version = "1.0.3"
version = "1.0.4"
description = "Deep Learning Toolkit."
readme = "README.md"
keywords = [
Expand All @@ -16,7 +16,7 @@ authors = [
{ name = "luochang" },
{ email = "luochang212@gmail.com" },
]
requires-python = ">=3.10"
requires-python = ">=3.8"
dependencies = [
"numpy>=1.26.4",
"pandas>=2.2.0",
Expand All @@ -26,7 +26,6 @@ dependencies = [
"seaborn>=0.13.2",
"optuna>=3.6.1",
"click>=8.1.7",
"torch>=2.2.2",
]
classifiers = [
"License :: OSI Approved :: Apache Software License",
Expand Down
2 changes: 1 addition & 1 deletion src/flameai/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,4 +12,4 @@
'metrics',
'mining',
'plot',
]
]
9 changes: 5 additions & 4 deletions src/flameai/__main__.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,14 @@
# Usage: python -m flameai
from ._env import check_hive_env, check_python_env, num_gpus
# Usage: python -m flameai
from ._env import check_hive_env, check_python_env, num_gpus, HAS_TORCH


def check_env():
    """Print a YES/NO line for each environment check.

    The Python and Hive lines are derived from the status codes returned by
    ``check_python_env()`` and ``check_hive_env()``, where 0 is reported as
    'YES'. The GPU line is printed only when ``HAS_TORCH`` is truthy.
    """

    def text(code):
        """Map a zero status code to 'YES' and anything else to 'NO'."""
        return 'YES' if code == 0 else 'NO'

    print(f'Python: {text(check_python_env())}')
    print(f'Hive: {text(check_hive_env())}')
    # Presumably num_gpus() relies on torch, so the GPU line is skipped when
    # torch could not be imported -- verify against _env.py.
    if HAS_TORCH:
        print(f'GPU: {"YES" if num_gpus() >= 1 else "NO"}')


if __name__ == "__main__":
check_env()
check_env()
19 changes: 17 additions & 2 deletions src/flameai/_env.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,19 @@
import subprocess
import torch

from .util import set_logger


logger = set_logger()


# Detect whether the optional PyTorch dependency can be imported.
# HAS_TORCH is True when the import succeeds and False otherwise; on failure
# two warnings with installation hints are logged.
try:
    import torch
except ImportError:
    HAS_TORCH = False
    logger.warning("PyTorch not found. Please install it using 'pip install torch'")
    logger.warning("or 'pip install torch -i https://mirrors.aliyun.com/pypi/simple/'")
else:
    HAS_TORCH = True


def check_python_env() -> int:
Expand Down Expand Up @@ -49,4 +63,5 @@ def try_gpu(i: int = 0):
if __name__ == '__main__':
print('check_python_env:', check_python_env())
print('check_hive_env:', check_hive_env())
print('try_gpu:', try_gpu())
if HAS_TORCH:
print('try_gpu:', try_gpu())
4 changes: 2 additions & 2 deletions src/flameai/cmd.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,12 +31,12 @@ def hive_cli(file_name: str) -> None:
try:
res = subprocess.run(command, shell=True, text=True)
if res.returncode != 0:
logger.warning(f'Failed to execute query.')
logger.warning('Failed to execute query.')
logger.error(f'Error: {res.stderr}')
logger.error(f'returncode: {res.returncode}')
except Exception as e:
logger.error(f'An Error occurred: {e}')


if __name__ == "__main__":
hive_cli()
hive_cli()
37 changes: 18 additions & 19 deletions src/flameai/metrics.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
import numpy as np
import pandas as pd
import sklearn.metrics

from enum import Enum
Expand All @@ -21,9 +19,9 @@ def lgb_feature_importance(gbm) -> None:
:param gbm: The trained LightGBM model.
"""
items = [(k, v) for k, v in zip(gbm.feature_name(), gbm.feature_importance())]
sorted_items = sorted(items, key = lambda e: e[1], reverse = True)
sorted_items = sorted(items, key=lambda e: e[1], reverse=True)
for i, (k, v) in enumerate(sorted_items):
print(f'[rank {i+1}] {k}: {v}')
print(f'[rank {i + 1}] {k}: {v}')


def eval_continuous(y_true, y_pred) -> None:
Expand All @@ -43,13 +41,14 @@ def eval_continuous(y_true, y_pred) -> None:
print(f'r2_score: {r2_score:.5f}')


def eval_binary(y_true,
y_pred,
threshold: Optional[float] = None,
metric: Metric = Metric.F1_SCORE,
n_trials: int = 200,
ret: bool = False
) -> Optional[Tuple[Any, float]]:
def eval_binary(
y_true,
y_pred,
threshold: Optional[float] = None,
metric: Metric = Metric.F1_SCORE,
n_trials: int = 200,
ret: bool = False
) -> Optional[Tuple[Any, float]]:
"""
Evaluate a binary classification task.
Expand All @@ -66,8 +65,8 @@ def eval_binary(y_true,
"""

# Metrics that can be directly calculated using y_pred
auc = sklearn.metrics.roc_auc_score(y_true = y_true, y_score = y_pred)
log_loss = sklearn.metrics.log_loss(y_true = y_true, y_pred = y_pred)
auc = sklearn.metrics.roc_auc_score(y_true=y_true, y_score=y_pred)
log_loss = sklearn.metrics.log_loss(y_true=y_true, y_pred=y_pred)

# If the threshold does not exist, obtain it
if threshold is None:
Expand All @@ -76,11 +75,11 @@ def eval_binary(y_true,
y_label = [1 if e > threshold else 0 for e in y_pred]

# Metrics that require the predicted labels (y_label)
acc = sklearn.metrics.accuracy_score(y_true = y_true, y_pred = y_label)
precision = sklearn.metrics.precision_score(y_true = y_true, y_pred = y_label)
recall = sklearn.metrics.recall_score(y_true = y_true, y_pred = y_label)
f1 = sklearn.metrics.f1_score(y_true = y_true, y_pred = y_label)
cm = sklearn.metrics.confusion_matrix(y_true = y_true, y_pred = y_label)
acc = sklearn.metrics.accuracy_score(y_true=y_true, y_pred=y_label)
precision = sklearn.metrics.precision_score(y_true=y_true, y_pred=y_label)
recall = sklearn.metrics.recall_score(y_true=y_true, y_pred=y_label)
f1 = sklearn.metrics.f1_score(y_true=y_true, y_pred=y_label)
cm = sklearn.metrics.confusion_matrix(y_true=y_true, y_pred=y_label)
tn, fp, fn, tp = cm.ravel()

print(f'threshold: {threshold:.5f}')
Expand All @@ -97,4 +96,4 @@ def eval_binary(y_true,
print(f'confusion matrix:\n{cm}')

if ret:
return y_label, threshold
return y_label, threshold
2 changes: 1 addition & 1 deletion src/flameai/mining.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,4 +12,4 @@ def value_counts(df: pd.DataFrame) -> pd.DataFrame:
return pd.DataFrame({
'col_name': df.columns,
'val_cnt': val_cnt_list
})
})
18 changes: 9 additions & 9 deletions src/flameai/plot.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,18 +11,18 @@ def roc_curve(y_true, y_score) -> None:
:param y_true: An array of true binary labels.
:param y_score: An array of predicted probabilities.
"""
fpr, tpr, thresholds = sklearn.metrics.roc_curve(y_true = y_true, y_score = y_score)
auc = sklearn.metrics.roc_auc_score(y_true = y_true, y_score = y_score)
fpr, tpr, thresholds = sklearn.metrics.roc_curve(y_true=y_true, y_score=y_score)
auc = sklearn.metrics.roc_auc_score(y_true=y_true, y_score=y_score)
print(f'AUC: {auc:.5f}')

plt.figure(figsize = (8, 6))
plt.figure(figsize=(8, 6))
plt.plot(fpr, tpr, label='ROC curve (AUC = {:.2f})'.format(auc))

plt.title('Receiver Operating Characteristic (ROC) Curve')
plt.xlabel('False Positive Rate (FPR)')
plt.ylabel('True Positive Rate (TPR)')
plt.legend(loc="lower right")
plt.grid(True, linestyle = 'dashed', alpha = 0.5)
plt.grid(True, linestyle='dashed', alpha=0.5)

plt.show()

Expand All @@ -34,8 +34,8 @@ def confusion_matrix(y_true, y_label) -> None:
:param y_true: An array of true binary labels.
:param y_label: An array of labels predicted by the model.
"""
cm = sklearn.metrics.confusion_matrix(y_true = y_true, y_pred = y_label)
cm_matrix = pd.DataFrame(data = cm,
columns = ['Predict Negative:0', 'Predict Positive:1'],
index = ['Actual Negative:0', 'Actual Positive:1'])
sns.heatmap(cm_matrix, annot = True, fmt = 'd', cmap = 'YlGnBu')
cm = sklearn.metrics.confusion_matrix(y_true=y_true, y_pred=y_label)
cm_matrix = pd.DataFrame(data=cm,
columns=['Predict Negative:0', 'Predict Positive:1'],
index=['Actual Negative:0', 'Actual Positive:1'])
sns.heatmap(cm_matrix, annot=True, fmt='d', cmap='YlGnBu')
17 changes: 1 addition & 16 deletions src/flameai/preprocessing.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,6 @@
import math
import numpy as np
import pandas as pd
import sklearn.preprocessing
import torch


def label_encoder(df: pd.DataFrame) -> pd.DataFrame:
Expand All @@ -27,7 +25,7 @@ def gen_scale_pos_weight(y_train) -> float:


class DataLoader:
def __init__(self, lst: list = []):
def __init__(self, lst: list):
self.i = 0
self._data = lst

Expand All @@ -49,16 +47,3 @@ def __next__(self):
return self._data[self.i - 1]
else:
raise StopIteration


def data_iter(data: list, batch_size: int) -> DataLoader:
"""Split the original input data list into batches."""
lst = []
batch_num = math.floor(len(data) / batch_size)
for i in range(batch_num):
start, end = batch_size * i, batch_size * (i + 1)
X = torch.tensor([e[0] for e in data[start:end]])
y = torch.tensor([e[1] for e in data[start:end]])
lst.append((X, y))

return DataLoader(lst)
25 changes: 13 additions & 12 deletions src/flameai/train.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,18 @@
import numpy as np
import scipy
import sklearn.metrics
import optuna

from typing import Optional


class AdaptiveLearningRate:
"""Customized learning rate decay"""

def __init__(self,
learning_rate: float = 0.3,
decay_rate: float = 0.9,
patience: int = 10
) -> None:
learning_rate: float = 0.3,
decay_rate: float = 0.9,
patience: int = 10
) -> None:
self.learning_rate = learning_rate
self.decay_rate = decay_rate
self.patience = patience
Expand Down Expand Up @@ -52,10 +54,10 @@ def gen_threshold(y_true, y_pred, metric, n_trials: int) -> float:
def objective(trial):
t = trial.suggest_float('threshold', 0.0, 1.0)
y_label = [1 if e > t else 0 for e in y_pred]
return metric(y_true = y_true, y_pred = y_label)
return metric(y_true=y_true, y_pred=y_label)

study = optuna.create_study(direction = 'maximize')
study.optimize(objective, n_trials = n_trials)
study = optuna.create_study(direction='maximize')
study.optimize(objective, n_trials=n_trials)
best_params = study.best_params

# Restore the original logging level
Expand All @@ -64,7 +66,7 @@ def objective(trial):
return best_params['threshold']


def gen_threshold_cdf(y_pred, rate: float, interval: int = 100) -> float:
def gen_threshold_cdf(y_pred, rate: float, interval: int = 100) -> Optional[float]:
"""
Finds the optimal threshold based on the desired proportion of negative samples (label 0)
Expand All @@ -81,8 +83,7 @@ def gen_threshold_cdf(y_pred, rate: float, interval: int = 100) -> float:
px = 0
for x, y in zip(xx, cdf):
if y > rate:
xa = (px + x) / 2
break
return (px + x) / 2
px = x

return xa
return None
Loading

0 comments on commit ecaa98c

Please sign in to comment.