
Merge pull request #41 from serengil/feat-task-2312-retire-imp-module
Use importlib instead of the deprecated imp module on Python 3.11 and later
serengil authored Dec 23, 2023
2 parents fd0d0dd + 5cb9fe9 commit c9d1d1e
Showing 8 changed files with 59 additions and 39 deletions.
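
Across the touched modules the migration follows one pattern: the three-step imp lookup is replaced by a call to the new load_module helper. A rough before/after sketch (illustrative only; the outputs/rules/rules path follows the repository's convention):

```python
# before: imp has been deprecated since Python 3.4 and is removed in Python 3.12
import imp
fp, pathname, description = imp.find_module("outputs/rules/rules")
myrules = imp.load_module("outputs/rules/rules", fp, pathname, description)

# after: one helper that uses importlib on Python 3.11+ and falls back to imp on older versions
from chefboost.commons.module import load_module
myrules = load_module("outputs/rules/rules")
```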
4 changes: 2 additions & 2 deletions README.md
@@ -85,8 +85,8 @@ prediction = chef.predict(model, param = ['Sunny', 'Hot', 'High', 'Weak'])
You can also consume built decision trees directly. In this way, you can restore previously built decision trees and skip the learning steps, or apply [transfer learning](https://youtu.be/9hX8ir7_ZtA). Loaded trees offer a findDecision method for testing new instances.

```python
moduleName = "outputs/rules/rules" #this will load outputs/rules/rules.py
tree = chef.restoreTree(moduleName)
module_name = "outputs/rules/rules" #this will load outputs/rules/rules.py
tree = chef.restoreTree(module_name)
prediction = tree.findDecision(['Sunny', 'Hot', 'High', 'Weak'])
```
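
For context, the restored rules module is a plain Python file written during training; a minimal sketch of its shape (the branches below are hypothetical, the generated content depends on the trained tree):

```python
# outputs/rules/rules.py -- illustrative only; generated per dataset
def findDecision(obj):
    # obj holds the feature values in training-column order,
    # e.g. ['Sunny', 'Hot', 'High', 'Weak']
    if obj[0] == "Sunny":
        if obj[2] == "High":
            return "No"
        return "Yes"
    return "Yes"
```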

6 changes: 3 additions & 3 deletions chefboost/Chefboost.py
@@ -443,16 +443,16 @@ def load_model(file_name: str = "model.pkl") -> dict:
return model


def restoreTree(moduleName) -> dict:
def restoreTree(module_name) -> Any:
"""
Load built model from set of decision rules
Args:
moduleName (str): e.g. outputs/rules/rules to restore outputs/rules/rules.py
module_name (str): e.g. outputs/rules/rules to restore outputs/rules/rules.py
Returns:
loaded rules module exposing the findDecision method
"""

return functions.restoreTree(moduleName)
return functions.restoreTree(module_name)


def feature_importance(rules: Union[str, list]) -> pd.DataFrame:
7 changes: 3 additions & 4 deletions chefboost/commons/functions.py
@@ -1,12 +1,12 @@
import pathlib
import imp # pylint: disable=deprecated-module
import os
from os import path
import multiprocessing
from typing import Optional
import numpy as np
from chefboost import Chefboost as cb
from chefboost.commons.logger import Logger
from chefboost.commons.module import load_module

# pylint: disable=no-else-return, broad-except

@@ -23,9 +23,8 @@ def bulk_prediction(df, model):
df["Prediction"] = predictions


def restoreTree(moduleName):
fp, pathname, description = imp.find_module(moduleName)
return imp.load_module(moduleName, fp, pathname, description)
def restoreTree(module_name):
return load_module(module_name)


def softmax(w):
29 changes: 29 additions & 0 deletions chefboost/commons/module.py
@@ -0,0 +1,29 @@
import sys
from types import ModuleType

# pylint: disable=no-else-return


def load_module(module_name: str) -> ModuleType:
"""
Load python module with its name
Args:
module_name (str): module name without .py extension
Returns:
module (ModuleType)
"""
if sys.version_info >= (3, 11):
import importlib.util

spec = importlib.util.find_spec(module_name)
if spec is None:
raise ImportError(f"Module '{module_name}' not found")

module = importlib.util.module_from_spec(spec)
spec.loader.exec_module(module)
return module
else:
import imp # pylint: disable=deprecated-module

fp, pathname, description = imp.find_module(module_name)
return imp.load_module(module_name, fp, pathname, description)
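
A minimal usage sketch for the new helper, assuming training has already written outputs/rules/rules.py relative to the working directory (this mirrors how Training.py and Chefboost.restoreTree call it):

```python
from chefboost.commons.module import load_module

# loads outputs/rules/rules.py and returns it as a module object
rules = load_module("outputs/rules/rules")
prediction = rules.findDecision(["Sunny", "Hot", "High", "Weak"])
```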
12 changes: 5 additions & 7 deletions chefboost/training/Training.py
@@ -1,5 +1,4 @@
import math
import imp # pylint:disable=deprecated-module
import uuid
import json
import copy
@@ -15,6 +14,7 @@
from chefboost.training import Preprocess
from chefboost.commons import functions
from chefboost.commons.logger import Logger
from chefboost.commons.module import load_module

# pylint: disable=too-many-function-args, unused-argument

@@ -668,9 +668,8 @@ def buildDecisionTree(
):
# this is a regular decision tree. find accuracy here.

moduleName = "outputs/rules/rules"
fp, pathname, description = imp.find_module(moduleName)
myrules = imp.load_module(moduleName, fp, pathname, description) # rules0
module_name = "outputs/rules/rules"
myrules = load_module(module_name) # rules0
models.append(myrules)

return models
@@ -682,9 +681,8 @@ def findPrediction(row):
for j in range(0, num_of_features):
params.append(row[j])

moduleName = "outputs/rules/rules"
fp, pathname, description = imp.find_module(moduleName)
myrules = imp.load_module(moduleName, fp, pathname, description) # rules0
module_name = "outputs/rules/rules"
myrules = load_module(module_name) # rules0

prediction = myrules.findDecision(params)
return prediction
12 changes: 5 additions & 7 deletions chefboost/tuning/adaboost.py
@@ -1,4 +1,3 @@
import imp # pylint: disable=deprecated-module
import math

import pandas as pd
@@ -8,6 +7,7 @@
from chefboost.commons import functions
from chefboost.training import Training
from chefboost.commons.logger import Logger
from chefboost.commons.module import load_module

# pylint: disable=unused-argument

@@ -23,9 +23,8 @@ def findPrediction(row):
for j in range(0, columns - 1):
params.append(row[j])

moduleName = f"outputs/rules/rules_{int(epoch)}"
fp, pathname, description = imp.find_module(moduleName)
myrules = imp.load_module(moduleName, fp, pathname, description)
module_name = f"outputs/rules/rules_{int(epoch)}"
myrules = load_module(module_name)

prediction = functions.sign(myrules.findDecision(params))

@@ -79,9 +78,8 @@ def apply(df, config, header, dataset_features, validation_df=None, process_id=N

# ---------------------------------------

moduleName = "outputs/rules/rules_" + str(i)
fp, pathname, description = imp.find_module(moduleName)
myrules = imp.load_module(moduleName, fp, pathname, description)
module_name = "outputs/rules/rules_" + str(i)
myrules = load_module(module_name)
models.append(myrules)

# ---------------------------------------
17 changes: 7 additions & 10 deletions chefboost/tuning/gbm.py
@@ -1,4 +1,3 @@
import imp # pylint: disable=deprecated-module
import gc

import pandas as pd
@@ -8,6 +7,7 @@
from chefboost.commons import functions
from chefboost.training import Training
from chefboost.commons.logger import Logger
from chefboost.commons.module import load_module

# pylint: disable=unused-argument

@@ -23,9 +23,8 @@ def findPrediction(row):
for j in range(0, columns - 1):
params.append(row[j])

moduleName = f"outputs/rules/rules{epoch - 1}"
fp, pathname, description = imp.find_module(moduleName)
myrules = imp.load_module(moduleName, fp, pathname, description)
module_name = f"outputs/rules/rules{epoch - 1}"
myrules = load_module(module_name)

# prediction = int(myrules.findDecision(params))
prediction = myrules.findDecision(params)
@@ -81,9 +80,8 @@ def regressor(df, config, header, dataset_features, validation_df=None, process_
# run data(i-1) and rules(i-1), save data1

# dynamic import
moduleName = f"outputs/rules/rules{index - 1}"
fp, pathname, description = imp.find_module(moduleName)
myrules = imp.load_module(moduleName, fp, pathname, description) # rules0
module_name = f"outputs/rules/rules{index - 1}"
myrules = load_module(module_name) # rules0

models.append(myrules)

@@ -237,9 +235,8 @@ def classifier(df, config, header, dataset_features, validation_df=None, process
# ----------------------------

# dynamic import
moduleName = "outputs/rules/rules-for-" + current_class + "-round-" + str(epoch)
fp, pathname, description = imp.find_module(moduleName)
myrules = imp.load_module(moduleName, fp, pathname, description) # rules0
module_name = "outputs/rules/rules-for-" + current_class + "-round-" + str(epoch)
myrules = load_module(module_name) # rules0

models.append(myrules)

11 changes: 5 additions & 6 deletions chefboost/tuning/randomforest.py
@@ -1,11 +1,11 @@
import multiprocessing
from contextlib import closing
import imp # pylint: disable=deprecated-module

from tqdm import tqdm

from chefboost.commons import functions
from chefboost.training import Training
from chefboost.commons.module import load_module

# pylint: disable=unused-argument

@@ -31,8 +31,8 @@ def apply(df, config, header, dataset_features, validation_df=None, process_id=N

root = 1

moduleName = "outputs/rules/rule_" + str(i)
file = moduleName + ".py"
module_name = "outputs/rules/rule_" + str(i)
file = module_name + ".py"

functions.createFile(file, header)

@@ -85,9 +85,8 @@ def apply(df, config, header, dataset_features, validation_df=None, process_id=N
# -------------------------------
# collect models for both serial and parallel here
for i in range(0, num_of_trees):
moduleName = "outputs/rules/rule_" + str(i)
fp, pathname, description = imp.find_module(moduleName)
myrules = imp.load_module(moduleName, fp, pathname, description)
module_name = "outputs/rules/rule_" + str(i)
myrules = load_module(module_name)
models.append(myrules)

# -------------------------------
