Skip to content

Commit

Permalink
Replace print with logging.
Browse files Browse the repository at this point in the history
  • Loading branch information
miaohancheng committed Nov 10, 2024
1 parent a42d5c0 commit 1bc8c4b
Show file tree
Hide file tree
Showing 2 changed files with 13 additions and 13 deletions.
24 changes: 13 additions & 11 deletions pysmatch/Matcher.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
import matplotlib.pyplot as plt
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import StandardScaler

import logging
from pysmatch import *
import pysmatch.functions as uf
from catboost import CatBoostClassifier
Expand All @@ -12,7 +12,9 @@
from imblearn.over_sampling import RandomOverSampler
from sklearn.model_selection import train_test_split


# Configure the root logger when this module is imported: INFO level and
# above, each record rendered as "<timestamp> - <level> - <message>" with a
# "YYYY-MM-DD HH:MM:SS" timestamp.
logging.basicConfig(level=logging.INFO,
format='%(asctime)s - %(levelname)s - %(message)s',
datefmt='%Y-%m-%d %H:%M:%S')
class Matcher:
"""
Matcher Class -- Match data for an observational study.
Expand Down Expand Up @@ -73,9 +75,9 @@ def __init__(self, test, control, yvar, formula=None, exclude=None):
self.minority, self.majority = 1, 0
else:
self.minority, self.majority = 0, 1
print('Formula:\n{} ~ {}'.format(yvar, '+'.join(self.xvars)))
print('n majority:', len(self.data[self.data[yvar] == self.majority]))
print('n minority:', len(self.data[self.data[yvar] == self.minority]))
logging.info(f'Formula:{yvar} ~ {"+".join(self.xvars)}')
logging.info(f'n majority:{len(self.data[self.data[yvar] == self.majority])}')
logging.info(f'n minority:{len(self.data[self.data[yvar] == self.minority])}')
def preprocess_data(self, X, fit_scaler=False, index=None):
X_encoded = pd.get_dummies(X)

Expand Down Expand Up @@ -128,14 +130,14 @@ def fit_model(self, index, X, y, model_type, balance):
accuracy = model.score(X_processed, y_resampled)
else:
raise ValueError("Invalid model_type. Choose from 'linear', 'tree', or 'knn'.")
print(f"Model {index + 1}/{self.nmodels} trained. Accuracy: {accuracy:.2%}")
logging.info(f"Model {index + 1}/{self.nmodels} trained. Accuracy: {accuracy:.2%}")
return {'model': model, 'accuracy': accuracy}

def fit_scores(self, balance=True, nmodels=None, n_jobs=1, model_type='linear'):
self.models, self.model_accuracy = [], []
self.model_type = model_type
num_cores = mp.cpu_count()
print(f"This computer has: {num_cores} cores, The workers will be: {min(num_cores, n_jobs)}")
logging.info(f"This computer has: {num_cores} cores, The workers will be: {min(num_cores, n_jobs)}")

if balance and nmodels is None:
minor, major = [self.data[self.data[self.yvar] == i] for i in (self.minority, self.majority)]
Expand All @@ -150,12 +152,12 @@ def fit_scores(self, balance=True, nmodels=None, n_jobs=1, model_type='linear'):
for res in results:
self.models.append(res['model'])
self.model_accuracy.append(res['accuracy'])
print("\nAverage Accuracy:", "{:.2f}%".format(np.mean(self.model_accuracy) * 100))
# Scale the mean, not the list: `self.model_accuracy` is a Python list, so
# `list * 100` repeats it 100 times and leaves the mean as a 0-1 fraction.
logging.info(f"Average Accuracy:{round(np.mean(self.model_accuracy) * 100, 2)}% ")
else:
result = self.fit_model(0, self.X, self.y, self.model_type, balance)
self.models.append(result['model'])
self.model_accuracy.append(result['accuracy'])
print("\nAccuracy:", "{:.2f}%".format(self.model_accuracy[0] * 100))
logging.info(f"Accuracy:{round(self.model_accuracy[0] * 100,2)}%")

def predict_scores(self):
"""
Expand Down Expand Up @@ -221,7 +223,7 @@ def match(self, threshold=0.001, nmatches=1, method='min', max_rand=10, replacem
None
"""
if 'scores' not in self.data.columns: # Check if the propensity scores are already calculated
print("Propensity Scores have not been calculated. Using defaults...")
logging.info("Propensity Scores have not been calculated. Using defaults...")
self.fit_scores() # Fit the propensity score models
self.predict_scores() # Predict propensity scores for the data

Expand Down Expand Up @@ -305,7 +307,7 @@ def prop_test(self, col):
col))[1], 6)
return {'var': col, 'before': pval_before, 'after': pval_after}
else:
print("{} is a continuous variable".format(col))
logging.info(f"{col} is a continuous variable")

def compare_continuous(self, save=False, return_table=False,plot_result = True):
"""
Expand Down
2 changes: 0 additions & 2 deletions pysmatch/__init__.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,4 @@
from __future__ import division
from statsmodels.genmod.generalized_linear_model import GLM
from statsmodels.tools.sm_exceptions import PerfectSeparationError
from statsmodels.distributions.empirical_distribution import ECDF
from scipy import stats
from collections import Counter
Expand Down

0 comments on commit 1bc8c4b

Please sign in to comment.