Skip to content

Commit

Permalink
Update 0.1.6-alpha-1
Browse files Browse the repository at this point in the history
- ANAI Open Source Alpha Build 6

- Updated Documentation
- df_loader can now take pandas-related keyword arguments;
they are supplied through the df_kwargs argument when creating ANAI objects
- Added option to show graphs while explaining ANAI Models
- The fit method runs automatically when ANAI is run through anai.run(); if Regression or Classification is used separately, fit() must be called explicitly
- Explain method now returns its result in the form of a DataFrame
- Removed Unnecessary Import from Predictor

Signed-off-by: Arsh <lucifer78908@gmail.com>
  • Loading branch information
d4rk-lucif3r committed Aug 27, 2022
1 parent 125824d commit 5de8127
Show file tree
Hide file tree
Showing 12 changed files with 164 additions and 101 deletions.
19 changes: 15 additions & 4 deletions anai/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ def run(
df=None,
target: str = None,
filepath: str = None,
df_kwargs: dict = {},
config: bool = False,
except_columns: list = [],
predictor: list = [],
Expand Down Expand Up @@ -64,6 +65,10 @@ def run(
DataFrame to be used for modelling.
target : str
Target Column Name
filepath : str
Filepath of the dataframe to be loaded.
df_kwargs : dict
Keyword arguments for the dataframe loading function. Only used if filepath is not None.
except_columns : list, optional
List of Columns to be excluded from the dataset
predictor : list
Expand Down Expand Up @@ -137,6 +142,7 @@ def run(
ai = anai.run(
filepath='examples/Folds5x2_pp.xlsx',
df_kwargs={'sheet_name': 'Sheet1'},
target='PE',
predictor=['lin'],
)
Expand Down Expand Up @@ -166,7 +172,7 @@ def run(
raise FileNotFoundError("ANAI Config File Not Found")
if df is None:
if filepath is not None:
df = df_loader(filepath, suppress=True)
df = df_loader(filepath, suppress=True, **df_kwargs)
else:
raise ValueError("Please provide a dataframe or a filepath")
if __task(df, target) and not suppress_task_detection or task == "regression":
Expand All @@ -182,6 +188,7 @@ def run(
df=df,
target=target,
filepath=filepath,
df_kwargs=df_kwargs,
config=config,
except_columns=except_columns,
predictor=predictor,
Expand All @@ -206,6 +213,7 @@ def run(
metric=metric,
ensemble=ensemble,
)
regressor.fit()
return regressor
elif (
not __task(df, target)
Expand All @@ -224,6 +232,7 @@ def run(
df=df,
target=target,
filepath=filepath,
df_kwargs=df_kwargs,
config=config,
except_columns=except_columns,
predictor=predictor,
Expand All @@ -243,6 +252,7 @@ def run(
ensemble=ensemble,
exclude_models=exclude_models,
)
classifier.fit()
return classifier
except KeyboardInterrupt:
if os.path.exists(os.getcwd() + "/dask-worker-space"):
Expand Down Expand Up @@ -283,11 +293,12 @@ def __task(df, target):
return False


def load(df_filepath):
def load(df_filepath, **df_kwargs):
"""Loads a dataframe from a filepath.
Args:
df_filepath (str): Filepath of the dataframe to be loaded.
df_kwargs (dict): Keyword arguments to be passed to df_loader function.
Returns:
pd.DataFrame : Loaded dataframe.
Expand All @@ -296,12 +307,12 @@ def load(df_filepath):

suppress = False
if type(df_filepath) is str:
df = __df_loader_single(df_filepath, suppress=False)
df = __df_loader_single(df_filepath, suppress=False, **df_kwargs)
elif type(df_filepath) is list:
print(Fore.YELLOW + "Loading Data [*]\n")
df = pd.concat(
[
__df_loader_single(df_filepath[i], suppress=True)
__df_loader_single(df_filepath[i], suppress=True, **df_kwargs)
for i in range(len(df_filepath))
]
)
Expand Down
100 changes: 64 additions & 36 deletions anai/supervised/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ def __init__(
df=None,
target: str = None,
filepath: str = None,
df_kwargs: dict = {},
config: bool = False,
except_columns: list = [],
predictor: list =["lr"],
Expand Down Expand Up @@ -68,11 +69,16 @@ def __init__(
Parameters:
features : array
features array
lables : array
labels array
except_columns (list): [List of Columns to be excluded from the dataset]
df : Pandas DataFrame
DataFrame to be used for modelling.
target : str
Target Column Name
filepath : str
Filepath of the dataframe to be loaded.
df_kwargs : dict
Keyword arguments for the dataframe loading function. Only used if filepath is not None.
except_columns : list, optional
List of Columns to be excluded from the dataset
predictor : list
Predicting model to be used
Default ['lr'] - Logistic Regression\n
Expand Down Expand Up @@ -116,7 +122,7 @@ def __init__(
params : dict
contains parameters for model
tune : boolean
when True Applies GridSearch CrossValidation
when True Applies Optuna to find best parameters for model
Default is False
test_size: float or int, default=.2
If float, should be between 0.0 and 1.0 and represent
Expand All @@ -125,21 +131,18 @@ def __init__(
If int, represents the absolute number of test samples.
cv_folds : int
No. of cross validation folds. Default = 10
pca : str
if 'y' will apply PCA on Train and Validation set. Default = 'n'
pca : bool
if True will apply PCA on Train and Validation set. Default = False
lda : str
if 'y' will apply LDA on Train and Validation set. Default = 'n'
if True will apply LDA on Train and Validation set. Default = False
pca_kernel : str
Kernel to be use in PCA. Default = 'linear'
n_components_lda : int
No. of components for LDA. Default = 1
n_components_pca : int
No. of components for PCA. Default = 2
loss : str
loss method for ann. Default = 'binary_crossentropy'
rate for dropout layer. Default = 0
smote : str,
Whether to apply SMOTE. Default = 'y'
smote : Bool,
Whether to apply SMOTE. Default = True
k_neighbors : int
No. of neighbors for SMOTE. Default = 1
verbose : boolean
Expand All @@ -160,12 +163,8 @@ def __init__(
minimize : Minimize
optuna_n_trials : int
No. of trials for optuna. Default = 100
optuna_metric: str
Metric to be used in optuna. Default = 'R^2'
lgbm_objective : str
Objective for lgbm classifier. Default = 'binary'
ensemble : boolean
Whether to use ensemble. Default = True
Whether to use ensemble methods. Default = True
Returns:
Expand All @@ -178,6 +177,7 @@ def __init__(
import anai
ai = anai.run(
filepath='examples/test_data.csv',
df_kwargs={'index_col':'id'},
target='PE',
predictor=['lr'],
)
Expand All @@ -196,7 +196,7 @@ def __init__(
raise FileNotFoundError("ANAI Config File Not Found")
if df is None:
if filepath is not None:
df = df_loader(filepath)
df = df_loader(filepath, **df)
else:
raise ValueError("Please provide a dataframe or a filepath")
if type(predictor) == list:
Expand Down Expand Up @@ -295,9 +295,8 @@ def __init__(
self.dimension_handler = DimensionHandler()
self.encoder = None
self.le_encoder = None
self.__fit()

def __fit(self):
def fit(self):
"""[Takes Features and Labels and Encodes Categorical Data then Applies SMOTE , Splits the features and labels in training and validation sets with test_size = .2
scales X_train, self.X_val using StandardScaler.
Fits every model on training set and predicts results,
Expand Down Expand Up @@ -760,9 +759,21 @@ def __load(self, path=None):
else:
raise ValueError("No path specified.Please provide actual path\n")

def explain(self, method):
def explain(self, method, show_graph=True):
"""
Returns the importance features of the dataset
Explains the model using the specified method.
args:
method (str): [Method to use for explaining the model]
Available methods:
- shap
- perm
show_graph (bool): [Whether to show the graph or not]
returns:
[DataFrame] : [Explained DataFrame]
"""
columns = self.features.columns
self.explainer.set_params(
Expand All @@ -775,6 +786,7 @@ def explain(self, method):
self.fit_params,
False,
columns,
show_graph,
)
if self.pred_mode == "all":
classifier = copy.deepcopy(self.best_classifier.model)
Expand Down Expand Up @@ -804,6 +816,7 @@ def __init__(
df=None,
target: str = None,
filepath: str = None,
df_kwargs: dict = {},
config: bool = False,
except_columns: list = [],
predictor: list = ["lin"],
Expand Down Expand Up @@ -833,6 +846,10 @@ def __init__(
Parameters:
df (dataframe): [Dataset containing features and target]
target (str): [Target Column Name]
filepath : str
Filepath of the dataframe to be loaded.
df_kwargs : dict
Keyword arguments for the dataframe loading function. Only used if filepath is not None.
except_columns (list): [List of Columns to be excluded from the dataset]
predictor : list
Predicting models to be used
Expand Down Expand Up @@ -931,6 +948,7 @@ def __init__(
ai = anai.run(
filepath='examples/Folds5x2_pp.xlsx',
df_kwargs={'sheet_name': 'Sheet1'},
target='PE',
predictor=['lin'],
)
Expand All @@ -949,7 +967,7 @@ def __init__(
raise FileNotFoundError("ANAI Config File Not Found")
if df is None:
if filepath is not None:
df = df_loader(filepath)
df = df_loader(filepath, **df_kwargs)
# elif config_filepath is not None:
# df, target = load_data_from_config(config_filepath)
else:
Expand Down Expand Up @@ -1048,9 +1066,8 @@ def __init__(
self.encoder = None
self.features = None
self.labels = None
self.__fit()

def __fit(self):
def fit(self):
"""[Takes Features and Labels and Encodes Categorical Data then Applies SMOTE , Splits the features and labels in training and validation sets with test_size = .2
scales X_train, X_val using StandardScaler.
Fits model on training set and predicts results, Finds R^2 Scoreand mean square error
Expand Down Expand Up @@ -1573,9 +1590,21 @@ def __load(self, path=None):
else:
raise ValueError("No path specified.Please provide actual path\n")

def explain(self, method):
def explain(self, method, show_graph=True):
"""
Returns the importance features of the dataset
Explains the model using the specified method.
args:
method (str): [Method to use for explaining the model]
Available methods:
- shap
- perm
show_graph (bool): [Whether to show the graph or not]
returns:
[DataFrame] : [Explained DataFrame]
"""
self.explainer.set_params(
self.features,
Expand All @@ -1585,6 +1614,7 @@ def explain(self, method):
self.y_val,
self.cv_folds,
self.fit_params,
self.show_graph,
)
if self.pred_mode == "all":
regressor = copy.deepcopy(self.best_regressor.model)
Expand All @@ -1595,14 +1625,12 @@ def explain(self, method):
)
print(Fore.YELLOW + "Explaining ANAI [*]\n")

if self.original_predictor == "all":
raise TypeError(
"[Error] This method is only applicable on single predictor"
)
elif method == "perm":
self.explainer.permutation(model=regressor)
if method == "perm":
res = self.explainer.permutation(model=regressor)
return res
elif method == "shap":
self.explainer.shap(model=regressor)
res = self.explainer.shap(model=regressor)
return res
else:
raise NotImplementedError(
"Technique not implemented. Please choose from perm, shap"
Expand Down
Loading

0 comments on commit 5de8127

Please sign in to comment.