Multiple Classifier Approach
Benchmarking several classifier algorithms to find the best-performing model for phishing detection
from sklearn.ensemble import AdaBoostClassifier, GradientBoostingClassifier, RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier

# Candidate classifiers to benchmark
models = {
    "Random Forest": RandomForestClassifier(verbose=1),
    "Decision Tree": DecisionTreeClassifier(),
    "Gradient Boosting": GradientBoostingClassifier(verbose=1),
    "Logistic Regression": LogisticRegression(verbose=1),
    "AdaBoost": AdaBoostClassifier(),
}
# Hyperparameter grids searched for each classifier
params = {
    "Decision Tree": {
        'criterion': ['gini', 'entropy', 'log_loss']
    },
    "Random Forest": {
        'n_estimators': [8, 16, 32, 128, 256]
    },
    "Gradient Boosting": {
        'learning_rate': [.1, .01, .05, .001],
        'subsample': [0.6, 0.7, 0.75, 0.85, 0.9],
        'n_estimators': [8, 16, 32, 64, 128, 256]
    },
    "Logistic Regression": {},  # no hyperparameters tuned; defaults are used
    "AdaBoost": {
        'learning_rate': [.1, .01, .001],
        'n_estimators': [8, 16, 32, 64, 128, 256]
    }
}
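Both dictionaries are handed to an evaluation step that grid-searches every classifier and records its score on the held-out test set, producing the model_report used below. That helper is not shown in this section; the following is only a minimal sketch of such a function, where the name evaluate_models, the 3-fold cross-validation, and the choice of f1_score as the test metric are assumptions rather than details taken from the project.

from sklearn.metrics import f1_score
from sklearn.model_selection import GridSearchCV

def evaluate_models(X_train, y_train, X_test, y_test, models, params):
    """Grid-search each candidate and return {model name: test score} (assumed helper)."""
    report = {}
    for name, model in models.items():
        # Search the (possibly empty) grid for this classifier
        grid = GridSearchCV(model, params.get(name, {}), cv=3, n_jobs=-1)
        grid.fit(X_train, y_train)

        # Refit on the full training set with the best parameters found
        model.set_params(**grid.best_params_)
        model.fit(X_train, y_train)

        # Score on the held-out test set (f1 is an assumed choice of metric)
        report[name] = f1_score(y_test, model.predict(X_test))
    return report

Calling model_report = evaluate_models(X_train, y_train, X_test, y_test, models, params) then yields a plain dict keyed by model name, which is what the selection logic below consumes.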
Automatic selection of the best-performing model based on its test-set score
# Best test score across all evaluated models
best_model_score = max(model_report.values())

# Name of the model that achieved that score
best_model_name = list(model_report.keys())[
    list(model_report.values()).index(best_model_score)
]
best_model = models[best_model_name]

# Log the winning model and its metrics to MLflow
self.track_mlflow(best_model, classification_metric)
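self.track_mlflow is not defined in this excerpt. Below is a minimal sketch of what such a method could look like, assuming classification_metric is an object exposing f1_score, precision_score, and recall_score attributes (those attribute names are assumptions, not taken from the snippet).

import mlflow
import mlflow.sklearn

def track_mlflow(self, best_model, classification_metric):
    """Log the selected model and its test metrics to an MLflow run (assumed implementation)."""
    with mlflow.start_run():
        mlflow.log_metric("f1_score", classification_metric.f1_score)
        mlflow.log_metric("precision", classification_metric.precision_score)
        mlflow.log_metric("recall", classification_metric.recall_score)
        # Persist the fitted sklearn model as an MLflow artifact
        mlflow.sklearn.log_model(best_model, "model")

Logged runs can then be compared in the MLflow UI to confirm that the automatically selected model outperforms the other candidates.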