Sklearn & SHAP
Prerequisites
pip install shap xgboost scikit-learn
Cleaning
import pandas as pd
# Encode the categorical "ocean_proximity" column and split the data.
# NOTE(review): assumes `df` is the housing DataFrame loaded elsewhere, with an
# "ocean_proximity" column and a "median_house_value" target — confirm.
from sklearn.model_selection import train_test_split

# label encoder: pd.factorize returns integer codes plus the unique categories,
# so labels[code] recovers the original string.
df['ocean_proximity_encoded'], labels = pd.factorize(df['ocean_proximity'])
# onehot encoder: one indicator column per category.
# Both encodings are shown for illustration; keep only one in a real pipeline.
one_hot_encoded = pd.get_dummies(df['ocean_proximity'])
df = pd.concat([df, one_hot_encoded], axis=1)
# Drop the raw column last: both encoders above need to read it first.
df = df.drop("ocean_proximity", axis=1)
# train_test_split: pass features and target separately so that four objects
# (x_train, x_test, y_train, y_test) are returned.
x = df.drop("median_house_value", axis=1)
y = df["median_house_value"]
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=0)
Modeling
Classification
import numpy as np
from xgboost import XGBClassifier
from sklearn.metrics import confusion_matrix, classification_report
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
# Train a small XGBoost classifier on the iris dataset and report test metrics.
iris_data = load_iris()
x = iris_data.data
y = iris_data.target
# Hold out 20% of the samples for evaluation; random_state fixes the split.
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=0)
# Iris has three classes, so a multiclass objective is used instead of
# 'binary:logistic'.
model = XGBClassifier(n_estimators=2, max_depth=2, learning_rate=1, objective='multi:softprob')
model.fit(x_train, y_train)
y_pred = model.predict(x_test)
# Every metric below is computed on the held-out test split.
print('The accuracy of the test set is:', model.score(x_test, y_test))
matrix_test = confusion_matrix(y_test, y_pred)
print('The confusion matrix for the test set is:\n', matrix_test)
report_test = classification_report(y_test, y_pred)
print(f'The classification report for the test set is: \n {report_test}')
Regression
import pandas as pd
from xgboost import XGBRegressor
# Fit an XGBoost regressor that predicts median_house_value from the
# remaining numeric columns of the California housing CSV.
url = "https://raw.githubusercontent.com/sonarsushant/California-House-Price-Prediction/master/housing.csv"
# Load the CSV and discard the categorical column in one step.
df = pd.read_csv(url).drop(columns=["ocean_proximity"])
# Features are every column except the target; the target stays a
# single-column DataFrame.
y = df[['median_house_value']]
x = df.drop(columns=["median_house_value"])
model = XGBRegressor()
model.fit(x, y)
Evaluation
SHAP
import shap
# Explain the fitted `model` on the feature frame `x` with SHAP.
# Load the JavaScript needed by the interactive force plots in a notebook.
shap.initjs()
explainer = shap.Explainer(model)
shap_values = explainer(x)
# Explanation of a single prediction (first row).
shap.plots.waterfall(shap_values[0])
shap.plots.force(shap_values[0], matplotlib=True)
# Stacked force plot over the first 500 rows.
shap.plots.force(shap_values[:500])
# Feature names come from the DataFrame columns, which are lowercase in the
# housing CSV, hence "latitude" rather than "Latitude".
shap.plots.scatter(shap_values[:, "latitude"], color=shap_values)
# Global summaries across all rows.
shap.plots.beeswarm(shap_values)
shap.plots.bar(shap_values)
References
Last updated