Sklearn & SHAP

Prerequisites

pip install shap xgboost scikit-learn

Cleaning

import pandas as pd
from sklearn.model_selection import train_test_split

# NOTE: `df` is expected to be an already-loaded DataFrame that contains a
# categorical `ocean_proximity` column. The two encoders below are
# alternatives; both read the raw column, so it is dropped only once, after
# whichever encoding(s) you keep have run.

# label encoder: map each category to an integer code (`labels` holds the
# original category values, indexed by code)
df['ocean_proximity_encoded'], labels = pd.factorize(df['ocean_proximity'])

# onehot encoder: one indicator column per category
one_hot_encoded = pd.get_dummies(df['ocean_proximity'])
df = pd.concat([df, one_hot_encoded], axis=1)

# the raw text column is no longer needed once it has been encoded
df = df.drop("ocean_proximity", axis=1)

# train_test_split
# Passing a single array returns only (train, test); features and target must
# be passed separately to get the four-way split.
# Assumes `median_house_value` is the target column -- adjust for your data.
x = df.drop("median_house_value", axis=1)
y = df["median_house_value"]
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=0)

Modeling

Classification

import numpy as np
from xgboost import XGBClassifier
from sklearn.metrics import confusion_matrix, classification_report
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split

# Load the iris dataset (three target classes).
iris_data = load_iris()

x = iris_data.data
y = iris_data.target
# Hold out 20% of the samples for testing; fixed seed for reproducibility.
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=0)

# Iris is a multi-class problem, so request a multi-class objective rather
# than 'binary:logistic'.
model = XGBClassifier(n_estimators=2, max_depth=2, learning_rate=1, objective='multi:softprob')
model.fit(x_train, y_train)

y_pred = model.predict(x_test)

print('The accuracy of the test set is:', model.score(x_test, y_test))

# Both reports below are computed on the held-out test split.
matrix_test = confusion_matrix(y_test, y_pred)
print('The confusion matrix for the test set is:\n', matrix_test)

report_test = classification_report(y_test, y_pred)
print(f'The classification report for the test set is: \n {report_test}')

Regression

import pandas as pd
from xgboost import XGBRegressor


# Download the housing CSV and discard the text column `ocean_proximity`
# so that only the remaining columns are passed to the model.
url = "https://raw.githubusercontent.com/sonarsushant/California-House-Price-Prediction/master/housing.csv"
df = pd.read_csv(url).drop(columns="ocean_proximity")

# Target: `median_house_value` (kept as a one-column DataFrame);
# features: every other remaining column.
y = df[['median_house_value']]
x = df.drop(columns="median_house_value")

# Fit a gradient-boosted regressor with default hyper-parameters.
model = XGBRegressor()
model.fit(x, y)

Evaluation

SHAP

import shap
# Load the JavaScript needed for interactive force plots in notebooks.
shap.initjs()

# `model` and `x` are the fitted regressor and feature DataFrame from the
# Regression section.
explainer = shap.Explainer(model)
shap_values = explainer(x)

# Explanation of a single prediction (first row).
shap.plots.waterfall(shap_values[0])
shap.plots.force(shap_values[0], matplotlib=True)
# Stacked force plot for the first 500 rows.
shap.plots.force(shap_values[:500])
# Dependence scatter for one feature. The name must match the DataFrame
# column exactly; the housing CSV uses lowercase column names.
shap.plots.scatter(shap_values[:, "latitude"], color=shap_values)
# Global summaries over all rows.
shap.plots.beeswarm(shap_values)
shap.plots.bar(shap_values)

References

Last updated