import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.datasets import fetch_openml
import time

# Record the start time (presumably for timing the script; it is not read
# again in the visible code).
start_t = time.time()

# Fetch the Titanic dataset from OpenML as a DataFrame (X) and a Series (y).
# The ``parser="pandas"`` option was added in version 1.2.0.
X, y = fetch_openml(
    "titanic", version=1, as_frame=True, return_X_y=True, parser="pandas"
)
# Keep only numeric and categorical columns, then discard the "body" column.
X = X.select_dtypes(["number", "category"]).drop(columns=["body"])
print('check types: ', type(X), '\n', X.head(3))
print('check shapes: ', X.shape)

### drop nans
print('check for nans in cols: ', X.isna().sum())
# Drop every row that has at least one missing value.
X_nonan = X.dropna(how='any', inplace=False)
print('check new shapes: ', X_nonan.shape)
nonan_indices = X_nonan.index.to_list()
# Select the targets by the surviving row labels so that y stays aligned
# row-for-row with X_nonan.
y_nonan = y.loc[X_nonan.index]
print('check shape y: ', y_nonan.shape)
# print ('check for indices: ', X_nonan.index.to_list())
from sklearn.preprocessing import OrdinalEncoder
from sklearn.pipeline import make_pipeline
from sklearn.ensemble import GradientBoostingRegressor #HistGradientBoostingRegressor
from sklearn.compose import ColumnTransformer
#### build a pipeline
# Columns routed through the ordinal encoder; all remaining columns are
# passed through unchanged.
categorical_features = ["pclass", "sex", "embarked"]
preprocessor = ColumnTransformer(
    transformers=[("cat", OrdinalEncoder(), categorical_features)],
    remainder="passthrough",
)
# gradientboosting regressor doesn't work with nan entries, so the model is
# fitted on the NaN-free rows only.
model = make_pipeline(preprocessor, GradientBoostingRegressor(random_state=0))
model.fit(X_nonan, y_nonan)
from sklearn.inspection import PartialDependenceDisplay
from pathlib import Path

# Directory the figure is written to. The name was previously used without
# ever being defined; default to the current working directory.
path_to_file = Path(".")

fig, ax = plt.subplots(figsize=(14, 4), constrained_layout=True)
# Partial dependence for "age", for "sex", and for the ("pclass", "sex")
# pair, all drawn on the axes created above.
disp = PartialDependenceDisplay.from_estimator(
    model,
    X_nonan,
    features=["age", "sex", ("pclass", "sex")],
    categorical_features=categorical_features,
    ax=ax,
)
# Join with pathlib so the result is valid whether or not the directory
# ends with a path separator.
fig.savefig(path_to_file / "part_disp.png", dpi=200)
################
# with scikit-learn v0.24 (older plot_partial_dependence API)
################
# GBR_disp = plot_partial_dependence(model, X_NotNan,
# ['age', 'sex', ('age', 'sex')], ax=ax)
# >>> ValueError: could not convert string to float: 'female'
Спасибо за обратную связь!
Здравствуйте. Порекомендуйте, пожалуйста, актуальные must-read книги на русском языке по ML и Big Data.
Добрый день! https://t.me/datascienceiot/2280