import pandas as pd
import sklearn
from sklearn.preprocessing import OneHotEncoder, LabelEncoder
# Load the dataset and remove the `id_cliente` column so it does not end up
# among the features.
df = pd.read_csv("churn.csv")
df = df.drop(columns=["id_cliente"])

# Target: the "churn" column. Features: every remaining column.
y = df["churn"]
X = df.drop(columns=["churn"])
# Categorical features are taken to be the columns that pandas stored with the
# generic "object" dtype.
columnas_categoricas = X.select_dtypes(include=["object"]).columns

# scikit-learn spells the dense-output flag `sparse_output` from 1.2 onwards
# and `sparse` before that.  Probe for the keyword instead of comparing
# `sklearn.__version__` as a string: string comparison is lexicographic, so a
# version such as "1.10" would sort before "1.2" and pick the wrong spelling.
try:
    encoder = OneHotEncoder(drop='if_binary', sparse_output=False)
except TypeError:
    # This release does not accept `sparse_output`; use the older keyword.
    encoder = OneHotEncoder(drop='if_binary', sparse=False)

# One-hot encode the categorical columns into a dense array and wrap it in a
# DataFrame whose column names are produced by the fitted encoder.
X_cod = pd.DataFrame(
    encoder.fit_transform(X[columnas_categoricas]),
    columns=encoder.get_feature_names_out(columnas_categoricas),
)

# X_cod carries a fresh default index, so the remaining columns are re-indexed
# the same way before the two frames are joined side by side.
X_numericas = X.drop(columns=columnas_categoricas).reset_index(drop=True)
X = pd.concat([X_numericas, X_cod], axis=1)
# Replace the target values with the integer codes assigned by LabelEncoder.
label_encoder = LabelEncoder()
y = label_encoder.fit_transform(y)

# Quick sanity report: dimensions of both outputs, then a sample of each.
print("X shape:", X.shape)
print("y shape:", y.shape)
print("Primeras filas de X:", X.head(), sep="\n")
print("Primeros valores de y:", y[:10])