# ============================================================
# 1) IMPORTS — comprendre la syntaxe scikit-learn
# ============================================================
# NumPy : calcul numérique
import numpy as np
# matplotlib : visualisation
import matplotlib.pyplot as plt
# scikit-learn : outils Machine Learning
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
# Dataset intégré à scikit-learn : classification binaire (malin / bénin).
from sklearn.datasets import load_breast_cancer
# Load the breast-cancer dataset bundled with scikit-learn.
data = load_breast_cancer()

# Unpack the feature matrix (X) and the 0/1 label vector (y) in one statement.
X, y = data.data, data.target

# Report the dimensions of both arrays.
print("Dimensions de X :", np.shape(X))
print("Dimensions de y :", np.shape(y))
# Sortie :
# Dimensions de X : (569, 30)
# Dimensions de y : (569,)
# array([0, 0, 0, 0, 0])
# Split the data into a training part and a held-out test part:
#   test_size=0.3    -> 30% of the samples go to the test set, 70% to training
#   random_state=42  -> fixed seed so the same split is produced on every run
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Show how many samples (rows) and features (columns) each part holds.
print("Train :", X_train.shape)
print("Test :", X_test.shape)
# Sortie :
# Train : (398, 30)
# Test : (171, 30)
# Standardize every feature to zero mean and unit standard deviation.
# The scaler learns its mean / standard deviation from the training set only,
# so no statistics from the test set are used when fitting.
scaler = StandardScaler().fit(X_train)

# Apply the transformation learned on the training set to both subsets.
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)
# Build the logistic-regression classifier and train it on the scaled
# training data in a single chained call (fit() returns the estimator).
# max_iter is raised to 1000 to give the solver room to converge.
model = LogisticRegression(max_iter=1000).fit(X_train_scaled, y_train)
# Sortie :
# LogisticRegression(max_iter=1000)
# In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
# | penalty | 'l2' |
# | dual | False |
# | tol | 0.0001 |
# | C | 1.0 |
# | fit_intercept | True |
# | intercept_scaling | 1 |
# | class_weight | None |
# | random_state | None |
# | solver | 'lbfgs' |
# | max_iter | 1000 |
# | multi_class | 'deprecated' |
# | verbose | 0 |
# | warm_start | False |
# | n_jobs | None |
# | l1_ratio | None |
# Predict a label for every sample of the scaled test set.
y_pred = model.predict(X_test_scaled)

# Accuracy of the predictions against the true test labels.
acc = accuracy_score(y_test, y_pred)
print("Accuracy :", acc)

# Confusion matrix; sep="\n" puts the heading and the matrix on separate lines.
print("Matrice de confusion :", confusion_matrix(y_test, y_pred), sep="\n")

# Detailed per-class report (precision, recall, f1-score, support).
print("Rapport de classification :", classification_report(y_test, y_pred), sep="\n")
# Sortie :
# Accuracy : 0.9824561403508771
# Matrice de confusion :
# [[ 62   1]
#  [  2 106]]
# Rapport de classification :
#               precision    recall  f1-score   support
#
#            0       0.97      0.98      0.98        63
#            1       0.99      0.98      0.99       108
#
#     accuracy                           0.98       171
#    macro avg       0.98      0.98      0.98       171
# weighted avg       0.98      0.98      0.98       171