import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from sklearn.linear_model import SGDRegressor
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import make_pipeline
from sklearn.metrics import mean_squared_error, r2_score


# Seed NumPy's global generator so every run draws the same sample.
np.random.seed(42)

# Ground-truth slope and intercept of the line the model should recover.
a_true, b_true = 3.0, 1.5

# Sample size.
n = 200

# Single feature drawn uniformly between 0 and 10, kept as an (n, 1) column.
X = np.random.uniform(low=0, high=10, size=(n, 1))

# Gaussian noise (mean 0, standard deviation 2), one value per sample.
noise = np.random.normal(loc=0, scale=2, size=n)

# Target: noisy affine function of the feature, as a flat vector.
y = a_true * X[:, 0] + b_true + noise

print("Shape de X :", X.shape)
print("Shape de y :", y.shape)

Shape de X : (200, 1)
Shape de y : (200,)


# Scatter plot of the synthetic sample, drawn through the Axes interface.
_, ax = plt.subplots()
ax.scatter(X, y)
ax.set_xlabel("x")
ax.set_ylabel("y")
ax.set_title("Données fictives")
plt.show()


# Hold out 20 % of the samples for evaluation; the fixed seed makes the
# split reproducible from one run to the next.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

print("Train :", X_train.shape)
print("Test  :", X_test.shape)

Train : (160, 1)
Test  : (40, 1)


# Two-step pipeline: standardize the feature, then fit a linear model by
# stochastic gradient descent with a fixed step size.
model = make_pipeline(
    StandardScaler(),
    SGDRegressor(
        learning_rate="constant",  # keep the step size fixed at eta0
        eta0=0.01,                 # step size (learning rate)
        max_iter=1000,             # upper bound on passes over the training data
        random_state=42,           # reproducible shuffling
    ),
)


# Fit the whole pipeline on the training split only: the scaler is fitted on
# X_train, then the regressor is trained on the scaled feature.
model.fit(X_train, y_train)

Pipeline(steps=[('standardscaler', StandardScaler()),
                ('sgdregressor',
                 SGDRegressor(learning_rate='constant', random_state=42))])


# Pull the fitted steps out of the pipeline.
sgd = model.named_steps["sgdregressor"]
scaler = model.named_steps["standardscaler"]

# These two values live in the standardized feature space (the regressor only
# ever sees the scaler's output), so they are NOT directly comparable to
# a_true and b_true used to generate the data.
print("Coefficient (a) appris :", round(sgd.coef_[0], 3))
print("Intercept (b) appris  :", round(sgd.intercept_[0], 3))

# Undo the standardization z = (x - mean) / scale:
#   y = w * z + c  =  (w / scale) * x + (c - w * mean / scale)
# which gives the slope and intercept on the original x axis.
a_learned = sgd.coef_[0] / scaler.scale_[0]
b_learned = sgd.intercept_[0] - a_learned * scaler.mean_[0]

print("Pente (a) à l'échelle d'origine     :", round(a_learned, 3))
print("Intercept (b) à l'échelle d'origine :", round(b_learned, 3))

Coefficient (a) appris : 8.841
Intercept (b) appris  : 16.104


# Score the fitted pipeline on the held-out split.
y_pred = model.predict(X_test)

# Mean squared error and coefficient of determination on the test data.
mse, r2 = mean_squared_error(y_test, y_pred), r2_score(y_test, y_pred)

print("MSE :", round(mse, 2))
print("R²  :", round(r2, 3))

MSE : 4.3
R²  : 0.942


# Evaluate the model on 200 evenly spaced points spanning the observed x
# range; predict() expects a 2-D column, hence the added axis.
x_grid = np.linspace(X.min(), X.max(), num=200)[:, np.newaxis]
y_grid = model.predict(x_grid)

# Test points overlaid with the fitted line, drawn through the Axes interface.
_, ax = plt.subplots()
ax.scatter(X_test, y_test, label="Données réelles")
ax.plot(x_grid, y_grid, color="red", label="Modèle (SGDRegressor)")
ax.set_xlabel("x")
ax.set_ylabel("y")
ax.legend()
ax.set_title("Régression linéaire avec descente de gradient (scikit-learn)")
plt.show()

	steps	[('standardscaler', ...), ('sgdregressor', ...)]
	transform_input	None
	memory	None
	verbose	False

	copy	True
	with_mean	True
	with_std	True

	loss	'squared_error'
	penalty	'l2'
	alpha	0.0001
	l1_ratio	0.15
	fit_intercept	True
	max_iter	1000
	tol	0.001
	shuffle	True
	verbose	0
	epsilon	0.1
	random_state	42
	learning_rate	'constant'
	eta0	0.01
	power_t	0.25
	early_stopping	False
	validation_fraction	0.1
	n_iter_no_change	5
	warm_start	False
	average	False

TP — Régression linéaire par descente de gradient avec scikit-learn

Objectifs pédagogiques

1) Importation des bibliothèques

2) Génération de données fictives

3) Visualisation des données

4) Séparation Train / Test

5) Création du modèle avec scikit-learn

6) Entraînement du modèle

7) Paramètres appris

8) Prédiction et évaluation

9) Visualisation de la droite de régression

Conclusion pédagogique