"""Compare SGD convergence under different training-example orderings.

Trains a logistic-regression SGDClassifier with partial_fit for a fixed
number of epochs, feeding the training set in the order each scheme
dictates, and plots the full-dataset log loss after every epoch.
"""

import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import make_classification
from sklearn.linear_model import SGDClassifier
from sklearn.metrics import log_loss

np.random.seed(42)

# Synthetic binary classification problem (balanced classes by default).
X, y = make_classification(
    n_samples=500, n_features=50, n_informative=15, random_state=42
)

# Shared hyperparameters for every ordering scheme.
epochs = 10
learning_rate = 0.01


def _run_sgd(X, y, epochs, lr, order_for_epoch):
    """Train an SGDClassifier epoch by epoch with a caller-chosen order.

    Parameters
    ----------
    X, y : arrays
        Full training set.
    epochs : int
        Number of partial_fit passes.
    lr : float
        Constant learning rate (eta0).
    order_for_epoch : callable
        ``order_for_epoch(epoch) -> index array`` giving the example
        order to feed on that epoch.

    Returns
    -------
    list[float]
        Log loss on the full dataset after each epoch.
    """
    clf = SGDClassifier(
        loss="log_loss", learning_rate="constant", eta0=lr, random_state=42
    )
    classes = np.unique(y)  # hoisted: constant across epochs
    losses = []
    for epoch in range(epochs):
        idx = order_for_epoch(epoch)
        clf.partial_fit(X[idx], y[idx], classes=classes)
        losses.append(log_loss(y, clf.predict_proba(X)))
    return losses


def sgd_random_reshuffling(X, y, epochs, lr):
    """Fresh uniform shuffle of the whole dataset every epoch."""
    return _run_sgd(
        X, y, epochs, lr, lambda _epoch: np.random.permutation(len(y))
    )


def sgd_permutation_order(X, y, epochs, lr):
    """Fixed order, cyclically rotated left by one position per epoch."""
    base = np.arange(len(y))
    # Epoch e sees the base order rolled by -e, matching a roll of -1
    # applied once per completed epoch.
    return _run_sgd(X, y, epochs, lr, lambda epoch: np.roll(base, shift=-epoch))


def sgd_ascending_order(X, y, epochs, lr):
    """Same order every epoch: examples sorted by label ascending."""
    order = np.argsort(y, kind="stable")
    return _run_sgd(X, y, epochs, lr, lambda _epoch: order)


def sgd_descending_order(X, y, epochs, lr):
    """Same order every epoch: examples sorted by label descending."""
    order = np.argsort(-y, kind="stable")
    return _run_sgd(X, y, epochs, lr, lambda _epoch: order)


def sgd_stratified_order(X, y, epochs, lr):
    """Within-class shuffle each epoch, classes interleaved round-robin.

    BUGFIX: the original concatenated the per-class permutations, which
    yields all of one class followed by all of the other -- a
    class-sorted stream (effectively the same as ascending order), not a
    stratified one.  Interleaving by within-class rank keeps the class
    mix balanced throughout the epoch.
    """
    class_indices = [np.where(y == label)[0] for label in np.unique(y)]

    def order_for_epoch(_epoch):
        perms = [np.random.permutation(idx) for idx in class_indices]
        flat = np.concatenate(perms)
        ranks = np.concatenate([np.arange(len(p)) for p in perms])
        # Stable sort on within-class rank -> round-robin over classes.
        return flat[np.argsort(ranks, kind="stable")]

    return _run_sgd(X, y, epochs, lr, order_for_epoch)


def main():
    """Run every ordering scheme and plot the convergence curves."""
    schemes = [
        ("Random Reshuffling", sgd_random_reshuffling),
        ("Cyclic Permutation", sgd_permutation_order),
        ("Ascending Order", sgd_ascending_order),
        ("Descending Order", sgd_descending_order),
        ("Stratified Sampling", sgd_stratified_order),
    ]
    for label, scheme in schemes:
        losses = scheme(X, y, epochs, learning_rate)
        plt.plot(range(epochs), losses, label=label, marker="o")
    plt.xlabel("Epoch")
    plt.ylabel("Log Loss")
    plt.legend()
    plt.title("SGD Convergence: Various Example Ordering Schemes")
    plt.show()


if __name__ == "__main__":
    main()