MLflow - snappify

dataset.py
import pandas as pd
from sklearn.datasets import make_classification

# Build a synthetic binary-classification dataset and persist it as CSV.
# The sample and feature counts are named once so the generator arguments
# and the DataFrame column names cannot drift apart.
N_SAMPLES = 1000
N_FEATURES = 5

# Every feature is informative (no redundant or repeated columns); the fixed
# random_state makes the generated data reproducible across runs.
X, y = make_classification(
    n_samples=N_SAMPLES,
    n_features=N_FEATURES,
    n_informative=N_FEATURES,
    n_redundant=0,
    n_repeated=0,
    n_classes=2,
    random_state=42,
)

# Display the shapes of X and y
print(f"Shape of X: {X.shape}")  # Shape of X: (1000, 5)
print(f"Shape of y: {y.shape}")  # Shape of y: (1000,)

# Column names f1..f5 are derived from N_FEATURES so they always match X.
feature_columns = [f"f{i}" for i in range(1, N_FEATURES + 1)]
data = pd.DataFrame(X, columns=feature_columns)
data["label"] = y

# index=False keeps the pandas row index out of the CSV file.
data.to_csv("data.csv", index=False)
Dummy classification dataset stored in a CSV file (data.csv).