import pandas as pd
from sklearn.datasets import make_classification
# Build a synthetic binary-classification dataset (1000 samples, 5 features)
# and store it, together with its labels, in "data.csv".
N_SAMPLES = 1000
N_FEATURES = 5

X, y = make_classification(
    n_samples=N_SAMPLES,
    n_features=N_FEATURES,
    # All features are informative: none are redundant or repeated.
    n_informative=N_FEATURES,
    n_redundant=0,
    n_repeated=0,
    n_classes=2,
    # Fixed seed so the same dataset is generated on every run.
    random_state=42,
)

# Display the shapes of X and y
print(f"Shape of X: {X.shape}")  # Shape of X: (1000, 5)
print(f"Shape of y: {y.shape}")  # Shape of y: (1000,)

# Column names f1..f5 are derived from the feature count so they cannot
# drift out of sync with the width of X.
feature_columns = [f"f{i}" for i in range(1, N_FEATURES + 1)]
data = pd.DataFrame(X, columns=feature_columns)
data["label"] = y

# index=False keeps the DataFrame's row index out of the CSV file.
data.to_csv("data.csv", index=False)
# Dummy classification dataset stored in CSV