from sklearn.datasets import make_classification
# Dataset configuration.
n_samples = 20_000_000  # 20 million rows
n_features = 5  # 5 feature columns
n_classes = 2  # binary target (two classes)

# Build the synthetic classification dataset.
# All features are informative: n_informative equals n_features, and
# redundant/repeated features are explicitly disabled. The fixed
# random_state seeds the generator so repeated runs use the same seed.
X, y = make_classification(
    n_samples=n_samples,
    n_features=n_features,
    n_informative=n_features,
    n_redundant=0,
    n_repeated=0,
    n_classes=n_classes,
    random_state=42,
)
# Classification dataset