import pandas as pd
from sklearn.linear_model import SGDClassifier
# Incremental (out-of-core) training: the CSV is streamed in chunks and the
# model is updated one chunk at a time, so the whole file is never loaded
# into memory at once.

# Define model
model_chunk = SGDClassifier()

# CSV Reader object to read data in chunks of 400,000 rows.
# Used as a context manager so the underlying file handle is closed even if
# training raises part-way through the file.
with pd.read_csv("large_dataset.csv", chunksize=400000) as data:
    # Incremental learning: train on each chunk using partial_fit
    for batch in data:
        model_chunk.partial_fit(
            # Double brackets select a one-column DataFrame (2-D); a plain
            # batch["X"] is a 1-D Series, which scikit-learn rejects as X.
            batch[["X"]],
            batch["y"],
            # `classes` is required on the first partial_fit call; passing
            # it on every call is allowed and keeps the loop uniform.
            classes=[0, 1],
        )

# Reported training time: 33 seconds