import pandas as pd
from sklearn.linear_model import SGDClassifier

# Define model
model_chunk = SGDClassifier()

# CSV Reader object to read data in chunks, so the whole dataset never has
# to be held in memory at once. The context manager closes the underlying
# file handle even if training raises part-way through.
with pd.read_csv("large_dataset.csv",
                 chunksize=400000) as data:
    # Incremental learning: one partial_fit call per chunk.
    for batch in data:
        # Double brackets select a one-column DataFrame: scikit-learn expects
        # a 2-D (n_samples, n_features) feature matrix and rejects the 1-D
        # Series that batch["X"] would return.
        # `classes` lists every label up front because a single chunk is not
        # guaranteed to contain all of them.
        model_chunk.partial_fit(batch[["X"]],
                                batch["y"],
                                classes=[0, 1])

Train on each chunk using partial_fit.

Training time: 33 seconds