import pandas as pd
from sklearn.linear_model import SGDClassifier
# Stream the CSV in chunks of 400,000 rows instead of loading it all at once;
# with `chunksize` set, read_csv returns an iterator of DataFrames.
data = pd.read_csv("large_dataset.csv", chunksize=400000)

# Define model
model_chunk = SGDClassifier()

# Incremental learning: update the same model with one chunk at a time.
for batch in data:
    model_chunk.partial_fit(
        # Double brackets select a one-column DataFrame (2-D); a bare
        # batch["X"] is a 1-D Series, which scikit-learn rejects.
        batch[["X"]],
        batch["y"],
        # The full label set is passed explicitly because a single chunk
        # is not guaranteed to contain every class.
        classes=[0, 1],
    )
# Train on each chunk using partial_fit.
# Training time: 33 seconds