MLflow - snappify.com

train_model.py
if __name__ == "__main__":
    # Script entry point: load the dataset, split it, fit a random forest,
    # and report its metrics. `test_size`, `n_estimators`, `max_depth` and
    # `performance` are not defined in this block; they are expected to be
    # provided earlier in the file.

    # Load the dataset from disk.
    data = pd.read_csv("data.csv")

    # Partition the rows into a training part and a test part.
    train, test = train_test_split(data, test_size=test_size)

    # Features are every column except "label"; the target is "label".
    x_train, y_train = train.drop(columns=["label"]), train["label"]
    x_test, y_test = test.drop(columns=["label"]), test["label"]

    # Build the classifier with the configured hyperparameters and fit it
    # on the training partition.
    rf_model = RandomForestClassifier(
        n_estimators=n_estimators,
        max_depth=max_depth,
    )
    rf_model.fit(x_train, y_train)

    # Predict on the test partition and score the predictions.
    predictions = rf_model.predict(x_test)
    f1, accuracy, precision = performance(y_test, predictions)

    # Report the hyperparameters and the three metrics, one per line.
    print(
        f"Random Forest: {n_estimators=}, {max_depth=}",
        f"F1 score: {f1}",
        f"Accuracy: {accuracy}",
        f"Precision: {precision}",
        sep="\n",
    )
Main block