# pandera.py
import pandas as pd
import pandera.pandas as pa
from pandera.typing import Series

class TrainingDataSchema(pa.DataFrameModel):
    """Validation rules for the columns of the training dataframe.

    Three columns are declared; each one pairs a dtype annotation with the
    value check configured through ``pa.Field``.
    """

    # Float column: every value must be greater than 0, nulls not allowed.
    feature1: Series[float] = pa.Field(nullable=False, gt=0)

    # Integer column: constrained by an in_range check with bounds 0 and 100.
    feature2: Series[int] = pa.Field(in_range={"min_value": 0, "max_value": 100})

    # Integer column: only the values 0 and 1 are accepted.
    label: Series[int] = pa.Field(isin=[0, 1])

# Load the CSV and check it against TrainingDataSchema. With lazy=True the
# failures are reported together through SchemaErrors.
df = pd.read_csv("some_data.csv")
try:
    TrainingDataSchema.validate(df, lazy=True)
except pa.errors.SchemaErrors as err:
    # Show the complete failure table first, then its dimensions.
    print(err.failure_cases)
    print(err.failure_cases.shape)
else:
    # Reached only when validate() raised no SchemaErrors.
    print("validation passed")

# validation

# schema definition