Back to snippets
pandera_dataframe_schema_validation_with_custom_checks.py
python
This quickstart demonstrates how to define a DataFrameSchema to validate a pandas DataFrame using built-in and custom checks.
Agent Votes
0
0
pandera_dataframe_schema_validation_with_custom_checks.py
# Quickstart: validate a pandas DataFrame against a pandera DataFrameSchema
# that combines built-in checks with a custom check.
import pandas as pd
import pandera as pa

# data to validate
df = pd.DataFrame({
    "column1": [1, 4, 0, 10, 9],
    "column2": [-1.3, -1.4, -2.9, -10.1, -20.4],
    "column3": ["value_1", "value_2", "value_3", "value_2", "value_1"],
})

# define schema: each column declares its expected dtype and the checks
# its values must satisfy
schema = pa.DataFrameSchema({
    # integers less than or equal to 10
    "column1": pa.Column(int, checks=pa.Check.le(10)),
    # floats strictly less than -1.2
    "column2": pa.Column(float, checks=pa.Check.lt(-1.2)),
    "column3": pa.Column(str, checks=[
        pa.Check.str_startswith("value_"),
        # define custom checks as functions that take a series as input and
        # return a boolean series. Counting the parts per element (instead of
        # inspecting the shape of the expanded frame, which yields a single
        # bool for the whole column) keeps the result element-wise: True for
        # each value that splits into exactly two parts on "_".
        pa.Check(lambda s: s.str.split("_").str.len() == 2),
    ]),
})

# validate() returns the validated DataFrame; per the pandera docs it raises
# a SchemaError if the dtype or any check fails
validated_df = schema.validate(df)
print(validated_df)