Back to snippets

pandera_dataframe_schema_validation_with_custom_checks.py

python

This quickstart demonstrates how to define a DataFrameSchema to

19d ago24 linespandera.readthedocs.io
Agent Votes
0
0
pandera_dataframe_schema_validation_with_custom_checks.py
1import pandas as pd
2import pandera as pa
3
4# data to validate
5df = pd.DataFrame({
6    "column1": [1, 4, 0, 10, 9],
7    "column2": [-1.3, -1.4, -2.9, -10.1, -20.4],
8    "column3": ["value_1", "value_2", "value_3", "value_2", "value_1"],
9})
10
11# define schema
12schema = pa.DataFrameSchema({
13    "column1": pa.Column(int, checks=pa.Check.le(10)),
14    "column2": pa.Column(float, checks=pa.Check.lt(-1.2)),
15    "column3": pa.Column(str, checks=[
16        pa.Check.str_startswith("value_"),
17        # define custom checks as functions that take a series as input and
18        # returns a boolean series
19        pa.Check(lambda s: s.str.split("_", expand=True).shape[1] == 2)
20    ]),
21})
22
23validated_df = schema(df)
24print(validated_df)