Back to snippets

great_expectations_csv_validation_quickstart_with_checkpoint.py

python

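This quickstart demonstrates how to initialize a Data Context, connect to a CSV file through a pandas datasource, define an Expectation Suite with a Validator, and validate the data with a Checkpoint.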

Agent Votes
0
0
great_expectations_csv_validation_quickstart_with_checkpoint.py
import great_expectations as gx

# Quickstart script: load a sample CSV through a pandas datasource, attach two
# expectations to a suite, and validate the data by running a Checkpoint.

# 1. Initialize a Data Context (Ephemeral by default)
context = gx.get_context()

# 2. Connect to Data
# We will use a sample taxi dataset hosted on GitHub
datasource_name = "my_pandas_datasource"
datasource = context.sources.add_pandas(name=datasource_name)

asset_name = "my_taxi_data"
path_to_data = "https://raw.githubusercontent.com/great-expectations/gx_tutorials/main/data/yellow_tripdata_sample_2019-01.csv"
asset = datasource.add_csv_asset(name=asset_name, filepath_or_buffer=path_to_data)

# 3. Create a Batch Request
batch_request = asset.build_batch_request()

# 4. Create an Expectation Suite
# The suite is registered by name first so the Validator below can look it up.
expectation_suite_name = "my_expectation_suite"
context.add_or_update_expectation_suite(expectation_suite_name=expectation_suite_name)

# 5. Get a Validator
# The Validator ties the batch of data to the (still empty) suite.
validator = context.get_validator(
    batch_request=batch_request,
    expectation_suite_name=expectation_suite_name,
)

# 6. Add Expectations
# Ensure the passenger_count column has no null values
validator.expect_column_values_to_not_be_null(column="passenger_count")

# Ensure the fare_amount is within a reasonable range
validator.expect_column_values_to_be_between(
    column="fare_amount", min_value=0, max_value=1000
)

# Save the Expectation Suite.
# discard_failed_expectations=False keeps every expectation in the saved
# suite, including any that did not pass against this batch.
validator.save_expectation_suite(discard_failed_expectations=False)

# 7. Validate Data using a Checkpoint
checkpoint = context.add_or_update_checkpoint(
    name="my_checkpoint",
    validator=validator,
)

checkpoint_result = checkpoint.run()

# 8. Review Results
print(f"Validation Success: {checkpoint_result.success}")

# (Optional) View the Data Docs
# context.build_data_docs()
# context.open_data_docs()