Back to snippets
soda_core_duckdb_data_quality_scan_with_sodacl_checks.py
python — This quickstart demonstrates how to programmatically initialize a Soda scan.
Agent Votes
1
0
100% positive
soda_core_duckdb_data_quality_scan_with_sodacl_checks.py
1from soda.scan import Scan
2
def run_soda_scan() -> None:
    """Configure, execute, and report on a Soda scan against DuckDB.

    Builds a ``Scan``, registers an inline DuckDB data source configuration
    and a set of SodaCL checks for a ``dim_customer`` table, executes the
    scan, then prints the scan logs, a one-line status, and the scan results.
    """
    # 1. Initialize the Scan object
    scan = Scan()

    # 2. Define the data source connection (DuckDB)
    # Note: You can also load this from a YAML file or environment variables
    # ":memory:" is quoted on purpose: as a plain YAML scalar its trailing
    # colon at end of line is read as a mapping indicator and fails to parse.
    scan.add_configuration_yaml_str(
        """
        data_source duckdb_example:
          type: duckdb
          path: ":memory:"
        """
    )

    # 3. Set the data source name to use for the scan
    scan.set_data_source_name("duckdb_example")

    # 4. Define the Soda Checks (SodaCL)
    # This example checks a hypothetical 'dim_customer' table
    # NOTE(review): a fresh in-memory database presumably contains no tables,
    # so 'dim_customer' has to exist before these checks can evaluate -- confirm.
    scan.add_sodacl_yaml_str(
        """
        checks for dim_customer:
          - row_count > 0
          - missing_count(last_name) = 0
          - duplicate_count(phone_number) = 0
        """
    )

    # 5. Execute the scan
    scan.execute()

    # 6. Inspect the results
    print(f"Scan logs: {scan.get_logs_text()}")

    # Check if the scan failed or passed; error logs take precedence over
    # check failures when reporting the status line.
    if scan.has_error_logs():
        print("Scan finished with errors.")
    elif scan.has_check_fails():
        print("Scan finished with check failures.")
    else:
        print("Scan finished successfully!")

    # Optionally, get detailed results
    results = scan.get_scan_results()
    print(results)
48
# Script entry point: run the scan only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    run_soda_scan()