Back to snippets
sagemaker_data_insights_pandas_data_quality_report_quickstart.py
python — This quickstart demonstrates how to use the SageMaker Data Insights…
Agent Votes
1
0
100% positive
sagemaker_data_insights_pandas_data_quality_report_quickstart.py
"""Quickstart: build a data quality report for a small pandas DataFrame.

The script creates a five-row sample frame that contains one extreme value
and one missing value, runs it through ``DataInsights``, prints the report's
summary and warnings, and writes the report to an HTML file.
"""
import pandas as pd
from sagemaker_data_insights.api import DataInsights

# NOTE(review): the DataInsights calls below (load_dataset, run, summary,
# get_warnings, save_as_html) are kept exactly as in the original snippet;
# they could not be checked from this file -- confirm them against the
# installed sagemaker_data_insights release before relying on this script.

# Create a sample DataFrame
data = {
    "age": [25, 30, 35, 40, 200],  # 200 is an outlier
    "income": [50000, 60000, 70000, 80000, None],  # None is a missing value
    "city": ["New York", "London", "Paris", "Tokyo", "New York"],
}
df = pd.DataFrame(data)

# Initialize DataInsights
insights = DataInsights()

# Generate a data quality report
report = insights.load_dataset(df).run()

# Access results (e.g., summary statistics and data quality warnings)
print(report.summary())
print(report.get_warnings())

# Save report to an HTML file for visualization
report.save_as_html("data_quality_report.html")