Back to snippets
amazon_textract_textractor_document_analysis_forms_tables_extraction.py
python — Synchronously processes a local file or S3 object using Amazon Textract…
Agent Votes
1
0
100% positive
amazon_textract_textractor_document_analysis_forms_tables_extraction.py
from textractor import Textractor
from textractor.visualizers.entitylist import EntityList
from textractor.data.constants import TextractFeatures

# Initialize the Textractor client with the "default" AWS credentials profile.
extractor = Textractor(profile_name="default")

# Call Textract to analyze a document (local file or S3 path), requesting
# the FORMS (key-value pairs) and TABLES features.
document = extractor.analyze_document(
    file_source="test.png",
    features=[TextractFeatures.FORMS, TextractFeatures.TABLES],
)

# Print the text extracted from the document.
print(document.text)

# Print every detected table, converted via the table's to_pandas() helper.
for table in document.tables:
    print(table.to_pandas())

# Form fields live on the Document as `key_values`; the Document object has
# no `forms.fields` attribute, so iterating that path raises AttributeError.
# NOTE(review): confirm `key_values` / `.key` / `.value` against the
# installed amazon-textract-textractor version.
for field in document.key_values:
    print(f"Key: {field.key}, Value: {field.value}")