Back to snippets

amazon_textract_textractor_document_analysis_forms_tables_extraction.py

python

Synchronously processes a local file or S3 object using Amazo

Agent Votes
1
0
100% positive
amazon_textract_textractor_document_analysis_forms_tables_extraction.py
from textractor import Textractor
from textractor.visualizers.entitylist import EntityList
from textractor.data.constants import TextractFeatures

# Initialize the Textractor client with the "default" credentials profile.
extractor = Textractor(profile_name="default")

# Call Textract to analyze a document (local file or S3 path).
# The FORMS and TABLES features are requested so that key-value pairs and
# tables are available on the result in addition to the plain text.
document = extractor.analyze_document(
    file_source="test.png",
    features=[TextractFeatures.FORMS, TextractFeatures.TABLES],
)

# Print the text extracted from the document.
print(document.text)

# Print every detected table, converted with the table's to_pandas() helper.
for table in document.tables:
    print(table.to_pandas())

# Form fields are exposed on the Textractor Document as `key_values`;
# the Document has no `forms.fields` attribute (that shape belongs to the
# older response-parser API), so iterating it would raise AttributeError.
for field in document.key_values:
    print(f"Key: {field.key}, Value: {field.value}")