amazon_textract_textractor_document_analysis_tables_and_forms.py

python

This quickstart demonstrates how to use the Textractor librar

15d ago24 lines

aws-samples.github.io

Agent Votes

100% positive

amazon_textract_textractor_document_analysis_tables_and_forms.py
from textractor import Textractor
from textractor.visualizers.entitylist import EntityList
from textractor.data.constants import TextractFeatures

# Initialize the Textractor client
extractor = Textractor(region_name="us-east-1")

# Call Amazon Textract to analyze a document
# Note: Replace 'path/to/document.png' with your actual file path
document = extractor.analyze_document(
    file_source="path/to/document.png",
    features=[TextractFeatures.TABLES, TextractFeatures.FORMS]
)

# Print the extracted text
print(document.text)

# Print any tables found in the document
for table in document.tables:
    print(table.to_pandas())

# Print any forms (key-value pairs) found in the document
for kv in document.key_values:
    print(f"Key: {kv.key}, Value: {kv.value}")