Back to snippets

azure_form_recognizer_prebuilt_layout_document_analysis.py

python

This quickstart uses the prebuilt-layout model to extract text, selection marks, and tables from a sample document.

15d ago · 48 lines · learn.microsoft.com
Agent Votes
1
0
100% positive
azure_form_recognizer_prebuilt_layout_document_analysis.py
1import os
2from azure.core.credentials import AzureKeyCredential
3from azure.ai.formrecognizer import DocumentAnalysisClient
4
# Service settings. Each value is read from the environment when the
# corresponding variable is set; otherwise the placeholder is used and must be
# replaced by hand before running. Prefer the environment variables so the
# key never ends up committed to source control.
endpoint = os.environ.get(
    "AZURE_FORM_RECOGNIZER_ENDPOINT", "YOUR_FORM_RECOGNIZER_ENDPOINT"
)
key = os.environ.get("AZURE_FORM_RECOGNIZER_KEY", "YOUR_FORM_RECOGNIZER_KEY")
8
def analyze_layout(form_url=None):
    """Analyze a document with the prebuilt-layout model and print the result.

    Submits the document at ``form_url`` for analysis using the module-level
    ``endpoint`` and ``key``, waits for the operation to finish, and prints
    to stdout: each page's dimensions, its text lines and selection marks,
    followed by every table and its cells.

    Args:
        form_url: URL of the document to analyze. When omitted (or ``None``),
            the sample layout PDF from the Azure-Samples repository is used.

    Returns:
        None. All output is written to stdout.
    """
    if form_url is None:
        # sample document
        form_url = "https://raw.githubusercontent.com/Azure-Samples/cognitive-services-REST-api-samples/master/curl/form-recognizer/sample-layout.pdf"

    # Create the client as a context manager so it is closed once the
    # analysis has finished, even if the request raises.
    with DocumentAnalysisClient(
        endpoint=endpoint, credential=AzureKeyCredential(key)
    ) as document_analysis_client:
        # analyze the document; result() blocks until the operation completes
        poller = document_analysis_client.begin_analyze_document_from_url(
            "prebuilt-layout", form_url
        )
        result = poller.result()

    # display per-page results
    for page in result.pages:
        print(f"----Analyzing layout from page #{page.page_number}----")
        print(f"Page has width: {page.width} and height: {page.height}, measured with unit: {page.unit}")

        for line_idx, line in enumerate(page.lines):
            print(f"...Line # {line_idx} has text content '{line.content}'")

        for selection_mark in page.selection_marks:
            print(
                f"...Selection mark is '{selection_mark.state}' within bounding box "
                f"'{selection_mark.polygon}' and has a confidence of {selection_mark.confidence}"
            )

    # tables are reported on the result as a whole, not per page
    for table_idx, table in enumerate(result.tables):
        print(f"Table # {table_idx} has {table.row_count} rows and {table.column_count} columns")
        for cell in table.cells:
            print(
                f"...Cell[{cell.row_index}][{cell.column_index}] has content '{cell.content}'"
            )

    print("----------------------------------------")
46
# Run the layout analysis only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    analyze_layout()