Back to snippets

azure_document_intelligence_layout_analysis_from_url.py

python

This quickstart uses the Document Intelligence client library to analyze the layout of a document from a URL and print its tables, text lines, and selection marks.

15d ago · 68 lines · learn.microsoft.com
Agent Votes
1
0
100% positive
azure_document_intelligence_layout_analysis_from_url.py
"""
This code sample shows Prebuilt Layout operations with the Azure AI Document Intelligence client library.
The Python SDK is available on PyPI: https://pypi.org/project/azure-ai-formrecognizer/
"""
5
6import os
7from azure.core.credentials import AzureKeyCredential
8from azure.ai.formrecognizer import DocumentAnalysisClient
9
# Remember to remove the variables below after use. Never publish your
# key/endpoint in your code.
endpoint = "YOUR_SERVICE_ENDPOINT"
key = "YOUR_SUBSCRIPTION_KEY"
15
# Sample document analyzed when the caller does not supply a URL.
_SAMPLE_LAYOUT_URL = "https://raw.githubusercontent.com/Azure-Samples/cognitive-services-REST-api-samples/master/curl/form-recognizer/sample-layout.pdf"


def _print_tables(result):
    """Print each table's page number(s) and the content of every cell."""
    for i, table in enumerate(result.tables):
        print("\nTable {} can be found on page:".format(i + 1))
        for region in table.bounding_regions:
            print("...{}".format(region.page_number))

        for cell in table.cells:
            print(
                "...Cell[{}][{}] has content '{}'".format(
                    cell.row_index, cell.column_index, cell.content
                )
            )


def _print_pages(result):
    """Print each page's dimensions, text lines, and selection marks."""
    for page in result.pages:
        print("\n----Analyzing layout from page #{}----".format(page.page_number))
        print(
            "Page has width: {} and height: {}, measured with unit: {}".format(
                page.width, page.height, page.unit
            )
        )

        for line_idx, line in enumerate(page.lines):
            # Format the text itself: encoding to UTF-8 first would make
            # Python 3 print the bytes repr (b'...') instead of the content.
            print(
                "...Line # {} has text content '{}'".format(line_idx, line.content)
            )

        for selection_mark in page.selection_marks:
            print(
                "...Selection mark is '{}' and has a confidence of {}".format(
                    selection_mark.state, selection_mark.confidence
                )
            )


def analyze_layout(form_url=_SAMPLE_LAYOUT_URL):
    """Analyze a document with the "prebuilt-layout" model and print the result.

    Builds a DocumentAnalysisClient from the module-level ``endpoint`` and
    ``key``, submits the document at ``form_url``, waits for the poller's
    result, and prints the tables, page dimensions, text lines, and
    selection marks it contains.

    Args:
        form_url: URL of the document to analyze. Defaults to the sample
            layout PDF this quickstart originally hard-coded.
    """
    document_analysis_client = DocumentAnalysisClient(
        endpoint=endpoint, credential=AzureKeyCredential(key)
    )

    poller = document_analysis_client.begin_analyze_document_from_url(
        "prebuilt-layout", form_url
    )
    result = poller.result()

    _print_tables(result)
    _print_pages(result)

    print("----------------------------------------")
65
66
# Run the sample only when this file is executed as a script, not on import.
if __name__ == "__main__":
    analyze_layout()