add samples for converting to and from dictionaries
catalinaperalta committed Nov 15, 2021
1 parent fe9b0f8 commit af8bac3
Showing 2 changed files with 247 additions and 0 deletions.
sample_convert_to_and_from_dict_async.py
@@ -0,0 +1,128 @@
# coding: utf-8

# -------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License. See License.txt in the project root for
# license information.
# --------------------------------------------------------------------------

"""
FILE: sample_convert_to_and_from_dict_async.py
DESCRIPTION:
This sample demonstrates how to convert models returned from an analyze operation
to and from a dictionary. The dictionary in this sample is then written to a
JSON file, and the same dictionary is converted back to its original model.
USAGE:
python sample_convert_to_and_from_dict_async.py
Set the environment variables with your own values before running the sample:
1) AZURE_FORM_RECOGNIZER_ENDPOINT - the endpoint to your Cognitive Services resource.
2) AZURE_FORM_RECOGNIZER_KEY - your Form Recognizer API key
"""

import os
import json
import asyncio

async def convert_to_and_from_dict_async():
    path_to_sample_documents = os.path.abspath(
        os.path.join(
            os.path.abspath(__file__),
            "..",
            "..",
            "..",
            "./sample_forms/forms/Form_1.jpg",
        )
    )

    from azure.core.credentials import AzureKeyCredential
    from azure.ai.formrecognizer.aio import DocumentAnalysisClient
    from azure.ai.formrecognizer import AnalyzeResult

    endpoint = os.environ["AZURE_FORM_RECOGNIZER_ENDPOINT"]
    key = os.environ["AZURE_FORM_RECOGNIZER_KEY"]

    document_analysis_client = DocumentAnalysisClient(
        endpoint=endpoint, credential=AzureKeyCredential(key)
    )
    async with document_analysis_client:
        with open(path_to_sample_documents, "rb") as f:
            poller = await document_analysis_client.begin_analyze_document(
                "prebuilt-document", document=f
            )
        result = await poller.result()

    # convert the received model to a dictionary
    d = result.to_dict()

    # save the dictionary as JSON content in a JSON file
    with open('data.json', 'w') as f:
        json.dump(d, f)
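    # Illustrative addition (not part of the original sample): the JSON file
    # written above could later be read back and rehydrated into the same model
    # type. A minimal sketch; the variable names below are hypothetical.
    with open('data.json', 'r') as f:
        reloaded_dict = json.load(f)
    reloaded_model = AnalyzeResult.from_dict(reloaded_dict)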

    # convert the dictionary back to the original model
    model = AnalyzeResult.from_dict(d)

    # use the model as normal
    for style in model.styles:
        if style.is_handwritten:
            print("Document contains handwritten content: ")
            print(",".join([model.content[span.offset:span.offset + span.length] for span in style.spans]))

    print("----Key-value pairs found in document----")
    for kv_pair in model.key_value_pairs:
        if kv_pair.key:
            print(
                "Key '{}' found within '{}' bounding regions".format(
                    kv_pair.key.content,
                    kv_pair.key.bounding_regions,
                )
            )
        if kv_pair.value:
            print(
                "Value '{}' found within '{}' bounding regions\n".format(
                    kv_pair.value.content,
                    kv_pair.value.bounding_regions,
                )
            )

    print("----Entities found in document----")
    for entity in model.entities:
        print("Entity of category '{}' with sub-category '{}'".format(entity.category, entity.sub_category))
        print("...has content '{}'".format(entity.content))
        print("...within '{}' bounding regions".format(entity.bounding_regions))
        print("...with confidence {}\n".format(entity.confidence))

    for table_idx, table in enumerate(model.tables):
        print(
            "Table # {} has {} rows and {} columns".format(
                table_idx, table.row_count, table.column_count
            )
        )
        for region in table.bounding_regions:
            print(
                "Table # {} location on page: {} is on {}".format(
                    table_idx,
                    region.page_number,
                    region.bounding_box,
                )
            )
        for cell in table.cells:
            print(
                "...Cell[{}][{}] has content '{}'".format(
                    cell.row_index,
                    cell.column_index,
                    cell.content,
                )
            )
    print("----------------------------------------")


async def main():
    await convert_to_and_from_dict_async()


if __name__ == '__main__':
    loop = asyncio.get_event_loop()
    loop.run_until_complete(main())
sample_convert_to_and_from_dict.py
@@ -0,0 +1,119 @@
# coding: utf-8

# -------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License. See License.txt in the project root for
# license information.
# --------------------------------------------------------------------------

"""
FILE: sample_convert_to_and_from_dict.py
DESCRIPTION:
This sample demonstrates how to convert models returned from an analyze operation
to and from a dictionary. The dictionary in this sample is then written to a
JSON file, and the same dictionary is converted back to its original model.
USAGE:
python sample_convert_to_and_from_dict.py
Set the environment variables with your own values before running the sample:
1) AZURE_FORM_RECOGNIZER_ENDPOINT - the endpoint to your Cognitive Services resource.
2) AZURE_FORM_RECOGNIZER_KEY - your Form Recognizer API key
"""

import os
import json

def convert_to_and_from_dict():
    path_to_sample_documents = os.path.abspath(
        os.path.join(
            os.path.abspath(__file__),
            "..",
            "..",
            "./sample_forms/forms/Form_1.jpg",
        )
    )

    from azure.core.credentials import AzureKeyCredential
    from azure.ai.formrecognizer import DocumentAnalysisClient, AnalyzeResult

    endpoint = os.environ["AZURE_FORM_RECOGNIZER_ENDPOINT"]
    key = os.environ["AZURE_FORM_RECOGNIZER_KEY"]

    document_analysis_client = DocumentAnalysisClient(
        endpoint=endpoint, credential=AzureKeyCredential(key)
    )
    with open(path_to_sample_documents, "rb") as f:
        poller = document_analysis_client.begin_analyze_document(
            "prebuilt-document", document=f
        )
    result = poller.result()

    # convert the received model to a dictionary
    d = result.to_dict()

    # save the dictionary as JSON content in a JSON file
    with open('data.json', 'w') as f:
        json.dump(d, f)
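    # Illustrative addition (not part of the original sample): the dictionary
    # saved above could later be reloaded from disk and converted back into an
    # AnalyzeResult. A minimal sketch; variable names here are hypothetical.
    with open('data.json', 'r') as f:
        reloaded_dict = json.load(f)
    reloaded_model = AnalyzeResult.from_dict(reloaded_dict)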

    # convert the dictionary back to the original model
    model = AnalyzeResult.from_dict(d)

    # use the model as normal
    for style in model.styles:
        if style.is_handwritten:
            print("Document contains handwritten content: ")
            print(",".join([model.content[span.offset:span.offset + span.length] for span in style.spans]))

    print("----Key-value pairs found in document----")
    for kv_pair in model.key_value_pairs:
        if kv_pair.key:
            print(
                "Key '{}' found within '{}' bounding regions".format(
                    kv_pair.key.content,
                    kv_pair.key.bounding_regions,
                )
            )
        if kv_pair.value:
            print(
                "Value '{}' found within '{}' bounding regions\n".format(
                    kv_pair.value.content,
                    kv_pair.value.bounding_regions,
                )
            )

    print("----Entities found in document----")
    for entity in model.entities:
        print("Entity of category '{}' with sub-category '{}'".format(entity.category, entity.sub_category))
        print("...has content '{}'".format(entity.content))
        print("...within '{}' bounding regions".format(entity.bounding_regions))
        print("...with confidence {}\n".format(entity.confidence))

    for table_idx, table in enumerate(model.tables):
        print(
            "Table # {} has {} rows and {} columns".format(
                table_idx, table.row_count, table.column_count
            )
        )
        for region in table.bounding_regions:
            print(
                "Table # {} location on page: {} is on {}".format(
                    table_idx,
                    region.page_number,
                    region.bounding_box,
                )
            )
        for cell in table.cells:
            print(
                "...Cell[{}][{}] has content '{}'".format(
                    cell.row_index,
                    cell.column_index,
                    cell.content,
                )
            )
    print("----------------------------------------")


if __name__ == "__main__":
    convert_to_and_from_dict()
