From af8bac3cf4f4600d15a342afd975d916c4823e42 Mon Sep 17 00:00:00 2001 From: Catalina Peralta Date: Mon, 15 Nov 2021 15:53:32 -0800 Subject: [PATCH] add samples for converting to and from dictionaries --- .../sample_convert_to_and_from_dict_async.py | 128 ++++++++++++++++++ .../sample_convert_to_and_from_dict.py | 119 ++++++++++++++++ 2 files changed, 247 insertions(+) create mode 100644 sdk/formrecognizer/azure-ai-formrecognizer/samples/v3.2-beta/async_samples/sample_convert_to_and_from_dict_async.py create mode 100644 sdk/formrecognizer/azure-ai-formrecognizer/samples/v3.2-beta/sample_convert_to_and_from_dict.py diff --git a/sdk/formrecognizer/azure-ai-formrecognizer/samples/v3.2-beta/async_samples/sample_convert_to_and_from_dict_async.py b/sdk/formrecognizer/azure-ai-formrecognizer/samples/v3.2-beta/async_samples/sample_convert_to_and_from_dict_async.py new file mode 100644 index 000000000000..c31dee62c4dd --- /dev/null +++ b/sdk/formrecognizer/azure-ai-formrecognizer/samples/v3.2-beta/async_samples/sample_convert_to_and_from_dict_async.py @@ -0,0 +1,128 @@ +# coding: utf-8 + +# ------------------------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. See License.txt in the project root for +# license information. +# -------------------------------------------------------------------------- + +""" +FILE: sample_convert_to_and_from_dict_async.py + +DESCRIPTION: + This sample demonstrates how to convert models returned from an analyze operation + to and from a dictionary. The dictionary in this sample is then converted to a + JSON file, then the same dictionary is converted back to its original model. + +USAGE: + python sample_convert_to_and_from_dict_async.py + + Set the environment variables with your own values before running the sample: + 1) AZURE_FORM_RECOGNIZER_ENDPOINT - the endpoint to your Cognitive Services resource. 
import os
import json
import asyncio


async def convert_to_and_from_dict_async():
    """Analyze a sample form and round-trip the result through a dictionary.

    The analyze result is converted to a dictionary with ``to_dict()``, that
    dictionary is written as JSON to ``data.json`` (relative to the current
    working directory), and the same dictionary is converted back into an
    ``AnalyzeResult`` with ``AnalyzeResult.from_dict()``. The rebuilt model is
    then used to print handwritten content, key-value pairs, entities and
    tables.

    The endpoint and API key are read from the AZURE_FORM_RECOGNIZER_ENDPOINT
    and AZURE_FORM_RECOGNIZER_KEY environment variables.

    Raises:
        KeyError: if either environment variable is not set.
    """
    # abspath() collapses the ".." segments: the first one cancels this
    # file's own name, the other two climb two directories above the
    # directory that holds this sample.
    path_to_sample_documents = os.path.abspath(
        os.path.join(
            os.path.abspath(__file__),
            "..",
            "..",
            "..",
            "./sample_forms/forms/Form_1.jpg",
        )
    )

    # Imported inside the function, so merely importing this module does not
    # require the SDK packages.
    from azure.core.credentials import AzureKeyCredential
    from azure.ai.formrecognizer.aio import DocumentAnalysisClient
    from azure.ai.formrecognizer import AnalyzeResult

    endpoint = os.environ["AZURE_FORM_RECOGNIZER_ENDPOINT"]
    key = os.environ["AZURE_FORM_RECOGNIZER_KEY"]

    document_analysis_client = DocumentAnalysisClient(
        endpoint=endpoint, credential=AzureKeyCredential(key)
    )
    async with document_analysis_client:
        with open(path_to_sample_documents, "rb") as document_file:
            poller = await document_analysis_client.begin_analyze_document(
                "prebuilt-document", document=document_file
            )
        result = await poller.result()

    # convert the received model to a dictionary
    analyze_result_dict = result.to_dict()

    # save the dictionary as JSON content in a JSON file
    with open('data.json', 'w') as json_file:
        json.dump(analyze_result_dict, json_file)

    # convert the dictionary back to the original model
    model = AnalyzeResult.from_dict(analyze_result_dict)

    # use the model as normal
    for style in model.styles:
        if style.is_handwritten:
            print("Document contains handwritten content: ")
            # each span is an (offset, length) window into the full text
            print(",".join([model.content[span.offset:span.offset + span.length] for span in style.spans]))

    print("----Key-value pairs found in document----")
    for kv_pair in model.key_value_pairs:
        if kv_pair.key:
            print(
                "Key '{}' found within '{}' bounding regions".format(
                    kv_pair.key.content,
                    kv_pair.key.bounding_regions,
                )
            )
        if kv_pair.value:
            print(
                "Value '{}' found within '{}' bounding regions\n".format(
                    kv_pair.value.content,
                    kv_pair.value.bounding_regions,
                )
            )

    print("----Entities found in document----")
    for entity in model.entities:
        print("Entity of category '{}' with sub-category '{}'".format(entity.category, entity.sub_category))
        print("...has content '{}'".format(entity.content))
        print("...within '{}' bounding regions".format(entity.bounding_regions))
        print("...with confidence {}\n".format(entity.confidence))

    for table_idx, table in enumerate(model.tables):
        print(
            "Table # {} has {} rows and {} columns".format(
                table_idx, table.row_count, table.column_count
            )
        )
        for region in table.bounding_regions:
            print(
                "Table # {} location on page: {} is on {}".format(
                    table_idx,
                    region.page_number,
                    region.bounding_box,
                )
            )
        for cell in table.cells:
            print(
                "...Cell[{}][{}] has content '{}'".format(
                    cell.row_index,
                    cell.column_index,
                    cell.content,
                )
            )
    print("----------------------------------------")


async def main():
    """Run the sample coroutine."""
    await convert_to_and_from_dict_async()


if __name__ == '__main__':
    # asyncio.run() creates, runs and closes the event loop itself.
    # asyncio.get_event_loop() without a running loop is deprecated and
    # raises RuntimeError on Python 3.14+.
    asyncio.run(main())
import os
import json


def convert_to_and_from_dict():
    """Analyze a sample form, convert the result to a dict and back, and print it.

    The model returned by the analyze operation is converted to a dictionary,
    the dictionary is written as JSON to ``data.json`` (relative to the
    current working directory), and the same dictionary is converted back to
    an ``AnalyzeResult``. The rebuilt model is then printed: handwritten
    content, key-value pairs, entities and tables.

    The endpoint and API key are read from the AZURE_FORM_RECOGNIZER_ENDPOINT
    and AZURE_FORM_RECOGNIZER_KEY environment variables; a missing variable
    raises KeyError.
    """
    # The first ".." cancels this file's own name; the second climbs one
    # directory above the directory that holds this sample.
    this_file = os.path.abspath(__file__)
    form_path = os.path.abspath(
        os.path.join(this_file, "..", "..", "./sample_forms/forms/Form_1.jpg")
    )

    from azure.core.credentials import AzureKeyCredential
    from azure.ai.formrecognizer import DocumentAnalysisClient, AnalyzeResult

    endpoint = os.environ["AZURE_FORM_RECOGNIZER_ENDPOINT"]
    key = os.environ["AZURE_FORM_RECOGNIZER_KEY"]

    credential = AzureKeyCredential(key)
    client = DocumentAnalysisClient(endpoint=endpoint, credential=credential)
    with open(form_path, "rb") as form_stream:
        poller = client.begin_analyze_document("prebuilt-document", document=form_stream)
    analysis = poller.result()

    # convert the received model to a dictionary
    as_dict = analysis.to_dict()

    # save the dictionary as JSON content in a JSON file
    with open('data.json', 'w') as out:
        json.dump(as_dict, out)

    # convert the dictionary back to the original model
    model = AnalyzeResult.from_dict(as_dict)

    # use the model as normal
    for style in model.styles:
        if not style.is_handwritten:
            continue
        print("Document contains handwritten content: ")
        # each span is an (offset, length) window into the full text
        fragments = [
            model.content[span.offset:span.offset + span.length]
            for span in style.spans
        ]
        print(",".join(fragments))

    print("----Key-value pairs found in document----")
    key_template = "Key '{}' found within '{}' bounding regions"
    value_template = "Value '{}' found within '{}' bounding regions\n"
    for pair in model.key_value_pairs:
        if pair.key:
            print(key_template.format(pair.key.content, pair.key.bounding_regions))
        if pair.value:
            print(value_template.format(pair.value.content, pair.value.bounding_regions))

    print("----Entities found in document----")
    for ent in model.entities:
        print("Entity of category '{}' with sub-category '{}'".format(ent.category, ent.sub_category))
        print("...has content '{}'".format(ent.content))
        print("...within '{}' bounding regions".format(ent.bounding_regions))
        print("...with confidence {}\n".format(ent.confidence))

    for index, table in enumerate(model.tables):
        summary = "Table # {} has {} rows and {} columns".format(
            index, table.row_count, table.column_count
        )
        print(summary)
        for region in table.bounding_regions:
            location = "Table # {} location on page: {} is on {}".format(
                index, region.page_number, region.bounding_box
            )
            print(location)
        for cell in table.cells:
            cell_line = "...Cell[{}][{}] has content '{}'".format(
                cell.row_index, cell.column_index, cell.content
            )
            print(cell_line)
    print("----------------------------------------")


if __name__ == "__main__":
    convert_to_and_from_dict()