[text analytics] Analyze updates for v5.1.0b6 (#17003)
fixes #16372
abhahn authored Mar 5, 2021
1 parent 4300118 commit c90a60d
Showing 26 changed files with 1,542 additions and 1,790 deletions.
2 changes: 1 addition & 1 deletion sdk/textanalytics/azure-ai-textanalytics/CHANGELOG.md
@@ -9,7 +9,7 @@
- Renamed classes `AspectSentiment` and `OpinionSentiment` to `TargetSentiment` and `AssessmentSentiment` respectively.

**New Features**

+ - Added `RecognizeLinkedEntitiesAction` as a supported action type for `begin_analyze_batch_actions`.
- Added parameter `categories_filter` to the `recognize_pii_entities` client method.
- Added enum `PiiEntityCategoryType`.
- Added property `normalized_text` to `HealthcareEntity`. This property is a normalized version of the `text` property that already
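For orientation, here is a minimal sketch of how the new action type plugs into `begin_analyze_batch_actions` (the endpoint, key, and document text are placeholders, not part of this commit):

from azure.core.credentials import AzureKeyCredential
from azure.ai.textanalytics import TextAnalyticsClient, RecognizeLinkedEntitiesAction

# Placeholder resource values -- substitute your own endpoint and key.
client = TextAnalyticsClient(
    "https://<resource-name>.cognitiveservices.azure.com/",
    AzureKeyCredential("<api-key>")
)

poller = client.begin_analyze_batch_actions(
    documents=["Microsoft was founded by Bill Gates and Paul Allen."],
    actions=[RecognizeLinkedEntitiesAction()],
)

# Each AnalyzeBatchActionsResult holds the per-document results for one action.
for action_result in poller.result():
    for doc in action_result.document_results:
        for linked_entity in doc.entities:
            print(linked_entity.name, linked_entity.url)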
@@ -36,6 +36,7 @@
HealthcareEntity,
HealthcareEntityDataSource,
RecognizeEntitiesAction,
+ RecognizeLinkedEntitiesAction,
RecognizePiiEntitiesAction,
ExtractKeyPhrasesAction,
AnalyzeBatchActionsResult,
@@ -82,6 +83,7 @@
'HealthcareEntity',
'HealthcareEntityDataSource',
'RecognizeEntitiesAction',
+ 'RecognizeLinkedEntitiesAction',
'RecognizePiiEntitiesAction',
'ExtractKeyPhrasesAction',
'AnalyzeBatchActionsResult',
@@ -11,9 +11,9 @@
from azure.core.polling._async_poller import PollingReturnType


- _FINISHED = frozenset(["succeeded", "cancelled", "failed", "partiallysucceeded"])
+ _FINISHED = frozenset(["succeeded", "cancelled", "failed", "partiallycompleted"])
_FAILED = frozenset(["failed"])
- _SUCCEEDED = frozenset(["succeeded", "partiallysucceeded"])
+ _SUCCEEDED = frozenset(["succeeded", "partiallycompleted"])


class TextAnalyticsAsyncLROPollingMethod(AsyncLROBasePolling):
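The only change here is the service's terminal status string for partial success: `partiallysucceeded` became `partiallycompleted` in v3.1-preview.4. These frozensets feed the polling method's terminal-state checks, which follow the membership-test pattern below (a sketch of the helper style used by `azure-core` base polling, shown for illustration); if the rename were not mirrored, a partially successful job would never be treated as finished:

def _finished(status):
    # A status is terminal once the service reports one of the _FINISHED values.
    if hasattr(status, "value"):  # accept enum-like statuses as well as plain strings
        status = status.value
    return str(status).lower() in _FINISHED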
@@ -16,5 +16,5 @@ def __init__(self, *args, **kwargs):

class AnalyzeResultAsync(AsyncItemPaged):
def __init__(self, *args, **kwargs):
- self.statistics = kwargs.pop('statistics')
+ self.statistics = kwargs.pop('statistics', None)
super(AnalyzeResultAsync, self).__init__(*args, **kwargs)
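Passing a default to `pop` makes the keyword optional: the pager can now be constructed without `statistics` (e.g. when the caller did not request stats) instead of raising. In short:

kwargs = {}
kwargs.pop('statistics')        # raises KeyError
kwargs.pop('statistics', None)  # returns None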
@@ -8,9 +8,9 @@
from azure.core.polling import LROPoller
from azure.core.polling.base_polling import LROBasePolling, OperationResourcePolling, OperationFailed, BadStatus

- _FINISHED = frozenset(["succeeded", "cancelled", "failed", "partiallysucceeded"])
+ _FINISHED = frozenset(["succeeded", "cancelled", "failed", "partiallycompleted"])
_FAILED = frozenset(["failed"])
- _SUCCEEDED = frozenset(["succeeded", "partiallysucceeded"])
+ _SUCCEEDED = frozenset(["succeeded", "partiallycompleted"])


class TextAnalyticsOperationResourcePolling(OperationResourcePolling):
@@ -1362,6 +1362,7 @@ class AnalyzeBatchActionsType(str, Enum):
RECOGNIZE_ENTITIES = "recognize_entities" #: Entities Recognition action.
RECOGNIZE_PII_ENTITIES = "recognize_pii_entities" #: PII Entities Recognition action.
EXTRACT_KEY_PHRASES = "extract_key_phrases" #: Key Phrase Extraction action.
+ RECOGNIZE_LINKED_ENTITIES = "recognize_linked_entities" #: Linked Entities Recognition action.


class AnalyzeBatchActionsResult(DictMixin):
@@ -1377,20 +1378,24 @@ class AnalyzeBatchActionsResult(DictMixin):
:vartype action_type: str or ~azure.ai.textanalytics.AnalyzeBatchActionsType
:ivar ~datetime.datetime completed_on: Date and time (UTC) when the result completed
on the service.
+ :ivar statistics: Overall statistics for the action result.
+ :vartype statistics: ~azure.ai.textanalytics.RequestStatistics
"""
def __init__(self, **kwargs):
self.document_results = kwargs.get("document_results")
self.is_error = False
self.action_type = kwargs.get("action_type")
self.completed_on = kwargs.get("completed_on")
+ self.statistics = kwargs.get("statistics")

def __repr__(self):
return "AnalyzeBatchActionsResult(document_results={}, is_error={}, action_type={}, completed_on={})" \
.format(
return "AnalyzeBatchActionsResult(document_results={}, is_error={}, action_type={}, completed_on={}, " \
"statistics={})".format(
repr(self.document_results),
self.is_error,
self.action_type,
self.completed_on
self.completed_on,
repr(self.statistics)
)[:1024]
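With the new attribute in place, per-action statistics can be read off each result roughly as follows (a sketch; `statistics` stays `None` unless the service returned stats for the action, and the field names are those of `RequestStatistics` further down):

# `poller` as returned by begin_analyze_batch_actions (see the sketch above).
for action_result in poller.result():
    if action_result.statistics:
        stats = action_result.statistics
        print(stats.documents_count, stats.valid_documents_count,
              stats.erroneous_documents_count, stats.transactions_count)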

class AnalyzeBatchActionsError(DictMixin):
@@ -1527,6 +1532,44 @@ def to_generated(self):
)
)


+ class RecognizeLinkedEntitiesAction(DictMixin):
+ """RecognizeLinkedEntitiesAction encapsulates the parameters for starting a long-running Linked Entities
+ Recognition operation.
+ If you just want to recognize linked entities in a list of documents, and not perform a batch
+ of long-running actions on the input documents, call method `recognize_linked_entities` instead
+ of interfacing with this model.
+ :keyword str model_version: The model version to use for the analysis.
+ :keyword str string_index_type: Specifies the method used to interpret string offsets.
+ `UnicodeCodePoint`, the Python encoding, is the default. To override the Python default,
+ you can also pass in `Utf16CodePoint` or `TextElements_v8`. For additional information
+ see https://aka.ms/text-analytics-offsets
+ :ivar str model_version: The model version to use for the analysis.
+ :ivar str string_index_type: Specifies the method used to interpret string offsets.
+ `UnicodeCodePoint`, the Python encoding, is the default. To override the Python default,
+ you can also pass in `Utf16CodePoint` or `TextElements_v8`. For additional information
+ see https://aka.ms/text-analytics-offsets
+ """
+
+ def __init__(self, **kwargs):
+ self.model_version = kwargs.get("model_version", "latest")
+ self.string_index_type = kwargs.get("string_index_type", "UnicodeCodePoint")
+
+ def __repr__(self, **kwargs):
+ return "RecognizeLinkedEntitiesAction(model_version={}, string_index_type={})" \
+ .format(self.model_version, self.string_index_type)[:1024]
+
+ def to_generated(self):
+ return _latest_preview_models.EntityLinkingTask(
+ parameters=_latest_preview_models.EntityLinkingTaskParameters(
+ model_version=self.model_version,
+ string_index_type=self.string_index_type
+ )
+ )
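A one-line usage sketch of the defaults described in the docstring above (the values shown are the documented options):

action = RecognizeLinkedEntitiesAction(string_index_type="Utf16CodePoint")  # model_version defaults to "latest"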


class RequestStatistics(DictMixin):
def __init__(self, **kwargs):
self.documents_count = kwargs.get("documents_count")
@@ -1544,8 +1587,8 @@ def _from_generated(cls, request_statistics):
)

def __repr__(self, **kwargs):
return "RequestStatistics(documents_count={}, valid_documents_count={}, erroneous_documents_count={}, \
transactions_count={}".format(
return "RequestStatistics(documents_count={}, valid_documents_count={}, erroneous_documents_count={}, " \
"transactions_count={})".format(
self.documents_count,
self.valid_documents_count,
self.erroneous_documents_count,
@@ -16,5 +16,5 @@ def __init__(self, *args, **kwargs):

class AnalyzeResult(ItemPaged):
def __init__(self, *args, **kwargs):
- self.statistics = kwargs.pop('statistics')
+ self.statistics = kwargs.pop('statistics', None)
super(AnalyzeResult, self).__init__(*args, **kwargs)
@@ -12,6 +12,7 @@
TextDocumentInput,
RecognizeEntitiesAction,
RecognizePiiEntitiesAction,
+ RecognizeLinkedEntitiesAction,
AnalyzeBatchActionsType,
)

@@ -72,6 +73,8 @@ def _determine_action_type(action):
return AnalyzeBatchActionsType.RECOGNIZE_ENTITIES
if isinstance(action, RecognizePiiEntitiesAction):
return AnalyzeBatchActionsType.RECOGNIZE_PII_ENTITIES
+ if isinstance(action, RecognizeLinkedEntitiesAction):
+ return AnalyzeBatchActionsType.RECOGNIZE_LINKED_ENTITIES
return AnalyzeBatchActionsType.EXTRACT_KEY_PHRASES

def _check_string_index_type_arg(string_index_type_arg, api_version, string_index_type_default="UnicodeCodePoint"):
@@ -35,7 +35,6 @@
RequestStatistics,
AnalyzeBatchActionsType,
AnalyzeBatchActionsError,
- TextDocumentBatchStatistics,
_get_indices,
)
from ._paging import AnalyzeHealthcareEntitiesResult, AnalyzeResult
@@ -204,27 +203,34 @@ def _get_deserialization_callback_from_task_type(task_type):
return entities_result
if task_type == AnalyzeBatchActionsType.RECOGNIZE_PII_ENTITIES:
return pii_entities_result
+ if task_type == AnalyzeBatchActionsType.RECOGNIZE_LINKED_ENTITIES:
+ return linked_entities_result
return key_phrases_result

def _get_property_name_from_task_type(task_type):
if task_type == AnalyzeBatchActionsType.RECOGNIZE_ENTITIES:
return "entity_recognition_tasks"
if task_type == AnalyzeBatchActionsType.RECOGNIZE_PII_ENTITIES:
return "entity_recognition_pii_tasks"
+ if task_type == AnalyzeBatchActionsType.RECOGNIZE_LINKED_ENTITIES:
+ return "entity_linking_tasks"
return "key_phrase_extraction_tasks"

def _num_tasks_in_current_page(returned_tasks_object):
return (
len(returned_tasks_object.entity_recognition_tasks or []) +
len(returned_tasks_object.entity_recognition_pii_tasks or []) +
- len(returned_tasks_object.key_phrase_extraction_tasks or [])
+ len(returned_tasks_object.key_phrase_extraction_tasks or []) +
+ len(returned_tasks_object.entity_linking_tasks or [])
)

def _get_task_type_from_error(error):
if "pii" in error.target.lower():
return AnalyzeBatchActionsType.RECOGNIZE_PII_ENTITIES
if "entity" in error.target.lower():
if "entityrecognition" in error.target.lower():
return AnalyzeBatchActionsType.RECOGNIZE_ENTITIES
if "entitylinking" in error.target.lower():
return AnalyzeBatchActionsType.RECOGNIZE_LINKED_ENTITIES
return AnalyzeBatchActionsType.EXTRACT_KEY_PHRASES
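The recognition check is narrowed from `"entity"` to `"entityrecognition"` because a linked-entities task's error target also contains the substring `entity`; with the old check, a failed linked-entities action would have been misreported as plain entity recognition. An illustration with made-up target strings (the real `error.target` values come from the service payload):

for target in ("#/tasks/entityRecognitionTasks/0", "#/tasks/entityLinkingTasks/0"):
    t = target.lower()
    if "entityrecognition" in t:
        print(target, "-> RECOGNIZE_ENTITIES")
    elif "entitylinking" in t:
        print(target, "-> RECOGNIZE_LINKED_ENTITIES")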

def _get_mapped_errors(analyze_job_state):
@@ -249,6 +255,9 @@ def _get_good_result(current_task_type, index_of_task_result, doc_id_order, resp
)
return AnalyzeBatchActionsResult(
document_results=document_results,
+ statistics=RequestStatistics._from_generated( # pylint: disable=protected-access
+ response_task_to_deserialize.results.statistics
+ ) if response_task_to_deserialize.results.statistics else None,
action_type=current_task_type,
completed_on=response_task_to_deserialize.last_update_date_time,
)
@@ -312,9 +321,7 @@ def healthcare_paged_result(doc_id_order, health_status_callback, _, obj, respon
def analyze_paged_result(doc_id_order, task_order, analyze_status_callback, _, obj, response_headers, show_stats=False): # pylint: disable=unused-argument
return AnalyzeResult(
functools.partial(lro_get_next_page, analyze_status_callback, obj, show_stats=show_stats),
- functools.partial(analyze_extract_page_data, doc_id_order, task_order, response_headers),
- statistics=TextDocumentBatchStatistics._from_generated(obj.statistics) \
- if (show_stats and obj.statistics) else None # pylint: disable=protected-access
+ functools.partial(analyze_extract_page_data, doc_id_order, task_order, response_headers)
)

def _get_deserialize():
@@ -9,7 +9,7 @@
from urllib.parse import urlparse, parse_qsl

from azure.core.async_paging import AsyncList
- from ._models import RequestStatistics, TextDocumentBatchStatistics
+ from ._models import RequestStatistics
from ._async_paging import (
AnalyzeHealthcareEntitiesResultAsync,
AnalyzeResultAsync
@@ -58,6 +58,4 @@ def analyze_paged_result(
return AnalyzeResultAsync(
functools.partial(lro_get_next_page_async, analyze_status_callback, obj),
functools.partial(analyze_extract_page_data_async, doc_id_order, task_order, response_headers),
- statistics=TextDocumentBatchStatistics._from_generated(obj.statistics) \
- if show_stats and obj.statistics is not None else None # pylint: disable=protected-access
)
@@ -58,6 +58,7 @@
RecognizePiiEntitiesResult,
RecognizeEntitiesAction,
RecognizePiiEntitiesAction,
+ RecognizeLinkedEntitiesAction,
ExtractKeyPhrasesAction,
AnalyzeHealthcareEntitiesResultItem,
AnalyzeBatchActionsResult,
@@ -743,7 +744,7 @@ def _analyze_result_callback(self, doc_id_order, task_order, raw_response, _, he
def begin_analyze_batch_actions( # type: ignore
self,
documents, # type: Union[List[str], List[TextDocumentInput], List[Dict[str, str]]]
- actions, # type: List[Union[RecognizeEntitiesAction, RecognizePiiEntitiesAction, ExtractKeyPhrasesAction]]
+ actions, # type: List[Union[RecognizeEntitiesAction, RecognizeLinkedEntitiesAction, RecognizePiiEntitiesAction, ExtractKeyPhrasesAction]] # pylint: disable=line-too-long
**kwargs # type: Any
): # type: (...) -> LROPoller[ItemPaged[AnalyzeBatchActionsResult]]
"""Start a long-running operation to perform a variety of text analysis actions over a batch of documents.
@@ -761,7 +762,8 @@ def begin_analyze_batch_actions( # type: ignore
The action results will be returned in the same order in which you supplied the actions.
Duplicate actions in the list are not supported.
:type actions:
- list[RecognizeEntitiesAction or RecognizePiiEntitiesAction or ExtractKeyPhrasesAction]
+ list[RecognizeEntitiesAction or RecognizePiiEntitiesAction or ExtractKeyPhrasesAction or
+ RecognizeLinkedEntitiesAction]
:keyword str display_name: An optional display name to set for the requested analysis.
:keyword str language: The 2 letter ISO 639-1 representation of language for the
entire batch. For example, use "en" for English; "es" for Spanish etc.
@@ -816,6 +818,13 @@ def begin_analyze_batch_actions( # type: ignore
key_phrase_extraction_tasks=[
t.to_generated() for t in
[a for a in actions if _determine_action_type(a) == AnalyzeBatchActionsType.EXTRACT_KEY_PHRASES]
+ ],
+ entity_linking_tasks=[
+ t.to_generated() for t in
+ [
+ a for a in actions
+ if _determine_action_type(a) == AnalyzeBatchActionsType.RECOGNIZE_LINKED_ENTITIES
+ ]
+ ]
)
analyze_body = self._client.models(api_version='v3.1-preview.4').AnalyzeBatchInput(
@@ -742,7 +742,8 @@ async def begin_analyze_batch_actions( # type: ignore
The action results will be returned in the same order in which you supplied the actions.
Duplicate actions in the list are not supported.
:type actions:
- list[RecognizeEntitiesAction or RecognizePiiEntitiesAction or ExtractKeyPhrasesAction]
+ list[RecognizeEntitiesAction or RecognizePiiEntitiesAction or ExtractKeyPhrasesAction or
+ RecognizeLinkedEntitiesAction]
:keyword str display_name: An optional display name to set for the requested analysis.
:keyword str language: The 2 letter ISO 639-1 representation of language for the
entire batch. For example, use "en" for English; "es" for Spanish etc.
@@ -797,6 +798,13 @@ async def begin_analyze_batch_actions( # type: ignore
key_phrase_extraction_tasks=[
t.to_generated() for t in
[a for a in actions if _determine_action_type(a) == AnalyzeBatchActionsType.EXTRACT_KEY_PHRASES]
+ ],
+ entity_linking_tasks=[
+ t.to_generated() for t in
+ [
+ a for a in actions if \
+ _determine_action_type(a) == AnalyzeBatchActionsType.RECOGNIZE_LINKED_ENTITIES
+ ]
+ ]
)
analyze_body = self._client.models(api_version='v3.1-preview.4').AnalyzeBatchInput(
@@ -34,6 +34,7 @@ async def analyze_async(self):
from azure.ai.textanalytics.aio import TextAnalyticsClient
from azure.ai.textanalytics import (
RecognizeEntitiesAction,
+ RecognizeLinkedEntitiesAction,
RecognizePiiEntitiesAction,
ExtractKeyPhrasesAction,
AnalyzeBatchActionsType
@@ -63,7 +64,8 @@ async def analyze_async(self):
actions=[
RecognizeEntitiesAction(),
RecognizePiiEntitiesAction(),
- ExtractKeyPhrasesAction()
+ ExtractKeyPhrasesAction(),
+ RecognizeLinkedEntitiesAction()
]
)

@@ -104,6 +106,24 @@ async def analyze_async(self):
print("Key Phrases: {}\n".format(doc.key_phrases))
print("------------------------------------------")

+ if action_result.action_type == AnalyzeBatchActionsType.RECOGNIZE_LINKED_ENTITIES:
+ print("Results of Linked Entities Recognition action:")
+ for idx, doc in enumerate(action_result.document_results):
+ print("Document text: {}\n".format(documents[idx]))
+ for linked_entity in doc.entities:
+ print("Entity name: {}".format(linked_entity.name))
+ print("...Data source: {}".format(linked_entity.data_source))
+ print("...Data source language: {}".format(linked_entity.language))
+ print("...Data source entity ID: {}".format(linked_entity.data_source_entity_id))
+ print("...Data source URL: {}".format(linked_entity.url))
+ print("...Document matches:")
+ for match in linked_entity.matches:
+ print("......Match text: {}".format(match.text))
+ print(".........Confidence Score: {}".format(match.confidence_score))
+ print(".........Offset: {}".format(match.offset))
+ print(".........Length: {}".format(match.length))
+ print("------------------------------------------")

# [END analyze_async]

