[text analytics] Analyze updates for v5.1.0b6 (#17003)
fixes #16372
abhahn authored Mar 5, 2021
1 parent 4300118 commit c90a60d
Showing 26 changed files with 1,542 additions and 1,790 deletions.
2 changes: 1 addition & 1 deletion sdk/textanalytics/azure-ai-textanalytics/CHANGELOG.md
@@ -9,7 +9,7 @@
- Renamed classes `AspectSentiment` and `OpinionSentiment` to `TargetSentiment` and `AssessmentSentiment` respectively.

**New Features**

+ - Added `RecognizeLinkedEntitiesAction` as a supported action type for `begin_analyze_batch_actions`.
- Added parameter `categories_filter` to the `recognize_pii_entities` client method.
- Added enum `PiiEntityCategoryType`.
- Added property `normalized_text` to `HealthcareEntity`. This property is a normalized version of the `text` property that already
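For orientation, here is a minimal sketch of how the new action type plugs into `begin_analyze_batch_actions` (the endpoint, key, and document text are placeholders, not part of this commit):

from azure.core.credentials import AzureKeyCredential
from azure.ai.textanalytics import TextAnalyticsClient, RecognizeLinkedEntitiesAction

# Placeholder resource values -- substitute your own endpoint and key.
client = TextAnalyticsClient(
    "https://<resource-name>.cognitiveservices.azure.com/",
    AzureKeyCredential("<api-key>")
)

poller = client.begin_analyze_batch_actions(
    documents=["Microsoft was founded by Bill Gates and Paul Allen."],
    actions=[RecognizeLinkedEntitiesAction()],
)

# Each AnalyzeBatchActionsResult holds the per-document results for one action.
for action_result in poller.result():
    for doc in action_result.document_results:
        for linked_entity in doc.entities:
            print(linked_entity.name, linked_entity.url)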
@@ -36,6 +36,7 @@
HealthcareEntity,
HealthcareEntityDataSource,
RecognizeEntitiesAction,
+ RecognizeLinkedEntitiesAction,
RecognizePiiEntitiesAction,
ExtractKeyPhrasesAction,
AnalyzeBatchActionsResult,
@@ -82,6 +83,7 @@
'HealthcareEntity',
'HealthcareEntityDataSource',
'RecognizeEntitiesAction',
+ 'RecognizeLinkedEntitiesAction',
'RecognizePiiEntitiesAction',
'ExtractKeyPhrasesAction',
'AnalyzeBatchActionsResult',
@@ -11,9 +11,9 @@
from azure.core.polling._async_poller import PollingReturnType


- _FINISHED = frozenset(["succeeded", "cancelled", "failed", "partiallysucceeded"])
+ _FINISHED = frozenset(["succeeded", "cancelled", "failed", "partiallycompleted"])
_FAILED = frozenset(["failed"])
- _SUCCEEDED = frozenset(["succeeded", "partiallysucceeded"])
+ _SUCCEEDED = frozenset(["succeeded", "partiallycompleted"])


class TextAnalyticsAsyncLROPollingMethod(AsyncLROBasePolling):
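The only change here is the service's terminal status string for partial success: `partiallysucceeded` became `partiallycompleted` in v3.1-preview.4. These frozensets feed the polling method's terminal-state checks, which follow the membership-test pattern below (a sketch of the helper style used by `azure-core` base polling, shown for illustration); if the rename were not mirrored, a partially successful job would never be treated as finished:

def _finished(status):
    # A status is terminal once the service reports one of the _FINISHED values.
    if hasattr(status, "value"):  # accept enum-like statuses as well as plain strings
        status = status.value
    return str(status).lower() in _FINISHED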
@@ -16,5 +16,5 @@ def __init__(self, *args, **kwargs):

class AnalyzeResultAsync(AsyncItemPaged):
def __init__(self, *args, **kwargs):
- self.statistics = kwargs.pop('statistics')
+ self.statistics = kwargs.pop('statistics', None)
super(AnalyzeResultAsync, self).__init__(*args, **kwargs)
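Passing a default to `pop` makes the keyword optional: the pager can now be constructed without `statistics` (e.g. when the caller did not request stats) instead of raising. In short:

kwargs = {}
kwargs.pop('statistics')        # raises KeyError
kwargs.pop('statistics', None)  # returns None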
@@ -8,9 +8,9 @@
from azure.core.polling import LROPoller
from azure.core.polling.base_polling import LROBasePolling, OperationResourcePolling, OperationFailed, BadStatus

- _FINISHED = frozenset(["succeeded", "cancelled", "failed", "partiallysucceeded"])
+ _FINISHED = frozenset(["succeeded", "cancelled", "failed", "partiallycompleted"])
_FAILED = frozenset(["failed"])
- _SUCCEEDED = frozenset(["succeeded", "partiallysucceeded"])
+ _SUCCEEDED = frozenset(["succeeded", "partiallycompleted"])


class TextAnalyticsOperationResourcePolling(OperationResourcePolling):
@@ -1362,6 +1362,7 @@ class AnalyzeBatchActionsType(str, Enum):
RECOGNIZE_ENTITIES = "recognize_entities" #: Entities Recognition action.
RECOGNIZE_PII_ENTITIES = "recognize_pii_entities" #: PII Entities Recognition action.
EXTRACT_KEY_PHRASES = "extract_key_phrases" #: Key Phrase Extraction action.
+ RECOGNIZE_LINKED_ENTITIES = "recognize_linked_entities" #: Linked Entities Recognition action.


class AnalyzeBatchActionsResult(DictMixin):
@@ -1377,20 +1378,24 @@ class AnalyzeBatchActionsResult(DictMixin):
:vartype action_type: str or ~azure.ai.textanalytics.AnalyzeBatchActionsType
:ivar ~datetime.datetime completed_on: Date and time (UTC) when the result completed
on the service.
+ :ivar statistics: Overall statistics for the action result.
+ :vartype statistics: ~azure.ai.textanalytics.RequestStatistics
"""
def __init__(self, **kwargs):
self.document_results = kwargs.get("document_results")
self.is_error = False
self.action_type = kwargs.get("action_type")
self.completed_on = kwargs.get("completed_on")
+ self.statistics = kwargs.get("statistics")

def __repr__(self):
return "AnalyzeBatchActionsResult(document_results={}, is_error={}, action_type={}, completed_on={})" \
.format(
return "AnalyzeBatchActionsResult(document_results={}, is_error={}, action_type={}, completed_on={}, " \
"statistics={})".format(
repr(self.document_results),
self.is_error,
self.action_type,
self.completed_on
self.completed_on,
repr(self.statistics)
)[:1024]
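With the new attribute in place, per-action statistics can be read off each result roughly as follows (a sketch; `statistics` stays `None` unless the service returned stats for the action, and the field names are those of `RequestStatistics` further down):

# `poller` as returned by begin_analyze_batch_actions (see the sketch above).
for action_result in poller.result():
    if action_result.statistics:
        stats = action_result.statistics
        print(stats.documents_count, stats.valid_documents_count,
              stats.erroneous_documents_count, stats.transactions_count)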

class AnalyzeBatchActionsError(DictMixin):
@@ -1527,6 +1532,44 @@ def to_generated(self):
)
)


+ class RecognizeLinkedEntitiesAction(DictMixin):
+ """RecognizeLinkedEntitiesAction encapsulates the parameters for starting a long-running Linked Entities
+ Recognition operation.
+ If you just want to recognize linked entities in a list of documents, and not perform a batch
+ of long-running actions on the input documents, call method `recognize_linked_entities` instead
+ of interfacing with this model.
+ :keyword str model_version: The model version to use for the analysis.
+ :keyword str string_index_type: Specifies the method used to interpret string offsets.
+ `UnicodeCodePoint`, the Python encoding, is the default. To override the Python default,
+ you can also pass in `Utf16CodePoint` or `TextElements_v8`. For additional information
+ see https://aka.ms/text-analytics-offsets
+ :ivar str model_version: The model version to use for the analysis.
+ :ivar str string_index_type: Specifies the method used to interpret string offsets.
+ `UnicodeCodePoint`, the Python encoding, is the default. To override the Python default,
+ you can also pass in `Utf16CodePoint` or `TextElements_v8`. For additional information
+ see https://aka.ms/text-analytics-offsets
+ """
+
+ def __init__(self, **kwargs):
+ self.model_version = kwargs.get("model_version", "latest")
+ self.string_index_type = kwargs.get("string_index_type", "UnicodeCodePoint")
+
+ def __repr__(self, **kwargs):
+ return "RecognizeLinkedEntitiesAction(model_version={}, string_index_type={})" \
+ .format(self.model_version, self.string_index_type)[:1024]
+
+ def to_generated(self):
+ return _latest_preview_models.EntityLinkingTask(
+ parameters=_latest_preview_models.EntityLinkingTaskParameters(
+ model_version=self.model_version,
+ string_index_type=self.string_index_type
+ )
+ )
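A one-line usage sketch of the defaults described in the docstring above (the values shown are the documented options):

action = RecognizeLinkedEntitiesAction(string_index_type="Utf16CodePoint")  # model_version defaults to "latest"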


class RequestStatistics(DictMixin):
def __init__(self, **kwargs):
self.documents_count = kwargs.get("documents_count")
@@ -1544,8 +1587,8 @@ def _from_generated(cls, request_statistics):
)

def __repr__(self, **kwargs):
return "RequestStatistics(documents_count={}, valid_documents_count={}, erroneous_documents_count={}, \
transactions_count={}".format(
return "RequestStatistics(documents_count={}, valid_documents_count={}, erroneous_documents_count={}, " \
"transactions_count={})".format(
self.documents_count,
self.valid_documents_count,
self.erroneous_documents_count,
@@ -16,5 +16,5 @@ def __init__(self, *args, **kwargs):

class AnalyzeResult(ItemPaged):
def __init__(self, *args, **kwargs):
- self.statistics = kwargs.pop('statistics')
+ self.statistics = kwargs.pop('statistics', None)
super(AnalyzeResult, self).__init__(*args, **kwargs)
@@ -12,6 +12,7 @@
TextDocumentInput,
RecognizeEntitiesAction,
RecognizePiiEntitiesAction,
+ RecognizeLinkedEntitiesAction,
AnalyzeBatchActionsType,
)

@@ -72,6 +73,8 @@ def _determine_action_type(action):
return AnalyzeBatchActionsType.RECOGNIZE_ENTITIES
if isinstance(action, RecognizePiiEntitiesAction):
return AnalyzeBatchActionsType.RECOGNIZE_PII_ENTITIES
+ if isinstance(action, RecognizeLinkedEntitiesAction):
+ return AnalyzeBatchActionsType.RECOGNIZE_LINKED_ENTITIES
return AnalyzeBatchActionsType.EXTRACT_KEY_PHRASES

def _check_string_index_type_arg(string_index_type_arg, api_version, string_index_type_default="UnicodeCodePoint"):
@@ -35,7 +35,6 @@
RequestStatistics,
AnalyzeBatchActionsType,
AnalyzeBatchActionsError,
- TextDocumentBatchStatistics,
_get_indices,
)
from ._paging import AnalyzeHealthcareEntitiesResult, AnalyzeResult
@@ -204,27 +203,34 @@ def _get_deserialization_callback_from_task_type(task_type):
return entities_result
if task_type == AnalyzeBatchActionsType.RECOGNIZE_PII_ENTITIES:
return pii_entities_result
+ if task_type == AnalyzeBatchActionsType.RECOGNIZE_LINKED_ENTITIES:
+ return linked_entities_result
return key_phrases_result

def _get_property_name_from_task_type(task_type):
if task_type == AnalyzeBatchActionsType.RECOGNIZE_ENTITIES:
return "entity_recognition_tasks"
if task_type == AnalyzeBatchActionsType.RECOGNIZE_PII_ENTITIES:
return "entity_recognition_pii_tasks"
+ if task_type == AnalyzeBatchActionsType.RECOGNIZE_LINKED_ENTITIES:
+ return "entity_linking_tasks"
return "key_phrase_extraction_tasks"

def _num_tasks_in_current_page(returned_tasks_object):
return (
len(returned_tasks_object.entity_recognition_tasks or []) +
len(returned_tasks_object.entity_recognition_pii_tasks or []) +
- len(returned_tasks_object.key_phrase_extraction_tasks or [])
+ len(returned_tasks_object.key_phrase_extraction_tasks or []) +
+ len(returned_tasks_object.entity_linking_tasks or [])
)

def _get_task_type_from_error(error):
if "pii" in error.target.lower():
return AnalyzeBatchActionsType.RECOGNIZE_PII_ENTITIES
if "entity" in error.target.lower():
if "entityrecognition" in error.target.lower():
return AnalyzeBatchActionsType.RECOGNIZE_ENTITIES
if "entitylinking" in error.target.lower():
return AnalyzeBatchActionsType.RECOGNIZE_LINKED_ENTITIES
return AnalyzeBatchActionsType.EXTRACT_KEY_PHRASES
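The recognition check is narrowed from `"entity"` to `"entityrecognition"` because a linked-entities task's error target also contains the substring `entity`; with the old check, a failed linked-entities action would have been misreported as plain entity recognition. An illustration with made-up target strings (the real `error.target` values come from the service payload):

for target in ("#/tasks/entityRecognitionTasks/0", "#/tasks/entityLinkingTasks/0"):
    t = target.lower()
    if "entityrecognition" in t:
        print(target, "-> RECOGNIZE_ENTITIES")
    elif "entitylinking" in t:
        print(target, "-> RECOGNIZE_LINKED_ENTITIES")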

def _get_mapped_errors(analyze_job_state):
@@ -249,6 +255,9 @@ def _get_good_result(current_task_type, index_of_task_result, doc_id_order, resp
)
return AnalyzeBatchActionsResult(
document_results=document_results,
+ statistics=RequestStatistics._from_generated( # pylint: disable=protected-access
+ response_task_to_deserialize.results.statistics
+ ) if response_task_to_deserialize.results.statistics else None,
action_type=current_task_type,
completed_on=response_task_to_deserialize.last_update_date_time,
)
@@ -312,9 +321,7 @@ def healthcare_paged_result(doc_id_order, health_status_callback, _, obj, respon
def analyze_paged_result(doc_id_order, task_order, analyze_status_callback, _, obj, response_headers, show_stats=False): # pylint: disable=unused-argument
return AnalyzeResult(
functools.partial(lro_get_next_page, analyze_status_callback, obj, show_stats=show_stats),
- functools.partial(analyze_extract_page_data, doc_id_order, task_order, response_headers),
- statistics=TextDocumentBatchStatistics._from_generated(obj.statistics) \
- if (show_stats and obj.statistics) else None # pylint: disable=protected-access
+ functools.partial(analyze_extract_page_data, doc_id_order, task_order, response_headers)
)

def _get_deserialize():
@@ -9,7 +9,7 @@
from urllib.parse import urlparse, parse_qsl

from azure.core.async_paging import AsyncList
- from ._models import RequestStatistics, TextDocumentBatchStatistics
+ from ._models import RequestStatistics
from ._async_paging import (
AnalyzeHealthcareEntitiesResultAsync,
AnalyzeResultAsync
@@ -58,6 +58,4 @@ def analyze_paged_result(
return AnalyzeResultAsync(
functools.partial(lro_get_next_page_async, analyze_status_callback, obj),
functools.partial(analyze_extract_page_data_async, doc_id_order, task_order, response_headers),
- statistics=TextDocumentBatchStatistics._from_generated(obj.statistics) \
- if show_stats and obj.statistics is not None else None # pylint: disable=protected-access
)
@@ -58,6 +58,7 @@
RecognizePiiEntitiesResult,
RecognizeEntitiesAction,
RecognizePiiEntitiesAction,
+ RecognizeLinkedEntitiesAction,
ExtractKeyPhrasesAction,
AnalyzeHealthcareEntitiesResultItem,
AnalyzeBatchActionsResult,
@@ -743,7 +744,7 @@ def _analyze_result_callback(self, doc_id_order, task_order, raw_response, _, he
def begin_analyze_batch_actions( # type: ignore
self,
documents, # type: Union[List[str], List[TextDocumentInput], List[Dict[str, str]]]
- actions, # type: List[Union[RecognizeEntitiesAction, RecognizePiiEntitiesAction, ExtractKeyPhrasesAction]]
+ actions, # type: List[Union[RecognizeEntitiesAction, RecognizeLinkedEntitiesAction, RecognizePiiEntitiesAction, ExtractKeyPhrasesAction]] # pylint: disable=line-too-long
**kwargs # type: Any
): # type: (...) -> LROPoller[ItemPaged[AnalyzeBatchActionsResult]]
"""Start a long-running operation to perform a variety of text analysis actions over a batch of documents.
@@ -761,7 +762,8 @@ def begin_analyze_batch_actions( # type: ignore
The action results will be returned in the same order in which you supplied the actions.
Duplicate actions in the list are not supported.
:type actions:
- list[RecognizeEntitiesAction or RecognizePiiEntitiesAction or ExtractKeyPhrasesAction]
+ list[RecognizeEntitiesAction or RecognizePiiEntitiesAction or ExtractKeyPhrasesAction or
+ RecognizeLinkedEntitiesAction]
:keyword str display_name: An optional display name to set for the requested analysis.
:keyword str language: The 2 letter ISO 639-1 representation of language for the
entire batch. For example, use "en" for English; "es" for Spanish etc.
@@ -816,6 +818,13 @@ def begin_analyze_batch_actions( # type: ignore
key_phrase_extraction_tasks=[
t.to_generated() for t in
[a for a in actions if _determine_action_type(a) == AnalyzeBatchActionsType.EXTRACT_KEY_PHRASES]
+ ],
+ entity_linking_tasks=[
+ t.to_generated() for t in
+ [
+ a for a in actions
+ if _determine_action_type(a) == AnalyzeBatchActionsType.RECOGNIZE_LINKED_ENTITIES
+ ]
+ ]
)
analyze_body = self._client.models(api_version='v3.1-preview.4').AnalyzeBatchInput(
@@ -742,7 +742,8 @@ async def begin_analyze_batch_actions( # type: ignore
The action results will be returned in the same order in which you supplied the actions.
Duplicate actions in the list are not supported.
:type actions:
- list[RecognizeEntitiesAction or RecognizePiiEntitiesAction or ExtractKeyPhrasesAction]
+ list[RecognizeEntitiesAction or RecognizePiiEntitiesAction or ExtractKeyPhrasesAction or
+ RecognizeLinkedEntitiesAction]
:keyword str display_name: An optional display name to set for the requested analysis.
:keyword str language: The 2 letter ISO 639-1 representation of language for the
entire batch. For example, use "en" for English; "es" for Spanish etc.
@@ -797,6 +798,13 @@ async def begin_analyze_batch_actions( # type: ignore
key_phrase_extraction_tasks=[
t.to_generated() for t in
[a for a in actions if _determine_action_type(a) == AnalyzeBatchActionsType.EXTRACT_KEY_PHRASES]
+ ],
+ entity_linking_tasks=[
+ t.to_generated() for t in
+ [
+ a for a in actions if \
+ _determine_action_type(a) == AnalyzeBatchActionsType.RECOGNIZE_LINKED_ENTITIES
+ ]
+ ]
)
analyze_body = self._client.models(api_version='v3.1-preview.4').AnalyzeBatchInput(
@@ -34,6 +34,7 @@ async def analyze_async(self):
from azure.ai.textanalytics.aio import TextAnalyticsClient
from azure.ai.textanalytics import (
RecognizeEntitiesAction,
+ RecognizeLinkedEntitiesAction,
RecognizePiiEntitiesAction,
ExtractKeyPhrasesAction,
AnalyzeBatchActionsType
@@ -63,7 +64,8 @@ async def analyze_async(self):
actions=[
RecognizeEntitiesAction(),
RecognizePiiEntitiesAction(),
- ExtractKeyPhrasesAction()
+ ExtractKeyPhrasesAction(),
+ RecognizeLinkedEntitiesAction()
]
)

@@ -104,6 +106,24 @@ async def analyze_async(self):
print("Key Phrases: {}\n".format(doc.key_phrases))
print("------------------------------------------")

+ if action_result.action_type == AnalyzeBatchActionsType.RECOGNIZE_LINKED_ENTITIES:
+ print("Results of Linked Entities Recognition action:")
+ for idx, doc in enumerate(action_result.document_results):
+ print("Document text: {}\n".format(documents[idx]))
+ for linked_entity in doc.entities:
+ print("Entity name: {}".format(linked_entity.name))
+ print("...Data source: {}".format(linked_entity.data_source))
+ print("...Data source language: {}".format(linked_entity.language))
+ print("...Data source entity ID: {}".format(linked_entity.data_source_entity_id))
+ print("...Data source URL: {}".format(linked_entity.url))
+ print("...Document matches:")
+ for match in linked_entity.matches:
+ print("......Match text: {}".format(match.text))
+ print(".........Confidence Score: {}".format(match.confidence_score))
+ print(".........Offset: {}".format(match.offset))
+ print(".........Length: {}".format(match.length))
+ print("------------------------------------------")

# [END analyze_async]

