-
Notifications
You must be signed in to change notification settings - Fork 2.8k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[text analytics] opinion mining support #12542
Changes from 6 commits
7755968
43e72dc
a3b4510
32ddecc
ea9fca3
590dd11
399571a
1ef46f9
858e70d
e83b145
ffe563d
09cf3fd
7ee99e0
e9fee50
77c7fa8
87b4a80
83426aa
7dc065c
273fb94
4180ad0
5b804d2
f03b477
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -3,9 +3,11 @@ | |
# Copyright (c) Microsoft Corporation. | ||
# Licensed under the MIT License. | ||
# ------------------------------------ | ||
|
||
from ._generated.v3_0.models._models import LanguageInput | ||
from ._generated.v3_0.models._models import MultiLanguageInput | ||
import re | ||
from ._generated.v3_0.models._models import ( | ||
LanguageInput, | ||
MultiLanguageInput | ||
) | ||
|
||
|
||
class DictMixin(object): | ||
|
@@ -635,19 +637,30 @@ class SentenceSentiment(DictMixin): | |
and 1 for the sentence for all labels. | ||
:vartype confidence_scores: | ||
~azure.ai.textanalytics.SentimentConfidenceScores | ||
:ivar aspects: The list of aspects of the sentence. An aspect is a | ||
key phrase of a sentence, for example the attributes of a product | ||
or a service. Only returned if `show_aspects` is set to True in | ||
call to `analyze_sentiment` | ||
:vartype aspects: | ||
list[~azure.ai.textanalytics.SentenceAspect] | ||
""" | ||
|
||
def __init__(self, **kwargs):
    """Populate the sentence-level fields from keyword arguments.

    Recognized keywords are ``text``, ``sentiment``, ``confidence_scores``
    and ``aspects``; any that is not supplied is stored as ``None``.
    """
    # dict.get already falls back to None, so no explicit default is needed.
    self.text = kwargs.get("text")
    self.sentiment = kwargs.get("sentiment")
    self.confidence_scores = kwargs.get("confidence_scores")
    self.aspects = kwargs.get("aspects")
|
||
@classmethod
def _from_generated(cls, sentence, results):
    """Build an instance of this class from a generated sentence model.

    :param sentence: The generated sentence object. Its ``text``,
        ``sentiment`` and ``confidence_scores`` attributes are read, and
        its ``aspects`` attribute is read when present.
    :param results: Passed unchanged to ``SentenceAspect._from_generated``
        so that each aspect can resolve the opinions it refers to.
    :return: A new instance of ``cls``. ``aspects`` is ``None`` when the
        generated sentence has no ``aspects`` attribute or when that
        attribute is ``None``.
    """
    # NOTE(review): assumes a generated model can expose ``aspects`` as None
    # (e.g. when aspect analysis was not requested) -- confirm against the
    # generated models. Iterating None would raise TypeError, so a missing
    # attribute and a None attribute are handled the same way.
    generated_aspects = getattr(sentence, "aspects", None)
    return cls(
        text=sentence.text,
        sentiment=sentence.sentiment,
        confidence_scores=SentimentConfidenceScores._from_generated(sentence.confidence_scores),  # pylint: disable=protected-access
        aspects=(
            [SentenceAspect._from_generated(aspect, results) for aspect in generated_aspects]  # pylint: disable=protected-access
            if generated_aspects is not None else None
        )
    )
|
||
def __repr__(self): | ||
|
@@ -658,6 +671,125 @@ def __repr__(self): | |
)[:1024] | ||
|
||
|
||
class SentenceAspect(DictMixin):
    """An aspect of a sentence, with the opinions related to it, its
    predicted sentiment, confidence scores and position.
    An aspect is a key component of a sentence; for example, in the
    sentence "The food is good", "food" is an aspect.

    :ivar str text: The aspect text.
    :ivar str sentiment: The predicted Sentiment for the aspect. Possible values
        include 'positive', 'mixed', and 'negative'.
    :ivar confidence_scores: The sentiment confidence score between 0
        and 1 for the aspect for 'positive' and 'negative' labels. Its score
        for 'neutral' will always be 0
    :vartype confidence_scores:
        ~azure.ai.textanalytics.SentimentConfidenceScores
    :ivar opinions: All of the opinions in the sentence related to this aspect.
    :vartype opinions: list[~azure.ai.textanalytics.AspectOpinion]
    :ivar int offset: The aspect offset from the start of the sentence.
    :ivar int length: The length of the aspect.
    """

    def __init__(self, **kwargs):
        # Every field is optional; anything not supplied is stored as None.
        self.text = kwargs.get("text")
        self.sentiment = kwargs.get("sentiment")
        self.confidence_scores = kwargs.get("confidence_scores")
        self.opinions = kwargs.get("opinions")
        self.offset = kwargs.get("offset")
        self.length = kwargs.get("length")

    @staticmethod
    def _get_opinions(relations, results):
        """Resolve an aspect's "opinion" relations to the objects they reference.

        :param relations: Relation objects exposing ``relation_type`` and
            ``ref``; may be ``None`` or empty.
        :param results: Indexable collection of document results; each entry
            exposes ``sentences``, and each sentence exposes ``opinions``.
        :return: The referenced opinion objects, in relation order. An empty
            list when ``relations`` is falsy.
        """
        # Collect every opinion reference first, before any lookup is made.
        opinion_refs = [
            relation.ref
            for relation in (relations or ())
            if relation.relation_type == "opinion"
        ]

        def _lookup(ref):
            # The reference is scanned for integers rather than parsed as a
            # JSON pointer, so pointer escaping is not taken into account.
            # The first three integers found are used as the document,
            # sentence and opinion indices, in that order.
            indices = [int(digits) for digits in re.findall(r"\d+", ref)]
            return results[indices[0]].sentences[indices[1]].opinions[indices[2]]

        return [_lookup(ref) for ref in opinion_refs]

    @classmethod
    def _from_generated(cls, aspect, results):
        """Build a SentenceAspect from a generated aspect object.

        :param aspect: The generated aspect; its ``text``, ``sentiment``,
            ``confidence_scores``, ``relations``, ``offset`` and ``length``
            attributes are read.
        :param results: Forwarded to ``_get_opinions`` to resolve the
            aspect's opinion references.
        """
        text = aspect.text
        sentiment = aspect.sentiment
        scores = SentimentConfidenceScores._from_generated(aspect.confidence_scores)  # pylint: disable=protected-access
        related_opinions = [
            AspectOpinion._from_generated(generated_opinion)  # pylint: disable=protected-access
            for generated_opinion in cls._get_opinions(aspect.relations, results)
        ]
        return cls(
            text=text,
            sentiment=sentiment,
            confidence_scores=scores,
            opinions=related_opinions,
            offset=aspect.offset,
            length=aspect.length
        )

    def __repr__(self):
        template = "SentenceAspect(text={}, sentiment={}, confidence_scores={}, opinions={}, offset={}, length={})"
        rendered = template.format(
            self.text,
            self.sentiment,
            repr(self.confidence_scores),
            repr(self.opinions),
            self.offset,
            self.length
        )
        # The representation is capped at 1024 characters.
        return rendered[:1024]
|
||
|
||
class AspectOpinion(DictMixin):
    """An opinion about an aspect of a sentence, with its predicted
    sentiment, confidence scores and position.
    For example, in the sentence "The food is good", the opinion of the
    aspect 'food' is 'good'.

    :ivar str text: The opinion text.
    :ivar str sentiment: The predicted Sentiment for the opinion. Possible values
        include 'positive', 'mixed', and 'negative'.
    :ivar confidence_scores: The sentiment confidence score between 0
        and 1 for the opinion for 'positive' and 'negative' labels. Its score
        for 'neutral' will always be 0
    :vartype confidence_scores:
        ~azure.ai.textanalytics.SentimentConfidenceScores
    :ivar int offset: The opinion offset from the start of the sentence.
    :ivar int length: The length of the opinion.
    :ivar bool is_negated: Whether the opinion is negated. For example, in
        "The food is not good", the opinion "good" is negated.
    """

    def __init__(self, **kwargs):
        # Every field is optional; anything not supplied is stored as None.
        self.text = kwargs.get("text")
        self.sentiment = kwargs.get("sentiment")
        self.confidence_scores = kwargs.get("confidence_scores")
        self.offset = kwargs.get("offset")
        self.length = kwargs.get("length")
        self.is_negated = kwargs.get("is_negated")

    @classmethod
    def _from_generated(cls, opinion):
        """Build an AspectOpinion from a generated opinion object.

        :param opinion: The generated opinion; its ``text``, ``sentiment``,
            ``confidence_scores``, ``offset``, ``length`` and ``is_negated``
            attributes are read.
        """
        text = opinion.text
        sentiment = opinion.sentiment
        scores = SentimentConfidenceScores._from_generated(opinion.confidence_scores)  # pylint: disable=protected-access
        return cls(
            text=text,
            sentiment=sentiment,
            confidence_scores=scores,
            offset=opinion.offset,
            length=opinion.length,
            is_negated=opinion.is_negated
        )

    def __repr__(self):
        template = "AspectOpinion(text={}, sentiment={}, confidence_scores={}, offset={}, length={}, is_negated={})"
        rendered = template.format(
            self.text,
            self.sentiment,
            repr(self.confidence_scores),
            self.offset,
            self.length,
            self.is_negated
        )
        # The representation is capped at 1024 characters.
        return rendered[:1024]
|
||
|
||
class SentimentConfidenceScores(DictMixin): | ||
"""The confidence scores (Softmax scores) between 0 and 1. | ||
Higher values indicate higher confidence. | ||
|
@@ -671,15 +803,15 @@ class SentimentConfidenceScores(DictMixin): | |
""" | ||
|
||
def __init__(self, **kwargs):
    """Store the positive, neutral and negative confidence scores.

    Each score defaults to ``0.0`` when its keyword is not supplied.
    """
    self.positive = kwargs.get('positive', 0.0)
    self.neutral = kwargs.get('neutral', 0.0)
    self.negative = kwargs.get('negative', 0.0)
|
||
@classmethod
def _from_generated(cls, score):
    """Build an instance of this class from a generated score object.

    :param score: The generated confidence-score object. ``positive`` and
        ``negative`` are required attributes; ``neutral`` is optional.
    :return: A new instance of ``cls``. ``neutral`` is ``0.0`` when the
        generated object has no ``neutral`` attribute.
    """
    return cls(
        positive=score.positive,
        # Not every generated score object is expected to carry a neutral
        # score, so fall back to 0.0 when it is absent. The attribute name
        # must be spelled "neutral": a misspelled name never matches and
        # would silently discard every real neutral score.
        neutral=getattr(score, "neutral", 0.0),
        negative=score.negative
    )
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -378,6 +378,11 @@ def analyze_sentiment( # type: ignore | |
:type documents: | ||
list[str] or list[~azure.ai.textanalytics.TextDocumentInput] or | ||
list[dict[str, str]] | ||
:keyword bool show_aspects: Whether to conduct aspect-based sentiment analysis. | ||
Aspect-based sentiment analysis provides more granular analysis of sentiment and | ||
opinions around specific aspects or attributes of a product or service. | ||
If set to true, the returned :class:`~azure.ai.textanalytics.SentenceSentiment` objects | ||
will have property `aspects` containing the result of this analysis | ||
:keyword str language: The 2 letter ISO 639-1 representation of language for the | ||
entire batch. For example, use "en" for English; "es" for Spanish etc. | ||
If not set, uses "en" for English as default. Per-document language will | ||
|
@@ -408,11 +413,26 @@ def analyze_sentiment( # type: ignore | |
docs = _validate_batch_input(documents, "language", language) | ||
model_version = kwargs.pop("model_version", None) | ||
show_stats = kwargs.pop("show_stats", False) | ||
show_aspects = kwargs.pop("show_aspects", None) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think this should default to False There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I went with There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. If we need to distinguish between "the application explicitly passed in this value" vs. "the application didn't provide a value, so we'll pick an appropriate default for them" for positional arguments, then we use a sentinel value as the default value. This is often Since we are dealing with There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @johanste I do need to still pop kwargs in this case since the name of the parameter has changed (we have it as |
||
|
||
try: | ||
if self._api_version == "v3.0": | ||
iscai-msft marked this conversation as resolved.
Show resolved
Hide resolved
|
||
if show_aspects is not None: | ||
raise TypeError( | ||
"Parameter 'show_aspects' is only added for API version v3.1-preview.1 and up" | ||
) | ||
return self._client.sentiment( | ||
documents=docs, | ||
model_version=model_version, | ||
show_stats=show_stats, | ||
cls=kwargs.pop("cls", sentiment_result), | ||
**kwargs | ||
) | ||
return self._client.sentiment( | ||
documents=docs, | ||
model_version=model_version, | ||
show_stats=show_stats, | ||
opinion_mining=show_aspects, | ||
cls=kwargs.pop("cls", sentiment_result), | ||
**kwargs | ||
) | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
`ApiVersion.V3_0` or `ApiVersion.V3_1_Preview_1`?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I'm going to make a separate PR to default to v3.1-preview.1. For this PR, we had issues with `detect_language` (thanks for looking into that!) and it would require re-recording all of our tests, so I wanted to address it in a separate PR.