Skip to content
This repository has been archived by the owner on Sep 29, 2023. It is now read-only.

Commit

Permalink
Merge pull request #120 from AzureAD/return-xml-as-is
Browse files Browse the repository at this point in the history
Return xml as is
  • Loading branch information
rayluo authored Mar 2, 2018
2 parents f8fd8ef + f449f2f commit f836ad9
Show file tree
Hide file tree
Showing 2 changed files with 73 additions and 1 deletion.
45 changes: 44 additions & 1 deletion adal/wstrust_response.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,35 @@ def scrub_rstr_log_message(response_str):

return 'RSTR Response: ' + scrubbed_rstr

def findall_content(xml_string, tag):
    """Return the raw inner content of every ``tag`` element, as-is.

    Given a tag name without any prefix, this function returns a list of
    the raw content inside this tag, exactly as it appears in the input.

    >>> findall_content("<ns0:foo> what <bar> ever </bar> content </ns0:foo>", "foo")
    [' what <bar> ever </bar> content ']

    Motivation:

    Usually we would use XML parser to extract the data by xpath.
    However the ElementTree in Python will implicitly normalize the output
    by "hoisting" the inner inline namespaces into the outmost element.
    The result will be a semantically equivalent XML snippet,
    but not fully identical to the original one.
    While this effect shouldn't become a problem in all other cases,
    it does not seem to fully comply with Exclusive XML Canonicalization spec
    (https://www.w3.org/TR/xml-exc-c14n/), and void the SAML token signature.
    SAML signature algo needs the "XML -> C14N(XML) -> Signed(C14N(Xml))" order.

    The binary extension lxml is probably the canonical way to solve this
    (https://stackoverflow.com/questions/22959577/python-exclusive-xml-canonicalization-xml-exc-c14n)
    but here we use this workaround, based on Regex, to return raw content as-is.

    Matching rules:

    * An optional namespace prefix is accepted on both the opening and the
      closing tag.
    * The tag name must match in full, so searching for ``Foo`` does not
      pick up ``<FooCollection>``.
    * Each element is matched up to its nearest closing tag, so sibling
      elements are returned as separate items.
    * Self-closing elements (``<foo/>``) have no content and are skipped.

    Limitation: being regex based, an element that directly nests another
    element of the same name is cut at the inner closing tag.

    :param xml_string: The XML document (or fragment) to search.
    :param tag: The local tag name, without namespace prefix. It is treated
        literally, not as a regular expression.
    :return: A list of strings, one per matched element, in document order.
        The list is empty when nothing matches.
    """
    # \w+ is good enough for https://www.w3.org/TR/REC-xml/#NT-NameChar
    qualified_name = r"(?:\w+:)?" + re.escape(tag)
    # Opening tag: the name must be followed by whitespace (attributes) or by
    # the closing ">", and must not be the self-closing "/>" form.
    # Content: non-greedy, so that siblings are not merged into one match.
    pattern = r"<{name}(?:\s[^>]*)?(?<!/)>(.*?)</{name}\s*>".format(
        name=qualified_name)
    return re.findall(pattern, xml_string, re.DOTALL)


class WSTrustResponse(object):

def __init__(self, call_context, response, wstrust_version):
Expand Down Expand Up @@ -178,6 +207,15 @@ def _parse_token(self):
if self.token is None:
raise AdalError("Unable to find any tokens in RSTR.")

@staticmethod
def _parse_token_by_re(raw_response):
    """Extract the security token from the raw RSTR text without an XML parser.

    The token is taken verbatim from the response via ``findall_content``
    so that the returned bytes are the text exactly as the server sent it.

    :param raw_response: The RSTR response body as text.
    :return: A ``(token_bytes, token_type)`` tuple taken from the first
        ``RequestSecurityTokenResponse`` that contains both a ``TokenType``
        and a ``RequestedSecurityToken``; ``None`` when there is no such
        element.
    """
    for rstr in findall_content(raw_response, "RequestSecurityTokenResponse"):
        token_types = findall_content(rstr, "TokenType")
        tokens = findall_content(rstr, "RequestedSecurityToken")
        if token_types and tokens:
            # UTF-8 yields the same bytes as us-ascii for pure-ASCII tokens,
            # but does not blow up with UnicodeEncodeError when the token
            # carries non-ASCII characters.
            return tokens[0].encode('utf-8'), token_types[0]
    return None


def parse(self):
if not self._response:
raise AdalError("Received empty RSTR response body.")
Expand All @@ -195,7 +233,12 @@ def parse(self):
str_fault_message = self.fault_message or 'NONE'
error_template = 'Server returned error in RSTR - ErrorCode: {} : FaultMessage: {}'
raise AdalError(error_template.format(str_error_code, str_fault_message))
self._parse_token()

token_found = self._parse_token_by_re(self._response)
if token_found:
self.token, self.token_type = token_found
else: # fallback to old logic
self._parse_token()
finally:
self._dom = None
self._parents = None
Expand Down
29 changes: 29 additions & 0 deletions tests/test_wstrust_response.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@

from adal.constants import XmlNamespaces, Errors, WSTrustVersion
from adal.wstrust_response import WSTrustResponse
from adal.wstrust_response import findall_content

_namespaces = XmlNamespaces.namespaces
_call_context = {'log_context' : {'correlation-id':'test-corr-id'}}
Expand Down Expand Up @@ -101,5 +102,33 @@ def test_rstr_unparseable_xml(self):
wstrustResponse = WSTrustResponse(_call_context, '<This is not parseable as an RSTR', WSTrustVersion.WSTRUST13)
wstrustResponse.parse()

def test_findall_content_with_comparison(self):
    """Contrast ElementTree serialization with findall_content()'s raw output."""
    content = """
<saml:Assertion xmlns:saml="SAML:assertion">
<ds:Signature xmlns:ds="http://www.w3.org/2000/09/xmldsig#">
foo
</ds:Signature>
</saml:Assertion>"""
    sample = ('<ns0:Wrapper xmlns:ns0="namespace0">'
              + content
              + '</ns0:Wrapper>')

    # Demonstrating how XML-based parser won't give you the raw content as-is
    element = ET.fromstring(sample).findall('{SAML:assertion}Assertion')[0]
    assertion_via_xml_parser = ET.tostring(element)
    # ET.tostring() returns bytes on Python 3; decode it first so that the
    # comparison is text-vs-text rather than a str-vs-bytes comparison,
    # which could never be equal and would make the assertion vacuous.
    self.assertNotEqual(content, assertion_via_xml_parser.decode('us-ascii'))
    self.assertNotIn(b"<ds:Signature>", assertion_via_xml_parser)

    # The findall_content() helper, based on Regex, will return content as-is.
    self.assertEqual([content], findall_content(sample, "Wrapper"))

def test_findall_content_for_real(self):
    """Parse the sample RSTR file and check the token keeps its raw markup."""
    rstr_path = os.path.join(os.getcwd(), 'tests', 'wstrust', 'RSTR.xml')
    with open(rstr_path) as rstr_file:
        rstr = rstr_file.read()

    response = WSTrustResponse(_call_context, rstr, WSTrustVersion.WSTRUST13)
    response.parse()

    # The marker is present in the input text and in the extracted token.
    self.assertIn("<X509Data>", rstr)
    self.assertIn(b"<X509Data>", response.token)  # It is in bytes

# Run the unittest runner when this module is executed as a script.
if __name__ == '__main__':
    unittest.main()

0 comments on commit f836ad9

Please sign in to comment.