Skip to content
This repository has been archived by the owner on Sep 29, 2023. It is now read-only.

Return xml as is #120

Merged
merged 3 commits into from
Mar 2, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
45 changes: 44 additions & 1 deletion adal/wstrust_response.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,35 @@ def scrub_rstr_log_message(response_str):

return 'RSTR Response: ' + scrubbed_rstr

def findall_content(xml_string, tag):
    """
    Return the raw content of every element named ``tag``, exactly as-is.

    :param xml_string: The XML text to search.
    :param tag: The element's local name, i.e. without any namespace prefix.
        It is matched literally, with any (or no) namespace prefix.
    :returns: A list with the inner text of each matching element, in
        document order. The list is empty when nothing matches.

    >>> findall_content("<ns0:foo> what <bar> ever </bar> content </ns0:foo>", "foo")
    [' what <bar> ever </bar> content ']
    >>> findall_content("<foo>1</foo><foo>2</foo>", "foo")
    ['1', '2']

    Motivation:

    Usually we would use an XML parser to extract the data by xpath.
    However the ElementTree in Python will implicitly normalize the output
    by "hoisting" the inner inline namespaces into the outmost element.
    The result will be a semantically equivalent XML snippet,
    but not fully identical to the original one.
    While this effect shouldn't become a problem in all other cases,
    it does not seem to fully comply with Exclusive XML Canonicalization spec
    (https://www.w3.org/TR/xml-exc-c14n/), and voids the SAML token signature.
    SAML signature algo needs the "XML -> C14N(XML) -> Signed(C14N(Xml))" order.

    The binary extension lxml is probably the canonical way to solve this
    (https://stackoverflow.com/questions/22959577/python-exclusive-xml-canonicalization-xml-exc-c14n)
    but here we use this workaround, based on Regex, to return raw content as-is.

    Limitations of the Regex approach: an element nested inside another
    element of the same name is cut at the first closing tag, and a ">"
    inside an attribute value of the opening tag is not supported.
    """
    # Optional namespace prefix followed by the literal tag name.
    # [\w.-]+ approximates https://www.w3.org/TR/REC-xml/#NT-NameChar
    # re.escape() keeps regex metacharacters in ``tag`` from being interpreted.
    name = r"(?:[\w.-]+:)?" + re.escape(tag)
    pattern = (
        # Opening tag: the name must be followed by whitespace+attributes or
        # directly by ">", so that "foo" does not match "<fooBar>".
        # (?<!/) skips self-closing elements such as <foo attr="x"/>.
        r"<{name}(?:\s[^>]*)?(?<!/)>"
        # Non-greedy, so that sibling elements are returned one by one
        # rather than merged into a single match.
        r"(.*?)"
        # Closing tag, matched in full (not only as a prefix).
        r"</{name}\s*>"
    ).format(name=name)
    return re.findall(pattern, xml_string, re.DOTALL)


class WSTrustResponse(object):

def __init__(self, call_context, response, wstrust_version):
Expand Down Expand Up @@ -178,6 +207,15 @@ def _parse_token(self):
if self.token is None:
raise AdalError("Unable to find any tokens in RSTR.")

@staticmethod
def _parse_token_by_re(raw_response):
    """
    Extract the security token from the raw RSTR text without an XML parser.

    Each chunk that findall_content() returns for
    "RequestSecurityTokenResponse" is searched for a "TokenType" and a
    "RequestedSecurityToken"; the first chunk containing both wins.

    :param raw_response: The RSTR response body as text.
    :returns: A ``(token, token_type)`` tuple, where ``token`` is the raw
        content of RequestedSecurityToken encoded to bytes and ``token_type``
        is the raw TokenType text; or ``None`` when no chunk has both.
    """
    for rstr in findall_content(raw_response, "RequestSecurityTokenResponse"):
        token_types = findall_content(rstr, "TokenType")
        tokens = findall_content(rstr, "RequestedSecurityToken")
        if token_types and tokens:
            # Encode as UTF-8 rather than US-ASCII: the output is identical
            # for pure-ASCII tokens, while a token carrying non-ASCII text
            # no longer raises UnicodeEncodeError.
            return tokens[0].encode('utf-8'), token_types[0]
    # Nothing usable found; the caller checks for a falsy result.
    return None


def parse(self):
if not self._response:
raise AdalError("Received empty RSTR response body.")
Expand All @@ -195,7 +233,12 @@ def parse(self):
str_fault_message = self.fault_message or 'NONE'
error_template = 'Server returned error in RSTR - ErrorCode: {} : FaultMessage: {}'
raise AdalError(error_template.format(str_error_code, str_fault_message))
self._parse_token()

token_found = self._parse_token_by_re(self._response)
if token_found:
self.token, self.token_type = token_found
else: # fallback to old logic
self._parse_token()
finally:
self._dom = None
self._parents = None
Expand Down
29 changes: 29 additions & 0 deletions tests/test_wstrust_response.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@

from adal.constants import XmlNamespaces, Errors, WSTrustVersion
from adal.wstrust_response import WSTrustResponse
from adal.wstrust_response import findall_content

_namespaces = XmlNamespaces.namespaces
_call_context = {'log_context' : {'correlation-id':'test-corr-id'}}
Expand Down Expand Up @@ -101,5 +102,33 @@ def test_rstr_unparseable_xml(self):
wstrustResponse = WSTrustResponse(_call_context, '<This is not parseable as an RSTR', WSTrustVersion.WSTRUST13)
wstrustResponse.parse()

def test_findall_content_with_comparison(self):
    """
    Contrast ElementTree serialization with findall_content().

    ElementTree re-serializes the element in a different textual form,
    whereas findall_content() returns the wrapped content unchanged.
    """
    content = """
<saml:Assertion xmlns:saml="SAML:assertion">
<ds:Signature xmlns:ds="http://www.w3.org/2000/09/xmldsig#">
foo
</ds:Signature>
</saml:Assertion>"""
    sample = ('<ns0:Wrapper xmlns:ns0="namespace0">'
              + content
              + '</ns0:Wrapper>')

    # Demonstrating how XML-based parser won't give you the raw content as-is
    element = ET.fromstring(sample).findall('{SAML:assertion}Assertion')[0]
    # ET.tostring() returns bytes on Python 3. Decode it so the comparison
    # below is text-vs-text; a str-vs-bytes comparison is always unequal
    # and would prove nothing.
    assertion_via_xml_parser = ET.tostring(element).decode('us-ascii')
    # Strip both sides so that surrounding whitespace alone cannot make
    # the two differ.
    self.assertNotEqual(content.strip(), assertion_via_xml_parser.strip())
    self.assertNotIn("<ds:Signature>", assertion_via_xml_parser)

    # The findall_content() helper, based on Regex, will return content as-is.
    self.assertEqual([content], findall_content(sample, "Wrapper"))

def test_findall_content_for_real(self):
    """
    Parse the RSTR.xml fixture and check the token keeps its raw markup.

    The literal "<X509Data>" tag present in the fixture must also be
    present, unchanged, in the parsed token.
    """
    fixture_path = os.path.join(os.getcwd(), 'tests', 'wstrust', 'RSTR.xml')
    with open(fixture_path) as fixture:
        raw_rstr = fixture.read()

    response = WSTrustResponse(_call_context, raw_rstr, WSTrustVersion.WSTRUST13)
    response.parse()

    self.assertIn("<X509Data>", raw_rstr)
    self.assertIn(b"<X509Data>", response.token)  # It is in bytes

# Run the tests via unittest when this module is executed directly as a script.
if __name__ == '__main__':
    unittest.main()