From 44f0f69a9a385de904a80bd62f78590be65717b1 Mon Sep 17 00:00:00 2001
From: Ray Luo <rayluo@microsoft.com>
Date: Fri, 9 Feb 2018 15:23:58 -0800
Subject: [PATCH 1/3] Use regex to grab xml content as-is

---
 adal/wstrust_response.py       | 30 +++++++++++++++++++++++++++++-
 tests/test_wstrust_response.py | 18 ++++++++++++++++++
 2 files changed, 47 insertions(+), 1 deletion(-)
diff --git a/adal/wstrust_response.py b/adal/wstrust_response.py
index ecdc398f..cfc57c3f 100644
--- a/adal/wstrust_response.py
+++ b/adal/wstrust_response.py
@@ -55,6 +55,20 @@ def scrub_rstr_log_message(response_str):
 
     return 'RSTR Response: ' + scrubbed_rstr
 
+def findall_content(xml_string, tag):
+    """
+    Given a tag name without any prefix,
+    this function returns a list of the raw content inside this tag as-is.
+
+    >>> findall_content("<ns0:foo> what <bar> ever </bar> content </ns0:foo>", "foo")
+    [" what <bar> ever </bar> content "]
+
+    """
+    # https://www.w3.org/TR/REC-xml/#NT-NameChar
+    pattern = r"<(?:\w+:)?%(tag)s>(.*)</(?:\w+:)?%(tag)s" % {"tag": tag}
+    return re.findall(pattern, xml_string, re.DOTALL)
+
+
 class WSTrustResponse(object):
 
     def __init__(self, call_context, response, wstrust_version):
@@ -178,6 +192,15 @@ def _parse_token(self):
         if self.token is None:
             raise AdalError("Unable to find any tokens in RSTR.")
 
+    @staticmethod
+    def _parse_token_by_re(raw_response):
+        for rstr in findall_content(raw_response, "RequestSecurityTokenResponse"):
+            token_types = findall_content(rstr, "TokenType")
+            tokens = findall_content(rstr, "RequestedSecurityToken")
+            if token_types and tokens:
+                return tokens[0].encode('us-ascii'), token_types[0]
+
+
     def parse(self):
         if not self._response:
             raise AdalError("Received empty RSTR response body.")
@@ -195,7 +218,12 @@ def parse(self):
                 str_fault_message = self.fault_message or 'NONE'
                 error_template = 'Server returned error in RSTR - ErrorCode: {} : FaultMessage: {}'
                 raise AdalError(error_template.format(str_error_code, str_fault_message))
-            self._parse_token()
+
+            token_found = self._parse_token_by_re(self._response)
+            if token_found:
+                self.token, self.token_type = token_found
+            else:  # fallback to old logic
+                self._parse_token()
         finally:
             self._dom = None
             self._parents = None
diff --git a/tests/test_wstrust_response.py b/tests/test_wstrust_response.py
index e0b12889..bea744fc 100644
--- a/tests/test_wstrust_response.py
+++ b/tests/test_wstrust_response.py
@@ -36,6 +36,7 @@
 
 from adal.constants import XmlNamespaces, Errors, WSTrustVersion
 from adal.wstrust_response import WSTrustResponse
+from adal.wstrust_response import findall_content
 
 _namespaces = XmlNamespaces.namespaces
 _call_context = {'log_context' : {'correlation-id':'test-corr-id'}}
@@ -101,5 +102,22 @@ def test_rstr_unparseable_xml(self):
             wstrustResponse = WSTrustResponse(_call_context, '<This is not parseable as an RSTR', WSTrustVersion.WSTRUST13)
             wstrustResponse.parse()
 
+    def test_findall_content(self):
+        content = """
+ what <bar> ever </bar> content
+in multiple lines
+"""
+        sample = "<ns0:foo>" + content + "</ns0:foo>"
+        self.assertEqual([content], findall_content(sample, "foo"))
+        self.assertEqual([], findall_content(sample, "nonexist"))
+
+    def test_findall_content_for_real(self):
+        with open(os.path.join(os.getcwd(), 'tests', 'wstrust', 'RSTR.xml')) as f:
+            rstr = f.read()
+        wstrustResponse = WSTrustResponse(_call_context, rstr, WSTrustVersion.WSTRUST13)
+        wstrustResponse.parse()
+        self.assertIn("<X509Data>", rstr)
+        self.assertIn(b"<X509Data>", wstrustResponse.token)  # It is in bytes
+
 if __name__ == '__main__':
     unittest.main()

From 7a8dc4581f54db50dae68288f9bf751c66a9ac11 Mon Sep 17 00:00:00 2001
From: Ray Luo <rayluo@microsoft.com>
Date: Tue, 20 Feb 2018 16:55:37 -0800
Subject: [PATCH 2/3] Add extra documentation and test the comparison

---
 adal/wstrust_response.py       | 13 +++++++++++--
 tests/test_wstrust_response.py | 25 ++++++++++++++++++-------
 2 files changed, 29 insertions(+), 9 deletions(-)

diff --git a/adal/wstrust_response.py b/adal/wstrust_response.py
index cfc57c3f..33690ccb 100644
--- a/adal/wstrust_response.py
+++ b/adal/wstrust_response.py
@@ -63,9 +63,18 @@ def findall_content(xml_string, tag):
     >>> findall_content("<ns0:foo> what <bar> ever </bar> content </ns0:foo>", "foo")
     [" what <bar> ever </bar> content "]
 
+    Usually we would use XML parser to extract the data by xpath.
+    However the ElementTree in Python will implicitly normalize the output
+    by "hoisting" the inner inline namespaces into the outmost element.
+    The result will be a semantically equivalent XML snippet,
+    but not fully identical to the original one.
+    While this shouldn't become a problem,
+    in practice it could potentially confuse a picky recipient.
+
+    Introducing this helper, based on Regex, which will return raw content as-is.
     """
-    # https://www.w3.org/TR/REC-xml/#NT-NameChar
-    pattern = r"<(?:\w+:)?%(tag)s>(.*)</(?:\w+:)?%(tag)s" % {"tag": tag}
+    # \w+ is good enough for https://www.w3.org/TR/REC-xml/#NT-NameChar
+    pattern = r"<(?:\w+:)?%(tag)s(?:[^>]*)>(.*)</(?:\w+:)?%(tag)s" % {"tag": tag}
     return re.findall(pattern, xml_string, re.DOTALL)
 
 
diff --git a/tests/test_wstrust_response.py b/tests/test_wstrust_response.py
index bea744fc..913ed879 100644
--- a/tests/test_wstrust_response.py
+++ b/tests/test_wstrust_response.py
@@ -102,14 +102,25 @@ def test_rstr_unparseable_xml(self):
             wstrustResponse = WSTrustResponse(_call_context, '<This is not parseable as an RSTR', WSTrustVersion.WSTRUST13)
             wstrustResponse.parse()
 
-    def test_findall_content(self):
+    def test_findall_content_with_comparison(self):
         content = """
- what <bar> ever </bar> content
-in multiple lines
-"""
-        sample = "<ns0:foo>" + content + "</ns0:foo>"
-        self.assertEqual([content], findall_content(sample, "foo"))
-        self.assertEqual([], findall_content(sample, "nonexist"))
+            <saml:Assertion xmlns:saml="SAML:assertion">
+                <ds:Signature xmlns:ds="http://www.w3.org/2000/09/xmldsig#">
+                foo
+                </ds:Signature>
+            </saml:Assertion>"""
+        sample = ('<ns0:Wrapper xmlns:ns0="namespace0">'
+            + content
+            + '</ns0:Wrapper>')
+
+        # Demonstrate that an XML-based parser won't give you the raw content as-is
+        element = ET.fromstring(sample).findall('{SAML:assertion}Assertion')[0]
+        assertion_via_xml_parser = ET.tostring(element)
+        self.assertNotEqual(content, assertion_via_xml_parser)
+        self.assertNotIn(b"<ds:Signature>", assertion_via_xml_parser)
+
+        # The findall_content() helper, based on Regex, will return content as-is.
+        self.assertEqual([content], findall_content(sample, "Wrapper"))
 
     def test_findall_content_for_real(self):
         with open(os.path.join(os.getcwd(), 'tests', 'wstrust', 'RSTR.xml')) as f:

From f449f2f7d8ae05dd2feb6debcb0d7e411e1bc2ab Mon Sep 17 00:00:00 2001
From: Ray Luo <rayluo@microsoft.com>
Date: Fri, 2 Mar 2018 12:14:37 -0800
Subject: [PATCH 3/3] Further clarify the rationale of this workaround based on
 research

---
 adal/wstrust_response.py | 14 ++++++++++----
 1 file changed, 10 insertions(+), 4 deletions(-)

diff --git a/adal/wstrust_response.py b/adal/wstrust_response.py
index 33690ccb..5b2f5eea 100644
--- a/adal/wstrust_response.py
+++ b/adal/wstrust_response.py
@@ -63,15 +63,21 @@ def findall_content(xml_string, tag):
     >>> findall_content("<ns0:foo> what <bar> ever </bar> content </ns0:foo>", "foo")
     [" what <bar> ever </bar> content "]
 
+    Motivation:
+
     Usually we would use XML parser to extract the data by xpath.
     However the ElementTree in Python will implicitly normalize the output
     by "hoisting" the inner inline namespaces into the outmost element.
     The result will be a semantically equivalent XML snippet,
     but not fully identical to the original one.
-    While this shouldn't become a problem,
-    in practice it could potentially confuse a picky recipient.
-
-    Introducing this helper, based on Regex, which will return raw content as-is.
+    While this effect shouldn't be a problem in other cases,
+    it does not seem to fully comply with the Exclusive XML Canonicalization
+    spec (https://www.w3.org/TR/xml-exc-c14n/), and so it voids the SAML token signature.
+    The SAML signature algorithm needs the "XML -> C14N(XML) -> Signed(C14N(XML))" order.
+
+    The binary extension lxml is probably the canonical way to solve this
+    (https://stackoverflow.com/questions/22959577/python-exclusive-xml-canonicalization-xml-exc-c14n)
+    but here we use this workaround, based on regex, to return raw content as-is.
     """
     # \w+ is good enough for https://www.w3.org/TR/REC-xml/#NT-NameChar
     pattern = r"<(?:\w+:)?%(tag)s(?:[^>]*)>(.*)</(?:\w+:)?%(tag)s" % {"tag": tag}