diff --git a/extruct/opengraph.py b/extruct/opengraph.py index e5b97dae..978d25ab 100644 --- a/extruct/opengraph.py +++ b/extruct/opengraph.py @@ -30,8 +30,8 @@ def extract_items(self, document, base_url=None): namespaces.update(self.get_namespaces(head)) props = [] for el in head.xpath('meta[@property and @content]'): - prop = el.attrib['property'] - val = el.attrib['content'] + prop = el.attrib['property'].strip() + val = el.attrib['content'].strip() if prop == '' or val == '': continue ns = prop.partition(':')[0] diff --git a/tests/samples/songkick/elysianfields.json b/tests/samples/songkick/elysianfields.json index 0f94c14e..92509e1d 100644 --- a/tests/samples/songkick/elysianfields.json +++ b/tests/samples/songkick/elysianfields.json @@ -231,18 +231,18 @@ ], "http://ogp.me/ns#description": [ { - "@value": "Buy tickets for an upcoming Elysian Fields concert near you. List of all Elysian Fields tickets and tour dates for 2017." + "@value": "" }, { - "@value": "" + "@value": "Buy tickets for an upcoming Elysian Fields concert near you. List of all Elysian Fields tickets and tour dates for 2017." } ], "http://ogp.me/ns#image": [ { - "@value": "http://images.sk-static.com/images/media/img/col4/20100330-103600-169450.jpg" + "@value": "http://images.sk-static.com/SECONDARY_IMAGE.jpg" }, { - "@value": "http://images.sk-static.com/SECONDARY_IMAGE.jpg" + "@value": "http://images.sk-static.com/images/media/img/col4/20100330-103600-169450.jpg" } ], "http://ogp.me/ns#site_name": [ diff --git a/tests/test_extruct.py b/tests/test_extruct.py index bb79af8f..3b8ae110 100644 --- a/tests/test_extruct.py +++ b/tests/test_extruct.py @@ -16,9 +16,10 @@ def test_all(self): body = get_testdata('songkick', 'elysianfields.html') expected = json.loads(get_testdata('songkick', 'elysianfields.json').decode('UTF-8')) data = extruct.extract(body, base_url='http://www.songkick.com/artists/236156-elysian-fields') - # See test_rdfa_not_preserving_order() - del data['rdfa'][0]['http://ogp.me/ns#image'] - del expected['rdfa'][0]['http://ogp.me/ns#image'] + for rdf in data['rdfa']: + for key, pairs in rdf.items(): + if ':' in key and isinstance(pairs, list): + rdf[key] = sorted(pairs, key=lambda e: e["@value"]) self.assertEqual(jsonize_dict(data), expected) @pytest.mark.xfail