Skip to content

Commit

Permalink
Basic ignore empty OpenGraph props
Browse files: browse the repository at this point in the history
  • Loading branch information
Cristi Constantin committed Jul 16, 2019
1 parent 50a0915 commit 8e69411
Show file tree
Hide file tree
Showing 5 changed files with 11 additions and 1 deletion.
5 changes: 5 additions & 0 deletions extruct/_extruct.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@ def extract(htmlstring,
if errors not in ['log', 'ignore', 'strict']:
raise ValueError('Invalid error command, valid values are either "log"'
', "ignore" or "strict"')

try:
tree = parse_xmldom_html(htmlstring, encoding=encoding)
except Exception as e:
Expand All @@ -65,6 +66,7 @@ def extract(htmlstring,
return {}
if errors == 'strict':
raise

processors = []
if 'microdata' in syntaxes:
processors.append(
Expand Down Expand Up @@ -95,6 +97,7 @@ def extract(htmlstring,
('rdfa', RDFaExtractor().extract_items,
tree,
))

output = {}
for syntax, extract, document in processors:
try:
Expand All @@ -108,6 +111,7 @@ def extract(htmlstring,
pass
if errors == 'strict':
raise

if uniform:
uniform_processors = []
if 'microdata' in syntaxes:
Expand All @@ -131,6 +135,7 @@ def extract(htmlstring,
output['opengraph'],
None,
))

for syntax, uniform, raw, schema_context in uniform_processors:
try:
if syntax == 'opengraph':
Expand Down
2 changes: 2 additions & 0 deletions extruct/opengraph.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,8 @@ def extract_items(self, document, base_url=None):
for el in head.xpath('meta[@property and @content]'):
prop = el.attrib['property']
val = el.attrib['content']
if prop == '' or val == '':
continue
ns = prop.partition(':')[0]
if ns in _OG_NAMESPACES:
namespaces[ns] = _OG_NAMESPACES[ns]
Expand Down
1 change: 1 addition & 0 deletions tests/samples/songkick/elysianfields.html
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
<meta property="og:type" content="songkick-concerts:artist">
<meta property="og:title" content="Elysian Fields">
<meta property="og:description" content="Buy tickets for an upcoming Elysian Fields concert near you. List of all Elysian Fields tickets and tour dates for 2017.">
<meta property="og:description" content="" />
<meta property="og:url" content="http://www.songkick.com/artists/236156-elysian-fields">
<meta property="og:image" content="http://images.sk-static.com/images/media/img/col4/20100330-103600-169450.jpg">
<meta property="og:image" content="http://images.sk-static.com/SECONDARY_IMAGE.jpg">
Expand Down
3 changes: 3 additions & 0 deletions tests/samples/songkick/elysianfields.json
Original file line number Diff line number Diff line change
Expand Up @@ -232,6 +232,9 @@
"http://ogp.me/ns#description": [
{
"@value": "Buy tickets for an upcoming Elysian Fields concert near you. List of all Elysian Fields tickets and tour dates for 2017."
},
{
"@value": ""
}
],
"http://ogp.me/ns#image": [
Expand Down
1 change: 0 additions & 1 deletion tests/test_extruct.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@
import pytest

import extruct
from extruct import SYNTAXES
from tests import get_testdata, jsonize_dict, replace_node_ref_with_node_id


Expand Down

0 comments on commit 8e69411

Please sign in to comment.