Skip to content

Commit

Permalink
six: parsers/pyMicrodata/__init__.py: headers, StringIO, urlparse, ba…
Browse files Browse the repository at this point in the history
…sestring
  • Loading branch information
joernhees committed Aug 27, 2015
1 parent 79e3f86 commit 1dab29b
Show file tree
Hide file tree
Showing 2 changed files with 30 additions and 41 deletions.
69 changes: 29 additions & 40 deletions rdflib/plugins/parsers/pyMicrodata/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,23 +41,18 @@
@license: This software is available for use under the
U{W3C® SOFTWARE NOTICE AND LICENSE<href="http://www.w3.org/Consortium/Legal/2002/copyright-software-20021231">}
@copyright: W3C
"""
"""
$Id: __init__.py,v 1.15 2012/09/05 16:40:43 ivan Exp $ $Date: 2012/09/05 16:40:43 $
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

__version__ = "1.2"
__author__ = 'Ivan Herman'
__contact__ = 'Ivan Herman, ivan@w3.org'

import sys
PY3 = (sys.version_info[0] >= 3)

if PY3 :
from io import StringIO
else :
from StringIO import StringIO

import datetime
import os
Expand All @@ -76,10 +71,9 @@
from rdflib.RDFS import RDFSNS as ns_rdfs
from rdflib.RDF import RDFNS as ns_rdf

if PY3 :
from urllib.parse import urlparse
else :
from urlparse import urlparse
from rdflib.py3compat import StringIO
from rdflib.py3compat import urlparse
from rdflib.py3compat import string_types

debug = False

Expand All @@ -98,7 +92,7 @@ class MicrodataError(Exception) :
def __init__(self, msg) :
self.msg = msg
Exception.__init__(self)

class HTTPError(MicrodataError) :
"""Raised when HTTP problems are detected. It does not add any new functionality to the
Exception class."""
Expand Down Expand Up @@ -144,19 +138,19 @@ def __init__(self, base = "", vocab_expansion = False, vocab_cache = True) :
self.base = base
self.vocab_expansion = vocab_expansion
self.vocab_cache = vocab_cache

def _generate_error_graph(self, pgraph, full_msg, uri = None) :
"""
Generate an error message into the graph. This method is usually used when reacting to exceptions.
Later versions of pyMicrodata may have more detailed error conditions on which it wishes to react. At the moment, this
is fairly crude...
"""
if pgraph == None :
retval = Graph()
else :
retval = pgraph

pgraph.bind( "dc","http://purl.org/dc/terms/" )
pgraph.bind( "xsd",'http://www.w3.org/2001/XMLSchema#' )
pgraph.bind( "ht",'http://www.w3.org/2006/http#' )
Expand All @@ -166,21 +160,21 @@ def _generate_error_graph(self, pgraph, full_msg, uri = None) :
retval.add((bnode, ns_rdf["type"], ns_micro["Error"]))
retval.add((bnode, ns_dc["description"], Literal(full_msg)))
retval.add((bnode, ns_dc["date"], Literal(datetime.datetime.utcnow().isoformat(),datatype=ns_xsd["dateTime"])))

if uri != None :
htbnode = BNode()
retval.add( (bnode, ns_micro["context"],htbnode) )
retval.add( (htbnode, ns_rdf["type"], ns_ht["Request"]) )
retval.add( (htbnode, ns_ht["requestURI"], Literal(uri)) )

if self.http_status != None and self.http_status != 200:
htbnode = BNode()
retval.add( (bnode, ns_micro["context"],htbnode) )
retval.add( (htbnode, ns_rdf["type"], ns_ht["Response"]) )
retval.add( (htbnode, ns_ht["responseCode"], URIRef("http://www.w3.org/2006/http#%s" % self.http_status)) )

return retval

def _get_input(self, name) :
"""
Trying to guess whether "name" is a URI, a string; it then tries to open these as such accordingly,
Expand All @@ -190,12 +184,7 @@ def _get_input(self, name) :
@type name: string or a file-like object
@return: a file like object if opening "name" is possible and successful, "name" otherwise
"""
try :
# Python 2 branch
isstring = isinstance(name, basestring)
except :
# Python 3 branch
isstring = isinstance(name, str)
isstring = isinstance(name, string_types)

if isstring :
# check if this is a URI, ie, if there is a valid 'scheme' part
Expand All @@ -209,7 +198,7 @@ def _get_input(self, name) :
return open(name, 'rb')
else :
return name

####################################################################################################################
# Externally used methods
#
Expand All @@ -226,20 +215,20 @@ def graph_from_DOM(self, dom, graph = None) :
if graph == None :
# Create the RDF Graph, that will contain the return triples...
graph = Graph()
conversion = MicrodataConversion(dom.documentElement,
graph,
base = self.base,
vocab_expansion = self.vocab_expansion,

conversion = MicrodataConversion(dom.documentElement,
graph,
base = self.base,
vocab_expansion = self.vocab_expansion,
vocab_cache = self.vocab_cache)
conversion.convert()
return graph

def graph_from_source(self, name, graph = None, rdfOutput = False) :
"""
Extract an RDF graph from a microdata source. The source is parsed, the RDF extracted, and the RDF Graph is
returned. This is a front-end to the L{pyMicrodata.graph_from_DOM} method.
@param name: a URI, a file name, or a file-like object
@return: an RDF Graph
@rtype: rdflib Graph instance
Expand All @@ -261,7 +250,7 @@ def graph_from_source(self, name, graph = None, rdfOutput = False) :
self.http_status = 500
if not rdfOutput : raise e
return self._generate_error_graph(graph, str(e), uri=name)

dom = None
try :
import warnings
Expand All @@ -278,7 +267,7 @@ def graph_from_source(self, name, graph = None, rdfOutput = False) :
e = sys.exc_info()[1]
self.http_status = 400
if not rdfOutput : raise e
return self._generate_error_graph(graph, str(e), uri=name)
return self._generate_error_graph(graph, str(e), uri=name)

except Exception :
# Something nasty happened:-(
Expand All @@ -289,7 +278,7 @@ def graph_from_source(self, name, graph = None, rdfOutput = False) :
self.http_status = 500
if not rdfOutput : raise e
return self._generate_error_graph(graph, str(e), uri=name)

def rdf_from_sources(self, names, outputFormat = "pretty-xml", rdfOutput = False) :
"""
Extract an RDF graph from a list of RDFa sources and serialize them in one graph. The sources are parsed, the RDF
Expand Down Expand Up @@ -329,7 +318,7 @@ def processURI(uri, outputFormat, form) :
"""The standard processing of a microdata URI with options given in a form, i.e., as an entry point from a CGI call.
The call accepts extra form options (eg, HTTP GET options) as follows:
@param uri: URI to access. Note that the "text:" and "uploaded:" values are treated separately; the former is for textual input (in which case a StringIO is used to get the data) and the latter is for an uploaded file, where the form gives access to the file directly.
@param outputFormat: serialization formats, as understood by RDFLib. Note that though "turtle" is
a possible parameter value, some versions of the RDFLib turtle generation do funny (though legal) things with
Expand Down Expand Up @@ -370,7 +359,7 @@ def _get_option(param, compare_value, default) :
# Decide the output format; the issue is what should happen in case of a top level error like an inaccessibility of
# the html source: should a graph be returned or an HTML page with an error message?

# decide whether HTML or RDF should be sent.
# decide whether HTML or RDF should be sent.
htmlOutput = False
#if 'HTTP_ACCEPT' in os.environ :
# acc = os.environ['HTTP_ACCEPT']
Expand Down Expand Up @@ -404,7 +393,7 @@ def _get_option(param, compare_value, default) :
import cgi
h = sys.exc_info()[1]
retval = 'Content-type: text/html; charset=utf-8\nStatus: %s \n\n' % h.http_code
retval += "<html>\n"
retval += "<html>\n"
retval += "<head>\n"
retval += "<title>HTTP Error in Microdata processing</title>\n"
retval += "</head><body>\n"
Expand All @@ -422,7 +411,7 @@ def _get_option(param, compare_value, default) :
import traceback, cgi

retval = 'Content-type: text/html; charset=utf-8\nStatus: %s\n\n' % processor.http_status
retval += "<html>\n"
retval += "<html>\n"
retval += "<head>\n"
retval += "<title>Exception in Microdata processing</title>\n"
retval += "</head><body>\n"
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@ def setup_python3():
join(tmp_src, 'rdflib', 'plugins', 'parsers', 'nquads.py'),
join(tmp_src, 'rdflib', 'plugins', 'parsers', 'nt.py'),
join(tmp_src, 'rdflib', 'plugins', 'parsers', 'ntriples.py'),
# join(tmp_src, 'rdflib', 'plugins', 'parsers', 'pyMicrodata', '__init__.py'),
join(tmp_src, 'rdflib', 'plugins', 'parsers', 'pyMicrodata', '__init__.py'),
# join(tmp_src, 'rdflib', 'plugins', 'parsers', 'pyMicrodata', 'microdata.py'),
join(tmp_src, 'rdflib', 'plugins', 'parsers', 'pyMicrodata', 'registry.py'),
# join(tmp_src, 'rdflib', 'plugins', 'parsers', 'pyMicrodata', 'utils.py'),
Expand Down

0 comments on commit 1dab29b

Please sign in to comment.