Skip to content

Commit

Permalink
add utils and stats submodules
Browse files Browse the repository at this point in the history
  • Loading branch information
Guts committed Sep 2, 2019
1 parent 584f536 commit 85f881d
Show file tree
Hide file tree
Showing 3 changed files with 383 additions and 0 deletions.
5 changes: 5 additions & 0 deletions isogeo_to_docx/utils/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# coding: utf-8
#! python3 # noqa: E265

from .formatter import Formatter # noqa: F401
from .stats import Stats # noqa: F401
203 changes: 203 additions & 0 deletions isogeo_to_docx/utils/formatter.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,203 @@
# -*- coding: UTF-8 -*-

# ------------------------------------------------------------------------------
# Name: Isogeo to Microsoft Word 2010
# Purpose: Get metadata from an Isogeo share and store them into
# a Word document for each metadata. It's one of the submodules
# of isogeo2office (https://github.com/isogeo/isogeo-2-office).
#
# Author: Julien Moura (@geojulien) for Isogeo
#
# Python: 3.x (uses urllib.parse and function annotations)
# Created: 14/08/2014
# Updated: 28/01/2016
# ------------------------------------------------------------------------------

# ##############################################################################
# ########## Libraries #############
# ##################################

# Standard library
import logging
from urllib.parse import urlparse

# 3rd party library
from isogeo_pysdk import (
Condition,
IsogeoTranslator,
IsogeoUtils,
License,
Limitation,
Specification,
)

# ##############################################################################
# ############ Globals ############
# #################################

# module logger; the name is the package name, not this submodule's name
logger = logging.getLogger("isogeo_to_docx") # LOG
# Isogeo SDK helper instantiated once at import time; Formatter.specifications
# calls its hlpr_datetimes() method on publication dates
utils = IsogeoUtils()

# ##############################################################################
# ########## Classes ###############
# ##################################


class Formatter(object):
    """Metadata formatter to avoid repeating operations on metadata during export in different formats.

    :param str lang: selected language, compared case-insensitively ("fr" selects
        the French settings, anything else the fallback ones)
    :param str output_type: name of output type to format for. Defaults to 'Excel'
    :param tuple default_values: values used to replace missing values. Structure:

        (
            str_for_missing_strings_and_integers,
            str_for_missing_dates
        )
    """

    def __init__(
        self,
        lang="FR",
        output_type="Excel",
        default_values=("NR", "1970-01-01T00:00:00+00:00"),
    ):
        # locale: compare the normalized value, otherwise the default "FR"
        # would never match "fr" and French settings would never be applied
        self.lang = lang.lower()
        if self.lang == "fr":
            self.dates_fmt = "DD/MM/YYYY"
            self.locale_fmt = "fr_FR"
        else:
            self.dates_fmt = "YYYY/MM/DD"
            # NOTE(review): "uk_UK" does not look like a valid locale name
            # ("en_GB" was probably intended) - kept as-is, confirm with callers
            self.locale_fmt = "uk_UK"

        # store params and imports as attributes
        self.output_type = output_type.lower()
        self.defs = default_values
        self.isogeo_tr = IsogeoTranslator(lang).tr

    # ------------ Helpers ------------------------------------------------------
    @staticmethod
    def _strftime_pattern(dates_fmt: str) -> str:
        """Translate a 'DD/MM/YYYY'-like pattern into strftime directives.

        ``self.dates_fmt`` contains no ``%`` directive, so passing it directly
        to ``strftime`` returns the pattern itself instead of a date.

        :param str dates_fmt: pattern made of DD, MM and YYYY tokens
        """
        return dates_fmt.replace("YYYY", "%Y").replace("MM", "%m").replace("DD", "%d")

    # ------------ Metadata sections formatter --------------------------------
    def conditions(self, md_cgus: list) -> list:
        """Render input metadata CGUs (conditions) as a new list of strings.

        Each item is rendered as "<license name> <description>. <content> <link>".
        Items which are not dictionaries are logged and skipped. Missing (None)
        values are rendered as empty strings.

        :param list md_cgus: conditions (dicts) extracted from an Isogeo metadata
        """
        cgus_out = []
        for c_in in md_cgus or []:
            if not isinstance(c_in, dict):
                logger.error("Condition expects a dict, not '{}'".format(type(c_in)))
                continue
            # load condition object
            condition_in = Condition(**c_in)
            lic = condition_in.license
            if isinstance(lic, License):
                name = lic.name
                link = lic.link
                content = lic.content
            else:
                # no license attached: use the translated placeholder as name
                name = self.isogeo_tr("conditions", "noLicense")
                link = content = ""

            # store into the final list
            cgus_out.append(
                "{} {}. {} {}".format(
                    name or "",
                    condition_in.description or "",
                    content or "",
                    link or "",
                )
            )
        # return formatted result
        return cgus_out

    def limitations(self, md_limitations: list) -> list:
        """Render input metadata limitations as a new list of strings.

        Each item is rendered as
        "<type> <description>. <restriction> <directive description> <directive name>".
        Items which are not dictionaries are logged and skipped. Missing (None)
        values are rendered as empty strings.

        :param list md_limitations: limitations (dicts) extracted from an Isogeo metadata
        """
        lims_out = []
        for l_in in md_limitations or []:
            if not isinstance(l_in, dict):
                logger.error("Limitation expects a dict, not '{}'".format(type(l_in)))
                continue

            lim_type = self.isogeo_tr("limitations", l_in.get("type"))

            # the restriction only applies to legal limitations
            restriction = ""
            if l_in.get("type") == "legal":
                restriction = self.isogeo_tr("restrictions", l_in.get("restriction"))

            # INSPIRE precision: the key can be present with a null value
            directive = l_in.get("directive") or {}

            # store into the final list
            lims_out.append(
                "{} {}. {} {} {}".format(
                    lim_type,
                    l_in.get("description") or "",
                    restriction,
                    directive.get("description") or "",
                    directive.get("name") or "",
                )
            )
        # return formatted result
        return lims_out

    def specifications(self, md_specifications: list) -> list:
        """Render input metadata specifications as a new list of strings.

        Each item is rendered as "<name> <publication date> <link> - <conformity>".
        Items which are not dictionaries are logged and skipped.

        :param list md_specifications: specifications (dicts) extracted from an Isogeo metadata
        """
        specs_out = []
        # strftime-compatible version of the locale date pattern
        date_pattern = self._strftime_pattern(self.dates_fmt)
        for s_in in md_specifications or []:
            if not isinstance(s_in, dict):
                logger.error(
                    "Specification expects a dict, not '{}'".format(type(s_in))
                )
                continue
            spec_in = Specification(**s_in.get("specification"))
            # translate specification conformity
            if s_in.get("conformant"):
                conformity = self.isogeo_tr("quality", "isConform")
            else:
                conformity = self.isogeo_tr("quality", "isNotConform")
            # make data human readable
            if spec_in.published:
                spec_date = utils.hlpr_datetimes(spec_in.published).strftime(
                    date_pattern
                )
            else:
                logger.warning(
                    "Publication date is missing in the specification '{} ({})'".format(
                        spec_in.name, spec_in._tag
                    )
                )
                spec_date = ""
            # store into the final list
            specs_out.append(
                "{} {} {} - {}".format(
                    spec_in.name or "", spec_date, spec_in.link or "", conformity
                )
            )

        # return formatted result
        return specs_out


# ###############################################################################
# ###### Stand alone program ########
# ###################################
if __name__ == "__main__":
    # Try me: instantiate the formatter with its default settings
    formatter = Formatter()
175 changes: 175 additions & 0 deletions isogeo_to_docx/utils/stats.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,175 @@
# -*- coding: UTF-8 -*-
#!/usr/bin/env python
from __future__ import absolute_import, print_function, unicode_literals

# ----------------------------------------------------------------------------
# Name: OpenCatalog to Excel
# Purpose: Get metadata from an Isogeo OpenCatalog and store them into
# an Excel workbook.
#
# Author: Isogeo
#
# Python: 2.7.x
# Created: 14/08/2014
# Updated: 28/01/2016
# ----------------------------------------------------------------------------

# ###########################################################################
# ########## Libraries ##########
# ###############################

# Standard library
from collections import Counter, defaultdict
import logging

# 3rd party library
from openpyxl.chart import BarChart, Reference


# ##############################################################################
# ############ Globals ############
# #################################

# LOG
# module logger; the name is the package name, not this submodule's name
logger = logging.getLogger("isogeo_to_docx")

# ############################################################################
# ######## Classes ###############
# ################################


class Stats(object):
    """Collect statistics about metadata and build the related Excel charts.

    Counters are created per instance in ``__init__`` so that two ``Stats``
    objects never share (and pollute) each other's figures.
    """

    # Class-level declarations kept so that attribute access on the class
    # itself keeps working; every instance gets its own containers in __init__.
    md_empty_fields = defaultdict(list)
    md_types_repartition = defaultdict(int)
    md_tags_occurences = defaultdict(int)

    def __init__(self, lang=None):
        """Instantiate stats class.

        :param lang: currently unused (translation hook is disabled below)
        """
        # self._ = _
        super(Stats, self).__init__()
        # per-instance state: class-level mutable containers would be shared
        # by every instance created in the same process
        self.md_empty_fields = defaultdict(list)
        self.md_types_repartition = defaultdict(int)
        self.md_tags_occurences = defaultdict(int)

    def fillfull(self):
        """Calculate fields fillfull level.

        Placeholder: returns a constant string for now.
        """
        return "HOHOHOHO"

    def week_work(self, search_results=None):
        """Return histogram data to represent cataloging activity per week.

        Placeholder: prints the type of each metadata and returns a constant
        string for now.

        :param list search_results: metadata (dicts). Defaults to an empty list.
        """
        # the default used to be the `list` type itself, which is not iterable
        for md in search_results or []:
            print(md.get("type", "No md, no type"))

        return "weekly baby!"

    # Disabled: relies on a gettext-like `_` function and on PieChart/DataPoint,
    # none of which are available in this module.
    # def type_pie(self, sheet, total=20):
    #     """Return a pie chart representing metadata by types."""
    #     data = (
    #         (_("Type"), _("Count")),
    #         (_("Vector"), self.md_types_repartition.get("vector", 0)),
    #         (_("Raster"), self.md_types_repartition.get("raster", 0)),
    #         (_("Service"), self.md_types_repartition.get("service", 0)),
    #         (_("Resource"), self.md_types_repartition.get("resource", 0)),
    #     )

    #     # write data into worksheet
    #     for row in data:
    #         sheet.append(row)

    #     # Pie chart
    #     pie = PieChart()
    #     labels = Reference(sheet, min_col=1, min_row=2, max_row=5)
    #     data = Reference(sheet, min_col=2, min_row=1, max_row=5)
    #     pie.add_data(data, titles_from_data=True)
    #     pie.set_categories(labels)
    #     pie.title = _("Metadata by types")

    #     # Cut the first slice out of the pie
    #     slice = DataPoint(idx=0, explosion=20)
    #     pie.series[0].data_points = [slice]

    #     return pie

    def keywords_bar(self, sheet, results, total=20, max_keywords=50):
        """Write keywords occurrences into a worksheet and return the matching bar chart.

        Only keywords whose `_tag` starts with "keyword:is" are counted. A
        header row followed by one row per keyword (most frequent first) is
        appended to the worksheet; the chart references exactly those rows.

        :param sheet: openpyxl worksheet where the data table is appended
        :param list results: metadata (dicts) with an optional "keywords" list
        :param int total: unused, kept for backward compatibility
        :param int max_keywords: maximum number of keywords to chart. Defaults to 50.
        """
        # tags parsing
        keywords = Counter()
        for md in results or []:
            for kw in md.get("keywords") or []:
                # `_tag` may be missing or null
                if (kw.get("_tag") or "").startswith("keyword:is"):
                    keywords[kw.get("text")] += 1
        top_keywords = keywords.most_common(max_keywords)

        # write data into worksheet, remembering where the table starts so
        # the chart still points at it if the sheet already contained rows
        sheet.append(("Keyword", "Count"))
        header_row = sheet.max_row
        for row in top_keywords:
            sheet.append(row)
        # always reference at least one data row, even without any keyword
        last_row = header_row + max(len(top_keywords), 1)

        bar = BarChart()
        bar.type = "bar"
        bar.style = 10
        bar.title = "Keywords by occurrences"
        bar.y_axis.title = "Occurences"
        bar.x_axis.title = "Keywords"

        # a single data column (counts, header included as the series title);
        # referencing an extra empty column would add a blank series
        data = Reference(sheet, min_col=2, min_row=header_row, max_row=last_row)
        cats = Reference(sheet, min_col=1, min_row=header_row + 1, max_row=last_row)
        bar.add_data(data, titles_from_data=True)
        bar.set_categories(cats)
        bar.shape = 4

        return bar


# ############################################################################
# ###### Stand alone program ########
# ###################################
if __name__ == "__main__":
    # Standalone execution and tests.
    from os import environ
    from isogeo_pysdk import Isogeo, __version__ as pysdk_version
    from openpyxl import Workbook

    # API access: credentials are read from the environment
    isogeo = Isogeo(
        client_id=environ.get("ISOGEO_API_DEV_ID"),
        client_secret=environ.get("ISOGEO_API_DEV_SECRET"),
    )
    token = isogeo.connect()

    # search, asking the API to include keywords in the results
    search_response = isogeo.search(token, whole_results=0, include=["keywords"])

    # workbook
    workbook = Workbook()
    # ws = workbook.active

    # this app
    stats = Stats()
    # stats.week_work(search_response.get("results"))
    # print(type(stats.fillfull()))

    # metadata types
    dashboard = workbook.create_sheet(title="Dashboard")
    # # pie = stats.type_pie(dashboard,
    #                        search_response.get('total'))
    # # dashboard.add_chart(pie, "D1")

    # keywords chart, anchored below the data table
    chart = stats.keywords_bar(dashboard, search_response.get("results"))
    dashboard.add_chart(chart, "A10")

    # write xlsx
    workbook.save("test.xlsx")

0 comments on commit 85f881d

Please sign in to comment.