-
Notifications
You must be signed in to change notification settings - Fork 1
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
to s3 #2
to s3 #2
Changes from 5 commits
72d1a5c
68bd54d
f4b9ee9
2f5c733
f1b2a71
49b6dea
20b9fb4
2ef8ca4
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,12 +0,0 @@ | ||
# -*- coding: utf-8 -*-
"""Package init: exposes Generator and reads __version__ from the VERSION file."""

import io
import os

from .generator import Generator

# VERSION sits next to this file inside the installed package.
VERSION_FILE = os.path.join(os.path.dirname(__file__), 'VERSION')

# Use a context manager so the file handle is closed promptly
# (the original called io.open() and never closed the handle).
with io.open(VERSION_FILE, encoding='utf-8') as _version_file:
    __version__ = _version_file.readline().strip()

__all__ = ['Generator']
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,13 @@ | ||
import os | ||
import logging | ||
|
||
|
||
def generate_path(file_path, base_path='', datapackage=None):
    """Join *file_path* onto *base_path*, expanding format placeholders.

    ``base_path`` may contain ``str.format`` placeholders such as
    ``{owner}``, ``{name}`` and ``{version}``; they are filled from
    *datapackage*, with ``version`` defaulting to ``'latest'``.

    :param file_path: path of the file relative to the base path
    :param base_path: prefix, possibly containing ``{...}`` placeholders
    :param datapackage: mapping of placeholder values (a datapackage dict)
    :raises KeyError: when a placeholder has no value in *datapackage*
    :return: the joined key/path
    """
    # Avoid the original's mutable default argument ({} shared across calls).
    if datapackage is None:
        datapackage = {}
    format_params = {'version': 'latest'}
    format_params.update(datapackage)
    try:
        base_path = base_path.format(**format_params)
    except KeyError as missing:
        # Log the actual missing property; the original formatted the
        # KeyError *class* object, which printed "<class 'KeyError'>".
        logging.error('datapackage.json is missing property: %s', missing)
        raise
    return os.path.join(base_path, file_path)
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,25 @@ | ||
import boto3 | ||
|
||
from datapackage_pipelines.lib.dump.dumper_base import CSVDumper | ||
from datapackage_pipelines_aws import helpers | ||
|
||
|
||
class S3Dumper(CSVDumper):
    """CSVDumper that uploads each produced file to an S3 bucket.

    Processor params:
      - ``bucket`` (required): target S3 bucket name
      - ``path`` (optional): key prefix, may contain ``{owner}``/``{name}``/
        ``{version}`` placeholders expanded from the datapackage
    """

    def initialize(self, params):
        super(S3Dumper, self).initialize(params)
        self.bucket = params['bucket']
        # boto3 resolves credentials itself (env vars / ~/.aws config).
        self.client = boto3.client('s3')
        self.base_path = params.get('path', '')

    def prepare_datapackage(self, datapackage, _):
        # Keep the datapackage so write_file_to_output can expand the
        # path placeholders with its properties.
        self.datapackage = datapackage
        return datapackage

    def write_file_to_output(self, filename, path):
        key = helpers.generate_path(path, self.base_path, self.datapackage)
        # Context manager closes the local file after upload (the
        # original passed an open() handle that was never closed).
        with open(filename, 'rb') as body:
            self.client.put_object(Body=body, Bucket=self.bucket, Key=key)


S3Dumper()()
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,28 @@ | ||
import unittest | ||
|
||
from datapackage_pipelines_aws import helpers | ||
|
||
class TestToS3Proccessor(unittest.TestCase):
    """Unit tests for helpers.generate_path."""

    def test_generate_path(self):
        # No placeholders: a plain join of base path and file path.
        inpath = 'datapackage.json'
        basepath = 'my/test/path'
        expected = 'my/test/path/datapackage.json'
        datapackage = {'name': 'my-package'}
        out = helpers.generate_path(inpath, basepath, datapackage)
        # assertEqual, not the deprecated assertEquals alias.
        self.assertEqual(out, expected)

    def test_generate_path_with_formated_string(self):
        # Placeholders are filled from the datapackage; 'version'
        # defaults to 'latest'.
        inpath = 'datapackage.json'
        basepath = 'my/test/path/{owner}/{name}/{version}'
        expected = 'my/test/path/me/my-package/latest/datapackage.json'
        datapackage = {'name': 'my-package', 'owner': 'me'}
        out = helpers.generate_path(inpath, basepath, datapackage)
        self.assertEqual(out, expected)

    def test_generate_path_errors_without_owner_in_datapackage(self):
        # {owner} has no value in the datapackage, so formatting must
        # raise KeyError (the original kept an unused 'expected' and an
        # unused 'as context' binding; both removed).
        inpath = 'datapackage.json'
        basepath = 'my/test/path/{owner}/{name}/{version}'
        datapackage = {'name': 'my-package'}
        with self.assertRaises(KeyError):
            helpers.generate_path(inpath, basepath, datapackage)
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,75 @@ | ||
import json | ||
import os | ||
import unittest | ||
|
||
from moto import mock_s3 | ||
import boto3 | ||
|
||
from datapackage_pipelines.utilities.lib_test_helpers import ( | ||
mock_processor_test | ||
) | ||
|
||
import datapackage_pipelines_aws.processors | ||
|
||
import logging | ||
log = logging.getLogger(__name__) | ||
|
||
|
||
class TestToS3Proccessor(unittest.TestCase):
    """Integration test: the to_s3 processor uploads datapackage.json to a
    (moto-mocked) S3 bucket under the placeholder-expanded key."""

    def setUp(self):
        self.bucket = 'my.test.bucket'
        self.resources = [{
            'name': 'resource',
            'format': 'csv',
            'path': 'data/test.csv',
            'schema': {
                'fields': [
                    {'name': 'Date', 'type': 'date'},
                    {'name': 'Name', 'type': 'string'},
                ]
            }
        }]
        self.datapackage = {
            'owner': 'me',
            'name': 'my-datapackage',
            'project': 'my-project',
            'resources': self.resources,
        }
        self.params = {
            'bucket': self.bucket,
            'path': 'my/test/path/{owner}/{name}/{version}',
        }
        # Path to the processor we want to test.
        self.processor_dir = \
            os.path.dirname(datapackage_pipelines_aws.processors.__file__)
        self.processor_path = os.path.join(self.processor_dir, 'to_s3.py')

    @mock_s3
    def test_puts_datapackage_on_s3(self):
        s3 = boto3.resource('s3')
        s3.create_bucket(Bucket=self.bucket)

        # Run the processor with no resource rows; only datapackage.json
        # should be written to the bucket.
        mock_processor_test(self.processor_path,
                            (self.params,
                             self.datapackage,
                             iter([])))

        # Collect every key in every (mocked) bucket.
        keys = [key.key
                for bucket in s3.buckets.all()
                for key in bucket.objects.all()]

        # assertEqual, not the deprecated assertEquals alias.
        self.assertEqual(len(keys), 1)
        res_path = 'my/test/path/me/my-datapackage/latest/datapackage.json'
        self.assertEqual(res_path, keys[0])

        content = s3.Object(self.bucket, res_path).get()['Body'] \
            .read().decode('utf-8')
        self.assertDictEqual(json.loads(content), self.datapackage)
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -6,8 +6,8 @@ envlist= | |
|
||
[testenv] | ||
deps= | ||
mock | ||
requests-mock | ||
google-compute-engine | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. google-compute-engine? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. boto was complaining on Travis
This answer summarises the reason GoogleCloudPlatform/compute-image-packages#262 (comment) |
||
moto | ||
pytest | ||
pytest-cov | ||
coverage | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
What about AWS access key and secret?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@akariv boto3 takes care of credential if they are set-up. It looks up in aws config file or searches for ENV variables http://boto3.readthedocs.io/en/latest/guide/configuration.html#aws-config-file
But we can have them as a part of the spec as well if that is a case