-
Notifications
You must be signed in to change notification settings - Fork 0
/
template.yaml
140 lines (128 loc) · 3.77 KB
/
template.yaml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
# AWS SAM template for the Serverless Report Service.
AWSTemplateFormatVersion: '2010-09-09'
# The SAM transform is what makes the AWS::Serverless::* resource types usable.
Transform: 'AWS::Serverless-2016-10-31'
Description: >
  Serverless Report Service

# Settings shared by every AWS::Serverless::Function declared in this template.
Globals:
  Function:
    Runtime: python3.9
# Deploy-time inputs. None of them declares a default, so each one has to be
# supplied when the stack is deployed.
Parameters:
  # Passed to CronFunction as API_URL. NoEcho masks the value when the
  # parameter is displayed by CloudFormation.
  ApiUrl:
    Type: String
    NoEcho: true
    Description: 'REQUIRED: External API url for making requests'

  # Used both as the name of the LakeBucket resource and as CronFunction's
  # LAKE_BUCKET_NAME.
  LakeBucketName:
    Type: String
    Description: 'REQUIRED: Lake Bucket name'

  # Passed to CronFunction as MARKET_CATEGORY.
  MarketCategory:
    Type: String
    Description: 'REQUIRED: Market Category to pass on API request'

  # Passed to S3Function as GLUE_CRAWLER_NAME.
  GlueCrawlerName:
    Type: String
    Description: 'REQUIRED: Glue Crawler name to start when new data in S3'
Resources:
  # Scheduled Lambda: per its description, calls the external API and uploads
  # the result to the lake bucket.
  CronFunction:
    Type: AWS::Serverless::Function
    Properties:
      Description: Makes an API request on a schedule and uploads data to S3
      CodeUri: functions/cron/
      Handler: main.lambda_handler
      Timeout: 60
      Events:
        DailySchedule:
          Type: Schedule
          Properties:
            # Fires daily at hour 8 of the cron expression. Schedule
            # expressions are presumably evaluated in UTC, so the "5am" in the
            # rule name would be a UTC-3 local time — TODO confirm.
            Schedule: cron(0 8 * * ? *)
            Name: every-day-5am
      Environment:
        Variables:
          API_URL: !Ref ApiUrl
          LAKE_BUCKET_NAME: !Ref LakeBucketName
          MARKET_CATEGORY: !Ref MarketCategory
      Policies:
        Statement:
          - Effect: Allow
            Action:
              - s3:PutObject
            # Write-only access to objects in the lake bucket. The ARN is
            # built from the bucket-name parameter, not from the LakeBucket
            # resource.
            Resource: !Sub 'arn:aws:s3:::${LakeBucketName}/*'

  # Data lake bucket; its name comes from the LakeBucketName parameter.
  LakeBucket:
    Type: AWS::S3::Bucket
    Properties:
      BucketName: !Ref LakeBucketName

  # Lambda invoked for every newly created object in LakeBucket whose key ends
  # in ".csv"; per its description it then triggers the Glue crawler.
  S3Function:
    Type: AWS::Serverless::Function
    Properties:
      Description: Reads new .csv data from S3 and triggers glue crawler
      CodeUri: functions/s3/
      Handler: main.lambda_handler
      Timeout: 60
      Events:
        FileUploadToS3:
          Type: S3
          Properties:
            Bucket: !Ref LakeBucket
            Events: 's3:ObjectCreated:*'
            Filter:
              S3Key:
                Rules:
                  - Name: suffix
                    Value: '.csv'
      Environment:
        Variables:
          GLUE_CRAWLER_NAME: !Ref GlueCrawlerName
      Policies:
        # NOTE(review): this is the full Glue service policy; if the handler
        # only starts the crawler, a narrower statement may be enough —
        # confirm against the function code before tightening.
        - AWSGlueServiceRole

  # Role assumed by the Glue service when CoinsMarketCrawler runs.
  CoinsMarketCrawlerRole:
    Type: AWS::IAM::Role
    Properties:
      AssumeRolePolicyDocument:
        Version: '2012-10-17'
        Statement:
          - Effect: Allow
            Principal:
              Service:
                - glue.amazonaws.com
            Action:
              - sts:AssumeRole
      Path: '/'
      ManagedPolicyArns:
        - arn:aws:iam::aws:policy/service-role/AWSGlueServiceRole
      Policies:
        - PolicyName: S3BucketAccessPolicy
          PolicyDocument:
            Version: '2012-10-17'
            Statement:
              - Effect: Allow
                Action:
                  - s3:GetObject
                  - s3:PutObject
                # Object-level access limited to keys inside LakeBucket. The
                # "/" before the wildcard matters: "<bucket-arn>*" would also
                # match objects in any other bucket whose name merely starts
                # with this bucket's name.
                Resource: !Sub '${LakeBucket.Arn}/*'

  # Glue Data Catalog database, created in this account's catalog.
  CoinsMarketDatabase:
    Type: AWS::Glue::Database
    Properties:
      CatalogId: !Ref AWS::AccountId
      DatabaseInput:
        Name: coins-market-database

  # Crawler that catalogs data under the lake bucket's raw prefix.
  CoinsMarketCrawler:
    Type: AWS::Glue::Crawler
    Properties:
      # NOTE(review): S3Function is given the crawler name through the
      # GlueCrawlerName parameter, while this name is a literal. The two only
      # agree if the stack is deployed with GlueCrawlerName set to this value —
      # confirm that is intended.
      Name: coins-market-crawler
      Description: Crawler for Coins Market Data in S3.
      Role: !GetAtt CoinsMarketCrawlerRole.Arn
      DatabaseName: !Ref CoinsMarketDatabase
      SchemaChangePolicy:
        UpdateBehavior: UPDATE_IN_DATABASE
        DeleteBehavior: LOG
      # JSON crawler configuration, passed as a string: partitions inherit
      # from the table, and tables merge in new columns.
      Configuration: '{"Version":1.0,"CrawlerOutput":{"Partitions":{"AddOrUpdateBehavior":"InheritFromTable"},"Tables":{"AddOrUpdateBehavior":"MergeNewColumns"}}}'
      Targets:
        S3Targets:
          # The path hard-codes the "finance-banking" segment.
          - Path: !Sub '${LakeBucket}/raw/coins_market_data/finance-banking'