From 34fe162ba35028a2998bd6277834a6799e03a756 Mon Sep 17 00:00:00 2001 From: jbeemster Date: Wed, 25 Aug 2021 12:12:38 +0200 Subject: [PATCH] Initial commit --- .github/workflows/ci.yml | 37 ++++ .gitignore | 35 ++++ CHANGELOG | 3 + LICENSE-2.0.txt | 202 +++++++++++++++++++++ README.md | 245 +++++++++++++++++++++++++ main.tf | 288 ++++++++++++++++++++++++++++++ outputs.tf | 14 ++ templates/config.json.tmpl | 20 +++ templates/iglu_resolver.json.tmpl | 8 + templates/startup-script.sh.tmpl | 84 +++++++++ variables.tf | 187 +++++++++++++++++++ versions.tf | 10 ++ 12 files changed, 1133 insertions(+) create mode 100644 .github/workflows/ci.yml create mode 100644 .gitignore create mode 100644 CHANGELOG create mode 100644 LICENSE-2.0.txt create mode 100644 README.md create mode 100644 main.tf create mode 100644 outputs.tf create mode 100644 templates/config.json.tmpl create mode 100644 templates/iglu_resolver.json.tmpl create mode 100644 templates/startup-script.sh.tmpl create mode 100644 variables.tf create mode 100644 versions.tf diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml new file mode 100644 index 0000000..ca21414 --- /dev/null +++ b/.github/workflows/ci.yml @@ -0,0 +1,37 @@ +name: ci + +on: + push: + branches: + - '*' + - '*/*' + - '**' + +jobs: + test: + runs-on: ubuntu-latest + steps: + - name: Checkout + uses: actions/checkout@v2 + + - name: Install Go + uses: actions/setup-go@v1 + with: + go-version: 1.16 + + - name: Cache go modules + uses: actions/cache@v2 + with: + path: ~/go/pkg/mod + key: ${{ runner.os }}-go-${{ hashFiles('**/go.sum') }} + restore-keys: | + ${{ runner.os }}-go- + + - name: Install Terraform + uses: hashicorp/setup-terraform@v1 + with: + terraform_version: 0.15.5 + terraform_wrapper: false + + - name: Check formatting + run: terraform fmt -check -recursive diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..bec9292 --- /dev/null +++ b/.gitignore @@ -0,0 +1,35 @@ + +# Local .terraform directories 
+**/.terraform/* + +# .tfstate files +*.tfstate +*.tfstate.* +*.terraform.lock.hcl + +# Crash log files +crash.log + +# Ignore any .tfvars files that are generated automatically for each Terraform run. Most +# .tfvars files are managed as part of configuration and so should be included in +# version control. +# +# example.tfvars +*.tfvars + +# Ignore override files as they are usually used to override resources locally and so +# are not checked in +override.tf +override.tf.json +*_override.tf +*_override.tf.json + +# Credentials Files +**/credentials.json +**/*.json + +# Local testing variables +*.tfvars + +/.idea/ +.DS_Store diff --git a/CHANGELOG b/CHANGELOG new file mode 100644 index 0000000..f4a0dd7 --- /dev/null +++ b/CHANGELOG @@ -0,0 +1,3 @@ +Version 0.1.0 (2021-08-25) +-------------------------- +Initial release diff --git a/LICENSE-2.0.txt b/LICENSE-2.0.txt new file mode 100644 index 0000000..393f675 --- /dev/null +++ b/LICENSE-2.0.txt @@ -0,0 +1,202 @@ + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. 
+ + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. 
For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. 
If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. 
You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. 
Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. 
+ + Copyright 2021-2021 Snowplow Analytics Ltd. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. \ No newline at end of file diff --git a/README.md b/README.md new file mode 100644 index 0000000..ba6168d --- /dev/null +++ b/README.md @@ -0,0 +1,245 @@ +[![Release][release-image]][release] [![CI][ci-image]][ci] [![License][license-image]][license] [![Registry][registry-image]][registry] [![Source][source-image]][source] + +# terraform-google-postgres-loader-pubsub-ce + +A Terraform module which deploys a Snowplow Postgres Loader application on Google running on top of Compute Engine. If you want to use a custom image for this deployment you will need to ensure it is based on top of Ubuntu 20.04. + +## Telemetry + +This module by default collects and forwards telemetry information to Snowplow to understand how our applications are being used. No identifying information about your sub-account or account fingerprints are ever forwarded to us - it is very simple information about what modules and applications are deployed and active. + +If you wish to subscribe to our mailing list for updates to these modules or security advisories please set the `user_provided_id` variable to include a valid email address which we can reach you at. + +### How do I disable it? + +To disable telemetry simply set variable `telemetry_enabled = false`. + +### What are you collecting? 
+ +For details on what information is collected please see this module: https://github.com/snowplow-devops/terraform-snowplow-telemetry + +## Usage + +The Postgres Loader can load both your enriched and bad data into a Postgres database - by default we are using CloudSQL as it affords a simple and cost-effective way to get started. + +To start loading "enriched" data into Postgres: + +```hcl +module "enriched_topic" { + source = "snowplow-devops/pubsub-topic/google" + version = "0.1.0" + + name = "enriched-topic" +} + +module "pipeline_db" { + source = "snowplow-devops/cloud-sql/google" + version = "0.1.0" + + name = "pipeline-db" + + region = var.region + db_name = local.pipeline_db_name + db_username = local.pipeline_db_username + db_password = local.pipeline_db_password + + # Note: this exposes your data to the internet - take care to ensure your allowlist is strict enough + authorized_networks = local.pipeline_authorized_networks + + # Note: required for higher concurrent connections count which is necessary for loading both good and bad data at the same time + tier = "db-g1-small" +} + +module "postgres_loader_enriched" { + source = "snowplow-devops/postgres-loader-pubsub-ce/google" + + name = "pg-loader-enriched-server" + + network = var.network + subnetwork = var.subnetwork + region = var.region + project_id = var.project_id + + ssh_key_pairs = [] + ssh_ip_allowlist = ["0.0.0.0/0"] + + in_topic_name = module.enriched_topic.name + purpose = "ENRICHED_EVENTS" + schema_name = "atomic" + + # Note: Using the connection_name will enforce the use of a Cloud SQL Proxy rather than a direct connection + # To instead use a direct connection you will need to define the `db_host` parameter instead. 
+ db_instance_name = module.pipeline_db.connection_name + db_port = module.pipeline_db.port + db_name = local.pipeline_db_name + db_username = local.pipeline_db_username + db_password = local.pipeline_db_password + + # Linking in the custom Iglu Server here + custom_iglu_resolvers = [ + { + name = "Iglu Server" + priority = 0 + uri = "http://your-iglu-server-endpoint/api" + api_key = var.iglu_super_api_key + vendor_prefixes = [] + } + ] +} +``` + +To load the "bad" data instead: + +```hcl +module "bad_1_topic" { + source = "snowplow-devops/pubsub-topic/google" + version = "0.1.0" + + name = "bad-1-topic" +} + +module "postgres_loader_bad" { + source = "snowplow-devops/postgres-loader-pubsub-ce/google" + + name = "pg-loader-bad-server" + + network = var.network + subnetwork = var.subnetwork + region = var.region + project_id = var.project_id + + ssh_key_pairs = [] + ssh_ip_allowlist = ["0.0.0.0/0"] + + in_topic_name = module.bad_1_topic.name + + # Note: The purpose defines what the input data set should look like + purpose = "JSON" + + # Note: This schema is created automatically by the VM on launch + schema_name = "atomic_bad" + + # Note: Using the connection_name will enforce the use of a Cloud SQL Proxy rather than a direct connection + # To instead use a direct connection you will need to define the `db_host` parameter instead. 
+ db_instance_name = module.pipeline_db.connection_name + db_port = module.pipeline_db.port + db_name = local.pipeline_db_name + db_username = local.pipeline_db_username + db_password = local.pipeline_db_password + + # Linking in the custom Iglu Server here + custom_iglu_resolvers = [ + { + name = "Iglu Server" + priority = 0 + uri = "http://your-iglu-server-endpoint/api" + api_key = var.iglu_super_api_key + vendor_prefixes = [] + } + ] +} +``` + +## Requirements + +| Name | Version | +|------|---------| +| [terraform](#requirement\_terraform) | >= 0.15 | +| [google](#requirement\_google) | >= 3.44.0 | + +## Providers + +| Name | Version | +|------|---------| +| [google](#provider\_google) | >= 3.44.0 | + +## Modules + +| Name | Source | Version | +|------|--------|---------| +| [telemetry](#module\_telemetry) | snowplow-devops/telemetry/snowplow | 0.2.0 | + +## Resources + +| Name | Type | +|------|------| +| [google_compute_firewall.egress](https://registry.terraform.io/providers/hashicorp/google/latest/docs/resources/compute_firewall) | resource | +| [google_compute_firewall.ingress_ssh](https://registry.terraform.io/providers/hashicorp/google/latest/docs/resources/compute_firewall) | resource | +| [google_compute_instance_template.tpl](https://registry.terraform.io/providers/hashicorp/google/latest/docs/resources/compute_instance_template) | resource | +| [google_compute_region_instance_group_manager.grp](https://registry.terraform.io/providers/hashicorp/google/latest/docs/resources/compute_region_instance_group_manager) | resource | +| [google_project_iam_member.sa_cloud_sql_client](https://registry.terraform.io/providers/hashicorp/google/latest/docs/resources/project_iam_member) | resource | +| [google_project_iam_member.sa_logging_log_writer](https://registry.terraform.io/providers/hashicorp/google/latest/docs/resources/project_iam_member) | resource | +| 
[google_project_iam_member.sa_pubsub_publisher](https://registry.terraform.io/providers/hashicorp/google/latest/docs/resources/project_iam_member) | resource | +| [google_project_iam_member.sa_pubsub_subscriber](https://registry.terraform.io/providers/hashicorp/google/latest/docs/resources/project_iam_member) | resource | +| [google_project_iam_member.sa_pubsub_viewer](https://registry.terraform.io/providers/hashicorp/google/latest/docs/resources/project_iam_member) | resource | +| [google_pubsub_subscription.in](https://registry.terraform.io/providers/hashicorp/google/latest/docs/resources/pubsub_subscription) | resource | +| [google_service_account.sa](https://registry.terraform.io/providers/hashicorp/google/latest/docs/resources/service_account) | resource | +| [google_compute_image.ubuntu_20_04](https://registry.terraform.io/providers/hashicorp/google/latest/docs/data-sources/compute_image) | data source | + +## Inputs + +| Name | Description | Type | Default | Required | +|------|-------------|------|---------|:--------:| +| [db\_name](#input\_db\_name) | The name of the database to connect to | `string` | n/a | yes | +| [db\_password](#input\_db\_password) | The password to use to connect to the database | `string` | n/a | yes | +| [db\_port](#input\_db\_port) | The port the database is running on | `number` | n/a | yes | +| [db\_username](#input\_db\_username) | The username to use to connect to the database | `string` | n/a | yes | +| [in\_topic\_name](#input\_in\_topic\_name) | The name of the input pubsub topic that the loader will pull data from | `string` | n/a | yes | +| [name](#input\_name) | A name which will be pre-pended to the resources created | `string` | n/a | yes | +| [network](#input\_network) | The name of the network to deploy within | `string` | n/a | yes | +| [project\_id](#input\_project\_id) | The id of the project in which this resource is created | `string` | n/a | yes | +| [purpose](#input\_purpose) | The type of data the loader will 
be pulling which can be one of ENRICHED\_EVENTS or JSON (Note: JSON can be used for loading bad rows) | `string` | n/a | yes | +| [region](#input\_region) | The name of the region to deploy within | `string` | n/a | yes | +| [schema\_name](#input\_schema\_name) | The database schema to load data into (e.g atomic \| atomic\_bad) | `string` | n/a | yes | +| [associate\_public\_ip\_address](#input\_associate\_public\_ip\_address) | Whether to assign a public ip address to this instance; if false this instance must be behind a Cloud NAT to connect to the internet | `bool` | `true` | no | +| [custom\_iglu\_resolvers](#input\_custom\_iglu\_resolvers) | The custom Iglu Resolvers that will be used by the loader to resolve and validate events |
list(object({
name = string
priority = number
uri = string
api_key = string
vendor_prefixes = list(string)
}))
| `[]` | no | +| [db\_host](#input\_db\_host) | The hostname of the database to connect to (Note: if db\_instance\_name is non-empty this setting is ignored) | `string` | `""` | no | +| [db\_instance\_name](#input\_db\_instance\_name) | The instance name of the CloudSQL instance to connect to (Note: if set db\_host will be ignored and a proxy established instead) | `string` | `""` | no | +| [default\_iglu\_resolvers](#input\_default\_iglu\_resolvers) | The default Iglu Resolvers that will be used by the loader to resolve and validate events |
list(object({
name = string
priority = number
uri = string
api_key = string
vendor_prefixes = list(string)
}))
|
[
{
"api_key": "",
"name": "Iglu Central",
"priority": 10,
"uri": "http://iglucentral.com",
"vendor_prefixes": []
},
{
"api_key": "",
"name": "Iglu Central - Mirror 01",
"priority": 20,
"uri": "http://mirror01.iglucentral.com",
"vendor_prefixes": []
}
]
| no | +| [gcp\_logs\_enabled](#input\_gcp\_logs\_enabled) | Whether application logs should be reported to GCP Logging | `bool` | `true` | no | +| [labels](#input\_labels) | The labels to append to this resource | `map(string)` | `{}` | no | +| [machine\_type](#input\_machine\_type) | The machine type to use | `string` | `"e2-small"` | no | +| [ssh\_block\_project\_keys](#input\_ssh\_block\_project\_keys) | Whether to block project wide SSH keys | `bool` | `true` | no | +| [ssh\_ip\_allowlist](#input\_ssh\_ip\_allowlist) | The list of CIDR ranges to allow SSH traffic from | `list(any)` |
[
"0.0.0.0/0"
]
| no | +| [ssh\_key\_pairs](#input\_ssh\_key\_pairs) | The list of SSH key-pairs to add to the servers |
list(object({
user_name = string
public_key = string
}))
| `[]` | no | +| [subnetwork](#input\_subnetwork) | The name of the sub-network to deploy within; if populated will override the 'network' setting | `string` | `""` | no | +| [target\_size](#input\_target\_size) | The number of servers to deploy | `number` | `1` | no | +| [telemetry\_enabled](#input\_telemetry\_enabled) | Whether or not to send telemetry information back to Snowplow Analytics Ltd | `bool` | `true` | no | +| [ubuntu\_20\_04\_source\_image](#input\_ubuntu\_20\_04\_source\_image) | The source image to use which must be based off of Ubuntu 20.04; by default the latest community version is used | `string` | `""` | no | +| [user\_provided\_id](#input\_user\_provided\_id) | An optional unique identifier to identify the telemetry events emitted by this stack | `string` | `""` | no | + +## Outputs + +| Name | Description | +|------|-------------| +| [instance\_group\_url](#output\_instance\_group\_url) | The full URL of the instance group created by the manager | +| [manager\_id](#output\_manager\_id) | Identifier for the instance group manager | +| [manager\_self\_link](#output\_manager\_self\_link) | The URL for the instance group manager | + +# Copyright and license + +The Terraform Google Postgres Loader on Compute Engine project is Copyright 2021-2021 Snowplow Analytics Ltd. + +Licensed under the [Apache License, Version 2.0][license] (the "License"); +you may not use this software except in compliance with the License. + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+ +[release]: https://github.com/snowplow-devops/terraform-google-postgres-loader-pubsub-ce/releases/latest +[release-image]: https://img.shields.io/github/v/release/snowplow-devops/terraform-google-postgres-loader-pubsub-ce + +[ci]: https://github.com/snowplow-devops/terraform-google-postgres-loader-pubsub-ce/actions?query=workflow%3Aci +[ci-image]: https://github.com/snowplow-devops/terraform-google-postgres-loader-pubsub-ce/workflows/ci/badge.svg + +[license]: https://www.apache.org/licenses/LICENSE-2.0 +[license-image]: https://img.shields.io/badge/license-Apache--2-blue.svg?style=flat + +[registry]: https://registry.terraform.io/modules/snowplow-devops/postgres-loader-pubsub-ce/google/latest +[registry-image]: https://img.shields.io/static/v1?label=Terraform&message=Registry&color=7B42BC&logo=terraform + +[source]: https://github.com/snowplow-incubator/snowplow-postgres-loader +[source-image]: https://img.shields.io/static/v1?label=Snowplow&message=Postgres%20Loader&color=0E9BA4&logo=GitHub diff --git a/main.tf b/main.tf new file mode 100644 index 0000000..d9811ad --- /dev/null +++ b/main.tf @@ -0,0 +1,288 @@ +locals { + module_name = "postgres-loader-pubsub-ce" + module_version = "0.1.0" + + app_name = "snowplow-postgres-loader" + app_version = "0.2.0" + + local_labels = { + name = var.name + app_name = local.app_name + app_version = replace(local.app_version, ".", "-") + module_name = local.module_name + module_version = replace(local.module_version, ".", "-") + } + + labels = merge( + var.labels, + local.local_labels + ) +} + +module "telemetry" { + source = "snowplow-devops/telemetry/snowplow" + version = "0.2.0" + + count = var.telemetry_enabled ? 
1 : 0 + + user_provided_id = var.user_provided_id + cloud = "GCP" + region = var.region + app_name = local.app_name + app_version = local.app_version + module_name = local.module_name + module_version = local.module_version +} + +data "google_compute_image" "ubuntu_20_04" { + family = "ubuntu-2004-lts" + project = "ubuntu-os-cloud" +} + +# --- IAM: Service Account setup + +resource "google_service_account" "sa" { + account_id = var.name + display_name = "Snowplow PG Loader service account - ${var.name}" +} + +resource "google_project_iam_member" "sa_pubsub_viewer" { + role = "roles/pubsub.viewer" + member = "serviceAccount:${google_service_account.sa.email}" +} + +resource "google_project_iam_member" "sa_pubsub_subscriber" { + role = "roles/pubsub.subscriber" + member = "serviceAccount:${google_service_account.sa.email}" +} + +resource "google_project_iam_member" "sa_pubsub_publisher" { + role = "roles/pubsub.publisher" + member = "serviceAccount:${google_service_account.sa.email}" +} + +resource "google_project_iam_member" "sa_logging_log_writer" { + role = "roles/logging.logWriter" + member = "serviceAccount:${google_service_account.sa.email}" +} + +resource "google_project_iam_member" "sa_cloud_sql_client" { + role = "roles/cloudsql.client" + member = "serviceAccount:${google_service_account.sa.email}" +} + +# --- CE: Firewall rules + +resource "google_compute_firewall" "ingress_ssh" { + name = "${var.name}-ssh-in" + + network = var.network + target_tags = [var.name] + + allow { + protocol = "tcp" + ports = ["22"] + } + + source_ranges = var.ssh_ip_allowlist +} + +resource "google_compute_firewall" "egress" { + name = "${var.name}-traffic-out" + + network = var.network + target_tags = [var.name] + + allow { + protocol = "tcp" + ports = ["80", "443", var.db_port] + } + + allow { + protocol = "udp" + ports = ["123"] + } + + direction = "EGRESS" + destination_ranges = ["0.0.0.0/0"] +} + +# --- CE: Instance group setup + +resource "google_pubsub_subscription" "in" { 
+ name = var.name + topic = var.in_topic_name + + expiration_policy { + ttl = "" + } + + labels = local.labels +} + +locals { + resolvers_raw = concat(var.default_iglu_resolvers, var.custom_iglu_resolvers) + + resolvers_open = [ + for resolver in local.resolvers_raw : merge( + { + name = resolver["name"], + priority = resolver["priority"], + vendorPrefixes = resolver["vendor_prefixes"], + connection = { + http = { + uri = resolver["uri"] + } + } + } + ) if resolver["api_key"] == "" + ] + + resolvers_closed = [ + for resolver in local.resolvers_raw : merge( + { + name = resolver["name"], + priority = resolver["priority"], + vendorPrefixes = resolver["vendor_prefixes"], + connection = { + http = { + uri = resolver["uri"] + apikey = resolver["api_key"] + } + } + } + ) if resolver["api_key"] != "" + ] + + resolvers = flatten([ + local.resolvers_open, + local.resolvers_closed + ]) + + iglu_resolver = templatefile("${path.module}/templates/iglu_resolver.json.tmpl", { resolvers = jsonencode(local.resolvers) }) + + # Note: If we are provided a valid DB Instance Name leverage CloudSQL proxy + db_host = var.db_instance_name == "" ? 
var.db_host : "127.0.0.1" + + config = templatefile("${path.module}/templates/config.json.tmpl", { + project_id = var.project_id + in_subscription_name = google_pubsub_subscription.in.name + db_host = local.db_host + db_port = var.db_port + db_name = var.db_name + db_username = var.db_username + db_password = var.db_password + schema_name = var.schema_name + purpose = var.purpose + }) + + startup_script = templatefile("${path.module}/templates/startup-script.sh.tmpl", { + config = local.config + iglu_resolver = local.iglu_resolver + version = local.app_version + db_host = local.db_host + db_port = var.db_port + db_name = var.db_name + db_username = var.db_username + db_password = var.db_password + schema_name = var.schema_name + + db_instance_name = var.db_instance_name + cloud_sql_proxy_enabled = var.db_instance_name != "" + + telemetry_script = join("", module.telemetry.*.gcp_ubuntu_20_04_user_data) + + gcp_logs_enabled = var.gcp_logs_enabled + }) + + ssh_keys_metadata = < $${CONFIG_DIR}/postgres_loader.json +${config} +EOF + +sudo cat << EOF > $${CONFIG_DIR}/iglu_resolver.json +${iglu_resolver} +EOF + +# Create the schema to load data into +sudo cat << EOF > $${CONFIG_DIR}/create-schema.sql +CREATE SCHEMA IF NOT EXISTS ${schema_name}; +EOF + +%{ if cloud_sql_proxy_enabled ~} +# Setup the proxy service +sudo docker run \ + -d \ + --name cloud-sql-proxy \ + --restart always \ + --network host \ +%{ if gcp_logs_enabled ~} + --log-driver gcplogs \ +%{ endif ~} + gcr.io/cloudsql-docker/gce-proxy:1.19.1 \ + /cloud_sql_proxy -instances=${db_instance_name}=tcp:${db_host}:${db_port} +sleep 5 +%{ endif ~} + +# Create the schema +sudo docker run \ + --name create_schema \ + --network host \ +%{ if gcp_logs_enabled ~} + --log-driver gcplogs \ +%{ endif ~} + -v $${CONFIG_DIR}:/snowplow/config \ + -e 'PGUSER=${db_username}' \ + -e 'PGPASSWORD=${db_password}' \ + postgres:13 \ + psql -h ${db_host} -d ${db_name} -p ${db_port} -f /snowplow/config/create-schema.sql + +# Launch 
the loader +sudo docker run \ + -d \ + --name postgres_loader \ + --restart always \ + --network host \ +%{ if gcp_logs_enabled ~} + --log-driver gcplogs \ +%{ else ~} + --log-opt max-size=10m \ + --log-opt max-file=5 \ +%{ endif ~} + -v $${CONFIG_DIR}:/snowplow/config \ + -e 'JAVA_OPTS=-Dorg.slf4j.simpleLogger.defaultLogLevel=info' \ + snowplow/snowplow-postgres-loader:${version} \ + --config /snowplow/config/postgres_loader.json \ + --resolver /snowplow/config/iglu_resolver.json + +${telemetry_script} diff --git a/variables.tf b/variables.tf new file mode 100644 index 0000000..9d6c61b --- /dev/null +++ b/variables.tf @@ -0,0 +1,187 @@ +variable "name" { + description = "A name which will be pre-pended to the resources created" + type = string +} + +variable "project_id" { + description = "The id of the project in which this resource is created" + type = string +} + +variable "region" { + description = "The name of the region to deploy within" + type = string +} + +variable "network" { + description = "The name of the network to deploy within" + type = string +} + +variable "subnetwork" { + description = "The name of the sub-network to deploy within; if populated will override the 'network' setting" + type = string + default = "" +} + +variable "machine_type" { + description = "The machine type to use" + type = string + default = "e2-small" +} + +variable "target_size" { + description = "The number of servers to deploy" + default = 1 + type = number +} + +variable "associate_public_ip_address" { + description = "Whether to assign a public ip address to this instance; if false this instance must be behind a Cloud NAT to connect to the internet" + type = bool + default = true +} + +variable "ssh_ip_allowlist" { + description = "The list of CIDR ranges to allow SSH traffic from" + type = list(any) + default = ["0.0.0.0/0"] +} + +variable "ssh_block_project_keys" { + description = "Whether to block project wide SSH keys" + type = bool + default = true +} + +variable 
"ssh_key_pairs" { + description = "The list of SSH key-pairs to add to the servers" + default = [] + type = list(object({ + user_name = string + public_key = string + })) +} + +variable "ubuntu_20_04_source_image" { + description = "The source image to use which must be based of of Ubuntu 20.04; by default the latest community version is used" + default = "" + type = string +} + +variable "labels" { + description = "The labels to append to this resource" + default = {} + type = map(string) +} + +variable "gcp_logs_enabled" { + description = "Whether application logs should be reported to GCP Logging" + default = true + type = bool +} + +# --- Configuration options + +variable "in_topic_name" { + description = "The name of the input pubsub topic that the loader will pull data from" + type = string +} + +variable "purpose" { + description = "The type of data the loader will be pulling which can be one of ENRICHED_EVENTS or JSON (Note: JSON can be used for loading bad rows)" + type = string +} + +variable "schema_name" { + description = "The database schema to load data into (e.g atomic | atomic_bad)" + type = string +} + +variable "db_instance_name" { + description = "The instance name of the CloudSQL instance to connect to (Note: if set db_host will be ignored and a proxy established instead)" + type = string + default = "" +} + +variable "db_host" { + description = "The hostname of the database to connect to (Note: if db_instance_name is non-empty this setting is ignored)" + type = string + default = "" +} + +variable "db_port" { + description = "The port the database is running on" + type = number +} + +variable "db_name" { + description = "The name of the database to connect to" + type = string +} + +variable "db_username" { + description = "The username to use to connect to the database" + type = string +} + +variable "db_password" { + description = "The password to use to connect to the database" + type = string + sensitive = true +} + +# --- Iglu Resolver + 
+variable "default_iglu_resolvers" { + description = "The default Iglu Resolvers that will be used by the loader to resolve and validate events" + default = [ + { + name = "Iglu Central" + priority = 10 + uri = "http://iglucentral.com" + api_key = "" + vendor_prefixes = [] + }, + { + name = "Iglu Central - Mirror 01" + priority = 20 + uri = "http://mirror01.iglucentral.com" + api_key = "" + vendor_prefixes = [] + } + ] + type = list(object({ + name = string + priority = number + uri = string + api_key = string + vendor_prefixes = list(string) + })) +} + +variable "custom_iglu_resolvers" { + description = "The custom Iglu Resolvers that will be used by the loader to resolve and validate events" + default = [] + type = list(object({ + name = string + priority = number + uri = string + api_key = string + vendor_prefixes = list(string) + })) +} + +# --- Telemetry + +variable "telemetry_enabled" { + description = "Whether or not to send telemetry information back to Snowplow Analytics Ltd" + type = bool + default = true +} + +variable "user_provided_id" { + description = "An optional unique identifier to identify the telemetry events emitted by this stack" + type = string + default = "" +} diff --git a/versions.tf b/versions.tf new file mode 100644 index 0000000..1becc2c --- /dev/null +++ b/versions.tf @@ -0,0 +1,10 @@ +terraform { + required_version = ">= 0.15" + + required_providers { + google = { + source = "hashicorp/google" + version = ">= 3.44.0" + } + } +}