Skip to content

Commit

Permalink
Prometheus exporter (#12)
Browse files Browse the repository at this point in the history
* prometheus exporter

* recreate migrations

* separate metrics container

* metrics added to functional test
  • Loading branch information
scrungus committed Aug 15, 2024
1 parent e71b4fc commit 28a9a87
Show file tree
Hide file tree
Showing 17 changed files with 448 additions and 262 deletions.
6 changes: 5 additions & 1 deletion .github/workflows/functional.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -24,4 +24,8 @@ jobs:

- name: Run test
timeout-minutes: 10
run: tools/functional_test.sh
run: tools/functional_test.sh

- name: Setup upterm session
uses: lhotari/action-upterm@v1
if: ${{ failure() }}
28 changes: 23 additions & 5 deletions charts/templates/deployment.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ spec:
args:
- -c
- >-
python /coral-credits/manage.py migrate &&
python /coral-credits/manage.py migrate --run-syncdb &&
python /coral-credits/manage.py createsuperuser --no-input || echo $?
volumeMounts:
- name: data
Expand All @@ -54,12 +54,12 @@ spec:
securityContext: {{ toYaml .Values.securityContext | nindent 12 }}
image: {{ printf "%s:%s" .Values.image.repository (default .Chart.AppVersion .Values.image.tag) }}
imagePullPolicy: {{ .Values.image.pullPolicy }}
#env:
# TODO(tylerchristie): inject stuff here
# TODO(tylerchristie): need metrics at some point
env:
- name: GUNICORN_PORT
value: {{ .Values.service.api.port | quote }}
ports:
- name: http
containerPort: 8080
containerPort: {{ .Values.service.api.port }}
protocol: TCP
livenessProbe:
{{- toYaml .Values.livenessProbe | nindent 12 }}
Expand All @@ -76,6 +76,24 @@ spec:
readOnly: true
- name: tmp
mountPath: /tmp
- name: prometheus-exporter
image: {{ printf "%s:%s" .Values.image.repository (default .Chart.AppVersion .Values.image.tag) }}
env:
- name: GUNICORN_PORT
value: {{ .Values.service.prometheusExporter.port | quote }}
- name: RUN_PROM
value: "true"
ports:
- name: metrics
containerPort: {{ .Values.service.prometheusExporter.port }}
protocol: TCP
resources: {{ toYaml .Values.resources | nindent 12 }}
volumeMounts:
- name: data
mountPath: /data
- name: runtime-settings
mountPath: /etc/coral-credits/settings.d
readOnly: true
{{- with .Values.nodeSelector }}
nodeSelector: {{ toYaml . | nindent 8 }}
{{- end }}
Expand Down
7 changes: 5 additions & 2 deletions charts/templates/service.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,11 @@ spec:
type: {{ .Values.service.type }}
ports:
- name: http
port: {{ .Values.service.port }}
port: {{ .Values.service.api.port }}
targetPort: http
protocol: TCP
# TODO(tylerchristie): expose monitoring
- name: metrics
port: {{ .Values.service.prometheusExporter.port }}
targetPort: metrics
protocol: TCP
selector: {{ include "coral-credits.selectorLabels" . | nindent 6 }}
5 changes: 4 additions & 1 deletion charts/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -122,7 +122,10 @@ replicaCount: 1
# Service details for the api
service:
type: ClusterIP
port: 8080
api:
port: 8080
prometheusExporter:
port: 8000

# Node selector for pods
nodeSelector: {}
Expand Down
13 changes: 13 additions & 0 deletions coral_credits/api/apps.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,19 @@
import os

from django.apps import AppConfig


class ApiConfig(AppConfig):
    """Django application configuration for the ``coral_credits.api`` app."""

    default_auto_field = "django.db.models.BigAutoField"
    name = "coral_credits.api"

    def ready(self):
        """Register the Prometheus collector when this process is the exporter.

        The collector is registered only when the ``RUN_PROM`` environment
        variable is ``"true"`` and ``REGISTER_PROM_COLLECTOR`` is not already
        ``"true"``. After registering, ``REGISTER_PROM_COLLECTOR`` is set to
        ``"true"`` so that a later call to ``ready()`` does nothing.
        """
        already_registered = os.environ.get("REGISTER_PROM_COLLECTOR") == "true"
        exporter_enabled = os.environ.get("RUN_PROM") == "true"
        if already_registered or not exporter_enabled:
            return

        # Imported lazily: these modules are only loaded when the exporter
        # is actually enabled.
        from coral_credits.prom_exporter import CustomCollector
        from prometheus_client.core import REGISTRY

        REGISTRY.register(CustomCollector())
        # NOTE(review): the marker lives in os.environ, so any child process
        # that inherits this environment will also see it -- confirm that is
        # the intended scope of the guard.
        os.environ["REGISTER_PROM_COLLECTOR"] = "true"
79 changes: 69 additions & 10 deletions coral_credits/api/db_utils.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,12 @@
import logging

from django.shortcuts import get_object_or_404
from django.utils import timezone

from coral_credits.api import db_exceptions, models

LOG = logging.getLogger(__name__)


def get_current_lease(current_lease):
current_consumer = get_object_or_404(
Expand All @@ -21,6 +25,38 @@ def get_resource_provider_account(project_id):
return resource_provider_account


def get_all_resource_provider_account():
    """Return every ``ResourceProviderAccount`` known to the database.

    The result is whatever ``ResourceProviderAccount.objects.all()`` yields;
    no filtering is applied.
    """
    return models.ResourceProviderAccount.objects.all()


def get_all_active_reservations(resource_provider_account):
    """Total the reserved resource hours per resource class for an account.

    Every ``Consumer`` belonging to ``resource_provider_account`` is visited,
    and the ``resource_hours`` of each of its ``ResourceConsumptionRecord``
    entries is added to the running total for that record's resource class.
    No date filtering is applied in this function.

    Returns a single dictionary of the form:
        {
            resource_class: total_resource_hours,
            ...
        }
    which is empty when the account has no consumption records.
    """
    # TODO(tylerchristie): can probably refactor the credit check with this function.
    totals = {}
    account_consumers = models.Consumer.objects.filter(
        resource_provider_account=resource_provider_account
    )
    for consumer in account_consumers:
        records = models.ResourceConsumptionRecord.objects.filter(
            consumer=consumer
        )
        for record in records:
            if record.resource_class not in totals:
                # First record seen for this resource class.
                totals[record.resource_class] = record.resource_hours
            else:
                totals[record.resource_class] += record.resource_hours
    return totals


def get_credit_allocation(id):
now = timezone.now()
try:
Expand All @@ -38,29 +74,46 @@ def get_all_credit_allocations(resource_provider_account):
now = timezone.now()
credit_allocations = models.CreditAllocation.objects.filter(
account=resource_provider_account.account, start__lte=now, end__gte=now
).order_by("-start")
).order_by("pk")

return credit_allocations


def get_credit_allocation_resources(credit_allocations, resource_classes):
    """Look up allocation resources and require one for each requested class.

    Builds the mapping returned by ``get_all_credit_allocation_resources`` for
    ``credit_allocations`` and checks that every entry of ``resource_classes``
    is a key in it.

    Returns a dictionary of the form:
        {
            "resource_class": "credit_resource_allocation"
        }
    The full mapping is returned, not only the requested classes.

    Raises:
        db_exceptions.NoCreditAllocation: if a requested resource class has
            no entry in the mapping.
    """
    allocated = get_all_credit_allocation_resources(credit_allocations)
    for resource_class in resource_classes:
        if resource_class in allocated:
            continue
        raise db_exceptions.NoCreditAllocation(
            f"No credit allocated for resource_type {resource_class}"
        )
    return allocated


def get_all_credit_allocation_resources(credit_allocations):
"""Collect the CreditAllocationResource rows of the given credit allocations.

Returns a dictionary of the form:
{
"resource_class": "credit_resource_allocation"
}
"""
resource_allocations = {}
for credit_allocation in credit_allocations:
for resource_class in resource_classes:
credit_allocation_resource = models.CreditAllocationResource.objects.filter(
allocation=credit_allocation, resource_class=resource_class
).first()
if not credit_allocation_resource:
raise db_exceptions.NoCreditAllocation(
f"No credit allocated for resource_type {resource_class}"
)
resource_allocations[resource_class] = credit_allocation_resource
credit_allocation_resources = models.CreditAllocationResource.objects.filter(
allocation=credit_allocation
)
# TODO(tylerchristie): I think this breaks for the case where we have
# multiple credit allocations for the same resource_class.
# The dictionary is keyed by resource_class only, so a later assignment for
# the same resource_class replaces the earlier entry.
for car in credit_allocation_resources:
resource_allocations[car.resource_class] = car

return resource_allocations


Expand Down Expand Up @@ -130,6 +183,12 @@ def get_resource_requests(lease, current_resource_requests=None):
else:
delta_resource_hours = requested_resource_hours

LOG.info(
f"Calculated {delta_resource_hours} hours for lease {lease.id} with "
f"requests {{resource_class: {resource_class}, amount: {amount}, "
f"duration: {lease.duration}}}"
)

resource_requests[resource_class] = delta_resource_hours

except KeyError:
Expand Down
71 changes: 51 additions & 20 deletions coral_credits/api/migrations/0001_initial.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Generated by Django 5.0.1 on 2024-02-07 18:00
# Generated by Django 5.0.8 on 2024-08-14 15:41

import django.db.models.deletion
from django.db import migrations, models
Expand Down Expand Up @@ -63,7 +63,7 @@ class Migration(migrations.Migration):
],
),
migrations.CreateModel(
name="Consumer",
name="CreditAllocation",
fields=[
(
"id",
Expand All @@ -74,7 +74,7 @@ class Migration(migrations.Migration):
verbose_name="ID",
),
),
("consume_ref", models.CharField(max_length=200)),
("name", models.CharField(max_length=200)),
("created", models.DateTimeField(auto_now_add=True)),
("start", models.DateTimeField()),
("end", models.DateTimeField()),
Expand All @@ -85,20 +85,48 @@ class Migration(migrations.Migration):
to="api.creditaccount",
),
),
],
options={
"unique_together": {("account", "start"), ("name", "account")},
},
),
migrations.CreateModel(
name="ResourceProviderAccount",
fields=[
(
"id",
models.BigAutoField(
auto_created=True,
primary_key=True,
serialize=False,
verbose_name="ID",
),
),
("project_id", models.UUIDField()),
(
"resource_provider",
"account",
models.ForeignKey(
on_delete=django.db.models.deletion.DO_NOTHING,
on_delete=django.db.models.deletion.CASCADE,
to="api.creditaccount",
),
),
(
"provider",
models.ForeignKey(
on_delete=django.db.models.deletion.CASCADE,
to="api.resourceprovider",
),
),
],
options={
"unique_together": {("consume_ref", "resource_provider")},
"unique_together": {
("account", "provider"),
("provider", "project_id"),
},
},
),
migrations.CreateModel(
name="CreditAllocation",
name="Consumer",
fields=[
(
"id",
Expand All @@ -109,20 +137,22 @@ class Migration(migrations.Migration):
verbose_name="ID",
),
),
("name", models.CharField(max_length=200)),
("consumer_ref", models.CharField(max_length=200)),
("consumer_uuid", models.UUIDField()),
("user_ref", models.UUIDField()),
("created", models.DateTimeField(auto_now_add=True)),
("start", models.DateTimeField()),
("end", models.DateTimeField()),
(
"account",
"resource_provider_account",
models.ForeignKey(
on_delete=django.db.models.deletion.DO_NOTHING,
to="api.creditaccount",
to="api.resourceprovideraccount",
),
),
],
options={
"unique_together": {("account", "start"), ("name", "account")},
"unique_together": {("consumer_uuid", "resource_provider_account")},
},
),
migrations.CreateModel(
Expand All @@ -137,27 +167,27 @@ class Migration(migrations.Migration):
verbose_name="ID",
),
),
(
"resource_hours",
models.DecimalField(decimal_places=2, max_digits=10),
),
("resource_hours", models.FloatField()),
("created", models.DateTimeField(auto_now_add=True)),
(
"allocation",
models.ForeignKey(
on_delete=django.db.models.deletion.CASCADE,
related_name="resources",
to="api.creditallocation",
),
),
(
"resource_class",
models.ForeignKey(
on_delete=django.db.models.deletion.DO_NOTHING,
related_name="+",
to="api.resourceclass",
),
),
],
options={
"ordering": ("allocation__start",),
"unique_together": {("allocation", "resource_class")},
},
),
Expand All @@ -173,25 +203,26 @@ class Migration(migrations.Migration):
verbose_name="ID",
),
),
(
"resource_hours",
models.DecimalField(decimal_places=2, max_digits=10),
),
("resource_hours", models.FloatField()),
(
"consumer",
models.ForeignKey(
on_delete=django.db.models.deletion.CASCADE, to="api.consumer"
on_delete=django.db.models.deletion.CASCADE,
related_name="resources",
to="api.consumer",
),
),
(
"resource_class",
models.ForeignKey(
on_delete=django.db.models.deletion.DO_NOTHING,
related_name="+",
to="api.resourceclass",
),
),
],
options={
"ordering": ("consumer__start",),
"unique_together": {("consumer", "resource_class")},
},
),
Expand Down
Loading

0 comments on commit 28a9a87

Please sign in to comment.