From 1095c2cbd0497460519d3f79dd1ee382c68d3c4d Mon Sep 17 00:00:00 2001 From: Ben England Date: Tue, 8 Jun 2021 17:23:43 -0400 Subject: [PATCH] Auto osd cache drop (#570) only do ceph osd cache dropping if user requests it default to openshift for benchmark-operator add option to drop Ceph OSD cache to CR document Ceph OSD cache dropping user must start cache dropper and ceph toolbox pod test both OSD cache dropping and kernel cache dropping at same time only if openshift-storage namespace is defined --- docs/cache_dropping.md | 38 ++++- group_vars/all.yml | 6 + playbook.yml | 4 + .../crds/ripsaw_v1alpha1_ripsaw_crd.yaml | 2 + .../rook_ceph_drop_cache_pod.yaml | 22 ++- roles/ceph_osd_cache_drop/tasks/main.yml | 130 ++++++++++++++++++ roles/ceph_osd_cache_drop/vars/main.yml | 3 + roles/ceph_osd_cache_drop/wait_for_pod.py | 75 ++++++++++ roles/fio_distributed/templates/client.yaml | 10 +- roles/kernel_cache_drop/tasks/main.yml | 4 +- roles/smallfile/templates/workload_job.yml.j2 | 12 +- tests/test_crs/valid_fiod_ocs_cache_drop.yaml | 85 ++++++++++++ tests/test_fiod.sh | 14 +- 13 files changed, 392 insertions(+), 13 deletions(-) rename {resources => roles/ceph_osd_cache_drop}/rook_ceph_drop_cache_pod.yaml (58%) create mode 100644 roles/ceph_osd_cache_drop/tasks/main.yml create mode 100644 roles/ceph_osd_cache_drop/vars/main.yml create mode 100644 roles/ceph_osd_cache_drop/wait_for_pod.py create mode 100644 tests/test_crs/valid_fiod_ocs_cache_drop.yaml diff --git a/docs/cache_dropping.md b/docs/cache_dropping.md index 360ecf57e..9d2836c4c 100644 --- a/docs/cache_dropping.md +++ b/docs/cache_dropping.md @@ -23,12 +23,15 @@ There are different types of caching that occur in the system - (Ceph OCS) OSD caching (not yet supported fully) you can control which type of cache dropping -is done using these CR fields in the workload args section: +is done using one or both of these CR fields in the workload args section: ``` drop_cache_kernel: true +drop_cache_rook_ceph: true ``` +## how to drop kernel cache + For this to work, you must **label** the nodes that you want to drop kernel cache, for example: ``` @@ -53,6 +56,24 @@ Benchmarks supported for kernel cache dropping at present are: - fio - smallfile +## how to drop Ceph OSD cache + +for this to work with OpenShift Container Storage, you must start both the Ceph toolbox pod in OCS +and the cache dropper pod. You can do this with: + +``` +oc patch OCSInitialization ocsinit -n openshift-storage --type json --patch \ + '[{ "op": "replace", "path": "/spec/enableCephTools", "value": true }]' + +oc create -f roles/ceph_osd_cache_drop/rook_ceph_drop_cache_pod.yaml + +oc -n openshift-storage get pod | awk '/tool/||/drop/' +``` + +when you see both of these pods in the running state, then you can use benchmark operator. The reason that +you have to manually start these two pods running is that the benchmark-operator does not have authorization +to run them in the openshift-storage namespace and get access to the secrets needed to do this. + # implementation notes For benchmark developers... 
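As a quick starting point for developers exercising both code paths, here is a minimal sketch of a CR that enables kernel and Ceph OSD cache dropping at the same time. It is modeled on `tests/test_crs/valid_fiod_ocs_cache_drop.yaml` added later in this patch, with placeholder metadata and most of the fio tuning fields trimmed; the elasticsearch, system_metrics and job_params sections a real run would carry are omitted.

```
apiVersion: ripsaw.cloudbulldozer.io/v1alpha1
kind: Benchmark
metadata:
  name: example-benchmark   # placeholder
  namespace: my-ripsaw
spec:
  workload:
    name: "fio_distributed"
    args:
      # drop the kernel page cache on nodes labeled kernel-cache-dropper=yes
      drop_cache_kernel: true
      # also drop Ceph OSD caches via the rook-ceph-osd-cache-drop pod
      drop_cache_rook_ceph: true
      samples: 2
      servers: 2
      jobs:
        - randread
      bs:
        - 4KiB
      numjobs:
        - 2
      iodepth: 1
      read_runtime: 10
      read_ramp_time: 1
      filesize: 10MiB
      storagesize: 16Mi
```

Remember that `drop_cache_kernel: true` still requires the `kernel-cache-dropper=yes` node label described earlier, and `drop_cache_rook_ceph: true` requires the Ceph toolbox and cache dropper pods to be running as shown above.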
@@ -84,3 +105,18 @@ For example, in your workload.yml.j2 where it creates the environment variables - name: KCACHE_DROP_PORT_NUM value: "{{ kernel_cache_drop_svc_port }}" ``` + +similarly, for Ceph OSD cache dropping, you must add this to one of your workload pods' environment variables: +``` + +{% if ceph_osd_cache_drop_pod_ip is defined %} + - name: ceph_osd_cache_drop_pod_ip + value: "{{ ceph_osd_cache_drop_pod_ip }}" + - name: CEPH_CACHE_DROP_PORT_NUM + value: "{{ ceph_cache_drop_svc_port }}" +{% endif %} + +``` +The ansible playbook for benchmark-operator will look up the ceph_osd_cache_drop_pod_ip IP address and fill in this var, +all you have to do is pass it in. See the ceph_osd_cache_drop ansible role for details. + diff --git a/group_vars/all.yml b/group_vars/all.yml index 60cc68ab0..d14efd36a 100644 --- a/group_vars/all.yml +++ b/group_vars/all.yml @@ -1,4 +1,10 @@ --- operator_namespace: '{{ ansible_operator_meta.namespace }}' + +# for upstream kubernetes this might be rook-ceph +# FIXME: how do we automatically determine that we are in openshift? +rook_ceph_namespace: 'openshift-storage' + clustername: 'myk8scluster' kernel_cache_drop_svc_port: 9222 +ceph_cache_drop_svc_port: 9457 diff --git a/playbook.yml b/playbook.yml index 915451755..3236e2b92 100644 --- a/playbook.yml +++ b/playbook.yml @@ -4,6 +4,10 @@ gather_facts: no tasks: + - include_role: + name: "ceph_osd_cache_drop" + when: workload.args.drop_cache_rook_ceph is defined + - include_role: name: "kernel_cache_drop" when: workload.args.drop_cache_kernel is defined diff --git a/resources/crds/ripsaw_v1alpha1_ripsaw_crd.yaml b/resources/crds/ripsaw_v1alpha1_ripsaw_crd.yaml index c9c0ba7dd..94a0244dc 100644 --- a/resources/crds/ripsaw_v1alpha1_ripsaw_crd.yaml +++ b/resources/crds/ripsaw_v1alpha1_ripsaw_crd.yaml @@ -135,6 +135,8 @@ spec: type: string drop_cache_kernel: type: boolean + drop_cache_rook_ceph: + type: boolean status: type: object properties: diff --git a/resources/rook_ceph_drop_cache_pod.yaml b/roles/ceph_osd_cache_drop/rook_ceph_drop_cache_pod.yaml similarity index 58% rename from resources/rook_ceph_drop_cache_pod.yaml rename to roles/ceph_osd_cache_drop/rook_ceph_drop_cache_pod.yaml index a4ec8d5c1..b7ffaec7f 100644 --- a/resources/rook_ceph_drop_cache_pod.yaml +++ b/roles/ceph_osd_cache_drop/rook_ceph_drop_cache_pod.yaml @@ -2,21 +2,33 @@ apiVersion: v1 kind: Pod metadata: name: rook-ceph-osd-cache-drop - namespace: rook-ceph + # if you are using upstream rook-ceph, change the next line + #namespace: "{{ rook_ceph_namespace }}" + namespace: "openshift-storage" + labels: + app: rook-ceph-osd-cache-drop spec: containers: - name: rook-ceph-osd-cache-drop - image: quay.io/cloud-bulldozer/ceph-cache-dropper:latest + image: "quay.io/cloud-bulldozer/ceph-cache-dropper:latest" imagePullPolicy: Always command: [/bin/sh, -c] args: - - cd /opt/bohica/ceph-cache-dropper; python3 ./osd-cache-drop-websvc.py + - cd /opt/ceph_cache_drop; python3 ./osd-cache-drop-websvc.py env: - - name: ROOK_ADMIN_SECRET + - name: ROOK_CEPH_USERNAME valueFrom: secretKeyRef: - key: admin-secret name: rook-ceph-mon + key: ceph-username + - name: ROOK_CEPH_SECRET + valueFrom: + secretKeyRef: + name: rook-ceph-mon + key: ceph-secret + - name: ceph_cache_drop_port + #value: "{{ ceph_cache_drop_svc_port }}" + value: "9457" securityContext: privileged: true volumeMounts: diff --git a/roles/ceph_osd_cache_drop/tasks/main.yml b/roles/ceph_osd_cache_drop/tasks/main.yml new file mode 100644 index 000000000..aecda4722 --- /dev/null +++ 
b/roles/ceph_osd_cache_drop/tasks/main.yml @@ -0,0 +1,130 @@ +--- +- debug: + msg: "in ceph OSD cache dropper role" + +# First ensure that toolbox pod is running + +- name: check for existing rook-ceph cluster + k8s_info: + kind: Pod + namespace: "{{ rook_ceph_namespace }}" + label_selectors: + - app = rook-ceph-mgr + register: rook_ceph_mgr_exists + +- name: generate boolean for namespace existence + set_fact: + rook_ceph_cluster_exists: "{{ rook_ceph_mgr_exists.resources | length > 0 }}" + +- debug: + var: rook_ceph_cluster_exists + +- fail: + msg: "You are asking for Ceph cache with drop_cache_rook_ceph: true in CR, but there is no ceph cluster!" + when: not rook_ceph_cluster_exists + +#- name: get pod info of pre-existing ceph toolbox pod +# k8s_info: +# kind: Pod +# label_selectors: +# - app = rook-ceph-tools +# namespace: "{{ rook_ceph_namespace }}" +# register: ceph_toolbox_already_exists + +#- debug: +# var: ceph_toolbox_already_exists + +# FIXME: next commented-out task gets 403 unprivileged error +# workaround is to issue oc command (with admin privs) to do it +# oc patch OCSInitialization ocsinit -n openshift-storage --type json --patch +# '[{ "op": "replace", "path": "/spec/enableCephTools", "value": true }]' + +#- name: ensure Ceph OSD toolbox pod is started +# k8s: +# namespace: "{{ rook_ceph_namespace }}" +# name: ocsinit +# kind: OCSInitialization +# state: present +# apply: true +# resource_definition: +# spec: +# enableCephTools: true +# when: ceph_toolbox_already_exists.resources | length == 0 + +#- name: wait for toolbox pod to start +# shell: "python3 /opt/ansible/roles/ceph_osd_cache_drop/wait_for_pod.py 30 {{ rook_ceph_namespace }} ceph-toolbox" +# when: ceph_toolbox_already_exists.resources | length == 0 + +- name: get pod name of running ceph toolbox pod + k8s_info: + kind: Pod + label_selectors: + - app = rook-ceph-tools + namespace: "{{ rook_ceph_namespace }}" + register: ceph_toolbox + +#- debug: +# var: ceph_toolbox + +# this next var is referenced by the YAML that creates the cache dropper pod + +- name: put pod id into a var + set_fact: + rook_ceph_toolbox_pod: "{{ ceph_toolbox.resources[0].metadata.name }}" + +- debug: + var: rook_ceph_toolbox_pod + + +# now ensure that ceph cache dropper pod is started and we have its IP + +#- name: get pre-existing ceph cache dropper pod +# k8s_info: +# kind: Pod +# label_selectors: +# - app = rook-ceph-osd-cache-drop +# namespace: "{{ rook_ceph_namespace }}" +# register: drop_pod_already_exists + +#- debug: +# var: drop_pod_already_exists + +# FIXME: this gets 403 unprivileged error +# workaround is to issue oc create -f command (with admin privs) to do it +# you must substitute a real value for the jinja2 var in this template first + +#- name: start ceph OSD cache dropper +# k8s: +# definition: "{{ lookup('template', '/opt/ansible/roles/ceph_osd_cache_drop/rook_ceph_drop_cache_pod.yaml') | from_yaml }}" +# register: ceph_cache_dropper_start +# when: drop_pod_already_exists.resources | length == 0 +# +#- name: wait for cache dropper to start +# shell: "python3 /opt/ansible/roles/ceph_osd_cache_drop/wait_for_pod.py 30 {{ rook_ceph_namespace }} rook-ceph-osd-cache-drop" +# when: rook_ceph_cluster_exists and drop_pod_already_exists.resources | length == 0 + +- name: get cache dropper pod + k8s_info: + kind: Pod + label_selectors: + - app = rook-ceph-osd-cache-drop + namespace: "{{ rook_ceph_namespace }}" + register: ceph_osd_cache_drop_pod + +#- debug: +# var: ceph_osd_cache_drop_pod + +- name: put ip into a var + set_fact: 
+ ceph_osd_cache_drop_pod_ip: "{{ ceph_osd_cache_drop_pod.resources[0].status.podIP }}" + +- debug: + var: ceph_osd_cache_drop_pod_ip + +- name: test IP + shell: "curl http://{{ ceph_osd_cache_drop_pod_ip }}:{{ ceph_cache_drop_svc_port }}/" + register: drop_pod_test + +#- debug: +# var: drop_pod_test + diff --git a/roles/ceph_osd_cache_drop/vars/main.yml b/roles/ceph_osd_cache_drop/vars/main.yml new file mode 100644 index 000000000..517af5d01 --- /dev/null +++ b/roles/ceph_osd_cache_drop/vars/main.yml @@ -0,0 +1,3 @@ +# when debugging set this var, for example: +#ceph_cache_drop_image: quay.io/bengland2/ceph-cache-dropper:debug + diff --git a/roles/ceph_osd_cache_drop/wait_for_pod.py b/roles/ceph_osd_cache_drop/wait_for_pod.py new file mode 100644 index 000000000..297631cd4 --- /dev/null +++ b/roles/ceph_osd_cache_drop/wait_for_pod.py @@ -0,0 +1,75 @@ +#!/usr/bin/python3 +# wait_for_pod.py - +# wait until toolbox pod and web service pod is in Running state for specified deploy +# see usage() for input params + +from kubernetes import client, config +from sys import argv, exit +from os import getenv +import time + +NOTOK = 1 +# may have to adjust this based on number of pods +poll_interval = 2.0 + +def usage(msg): + print('ERROR: %s' % msg) + print('usage: wait_for_pod.py timeout namespace pod-name-pattern') + exit(NOTOK) + + +# parse command line + +if len(argv) < 4: + usage('too few parameters') + +timeout_str = argv[1] +ns = argv[2] +pod_name_pattern = argv[3] + +# show 'em what we parsed + +print('timeout if pods not seen in %s sec' % timeout_str) +print('namespace: %s' % ns) +print('pod name pattern: %s' % pod_name_pattern) + +timeout = int(timeout_str) +if timeout <= poll_interval: + usage('timeout %d must be greater than poll interval %d' % + (timeout, poll_interval)) + +# wait for pods + +# cannot do this from inside cluster pod: config.load_kube_config() +if getenv('KUBECONFIG'): + config.load_kube_config() +else: + config.load_incluster_config() +v1 = client.CoreV1Api() + +print('waiting for pod...') +start_time = time.time() +matching_pods = 0 + +while True: + time.sleep(poll_interval) + now_time = time.time() + delta_time = now_time - start_time + if delta_time > timeout: + break + + ret = v1.list_pod_for_all_namespaces(watch=False) + matching_pods = 0 + for i in ret.items: + if i.metadata.namespace == ns and \ + i.metadata.generate_name.__contains__ (pod_name_pattern) and \ + i.status.phase == 'Running': + matching_pods += 1 + if matching_pods >= 1: + break + +if delta_time > timeout: + usage('timeout waiting for pods to reach running state') + +if matching_pods != 1: + usage('expected 1 pod, found %d pods' % matching_pods) diff --git a/roles/fio_distributed/templates/client.yaml b/roles/fio_distributed/templates/client.yaml index ecef56843..372cca38e 100644 --- a/roles/fio_distributed/templates/client.yaml +++ b/roles/fio_distributed/templates/client.yaml @@ -22,7 +22,7 @@ spec: - name: uuid value: "{{ uuid }}" - name: test_user - value: "{{ test_user | default("ripsaw") }}" + value: "{{ test_user | default('ripsaw') }}" - name: clustername value: "{{ clustername }}" - name: ceph_cache_drop_pod_ip @@ -31,11 +31,17 @@ spec: value: "{{ kcache_drop_pod_ips | default() }}" - name: KCACHE_DROP_PORT_NUM value: "{{ kernel_cache_drop_svc_port }}" +{% if ceph_osd_cache_drop_pod_ip is defined %} + - name: ceph_osd_cache_drop_pod_ip + value: "{{ ceph_osd_cache_drop_pod_ip }}" + - name: CEPH_CACHE_DROP_PORT_NUM + value: "{{ ceph_cache_drop_svc_port }}" +{% endif %} {% if elasticsearch is 
defined %} - name: es value: "{{ elasticsearch.url }}" - name: es_index - value: "{{ elasticsearch.index_name | default("ripsaw-fio") }}" + value: "{{ elasticsearch.index_name | default('ripsaw-fio') }}" - name: es_verify_cert value: "{{ elasticsearch.verify_cert | default(true) }}" - name: parallel diff --git a/roles/kernel_cache_drop/tasks/main.yml b/roles/kernel_cache_drop/tasks/main.yml index f8b486b2f..de1a3a94f 100644 --- a/roles/kernel_cache_drop/tasks/main.yml +++ b/roles/kernel_cache_drop/tasks/main.yml @@ -17,8 +17,8 @@ namespace: "{{ operator_namespace }}" register: kcache_drop_pod_list -- debug: - var: kcache_drop_pod_list +#- debug: +# var: kcache_drop_pod_list - name: put ip list into a var set_fact: diff --git a/roles/smallfile/templates/workload_job.yml.j2 b/roles/smallfile/templates/workload_job.yml.j2 index 96880328b..8d28b82ec 100644 --- a/roles/smallfile/templates/workload_job.yml.j2 +++ b/roles/smallfile/templates/workload_job.yml.j2 @@ -45,12 +45,20 @@ spec: value: "{{ test_user | default("ripsaw") }}" - name: clustername value: "{{ clustername }}" -{% if kcache_drop_pod_ips is defined and item | int == 1 %} +{% if item | int == 1 %} +{% if kcache_drop_pod_ips is defined %} - name: kcache_drop_pod_ips - value: "{{ kcache_drop_pod_ips | default() }}" + value: "{{ kcache_drop_pod_ips }}" - name: KCACHE_DROP_PORT_NUM value: "{{ kernel_cache_drop_svc_port }}" {% endif %} +{% if ceph_osd_cache_drop_pod_ip is defined %} + - name: ceph_osd_cache_drop_pod_ip + value: "{{ ceph_osd_cache_drop_pod_ip }}" + - name: CEPH_CACHE_DROP_PORT_NUM + value: "{{ ceph_cache_drop_svc_port }}" +{% endif %} +{% endif %} {% if elasticsearch is defined %} - name: es value: "{{ elasticsearch.url }}" diff --git a/tests/test_crs/valid_fiod_ocs_cache_drop.yaml b/tests/test_crs/valid_fiod_ocs_cache_drop.yaml new file mode 100644 index 000000000..47f3d0fcc --- /dev/null +++ b/tests/test_crs/valid_fiod_ocs_cache_drop.yaml @@ -0,0 +1,85 @@ +apiVersion: ripsaw.cloudbulldozer.io/v1alpha1 +kind: Benchmark +metadata: + name: example-benchmark + namespace: my-ripsaw +spec: + system_metrics: + collection: true + prom_url: https://prometheus-k8s.openshift-monitoring.svc.cluster.local:9091 + es_url: ES_SERVER + prom_token: PROMETHEUS_TOKEN + metrics_profile: node-metrics.yml + elasticsearch: + url: ES_SERVER + index_name: ripsaw-fio + metadata: + collection: true + cleanup: false + workload: + name: "fio_distributed" + args: + drop_cache_rook_ceph: true + drop_cache_kernel: true + samples: 2 + servers: 2 + jobs: + - randwrite + - randread + - randrw + bs: + - 4KiB + numjobs: + - 2 + iodepth: 1 + read_runtime: 10 + write_runtime: 10 + read_ramp_time: 1 + write_ramp_time: 1 + filesize: 10MiB + log_sample_rate: 2000 + storagesize: 16Mi + debug: false +####################################### +# EXPERT AREA - MODIFY WITH CAUTION # +####################################### + job_params: + - jobname_match: write + params: + - time_based=1 + - fsync_on_close=1 + - create_on_open=1 + - runtime={{ workload_args.write_runtime }} + - ramp_time={{ workload_args.write_ramp_time }} + - jobname_match: read + params: + - time_based=1 + - runtime={{ workload_args.read_runtime }} + - ramp_time={{ workload_args.read_ramp_time }} + - jobname_match: rw + params: + - rwmixread=50 + - time_based=1 + - runtime={{ workload_args.read_runtime }} + - ramp_time={{ workload_args.read_ramp_time }} + - jobname_match: readwrite + params: + - rwmixread=50 + - time_based=1 + - runtime={{ workload_args.read_runtime }} + - ramp_time={{ 
workload_args.read_ramp_time }} + - jobname_match: randread + params: + - time_based=1 + - runtime={{ workload_args.read_runtime }} + - ramp_time={{ workload_args.read_ramp_time }} + - jobname_match: randwrite + params: + - time_based=1 + - runtime={{ workload_args.write_runtime }} + - ramp_time={{ workload_args.write_ramp_time }} + - jobname_match: randrw + params: + - time_based=1 + - runtime={{ workload_args.write_runtime }} + - ramp_time={{ workload_args.write_ramp_time }} diff --git a/tests/test_fiod.sh b/tests/test_fiod.sh index cdb5d33e5..042a764dc 100755 --- a/tests/test_fiod.sh +++ b/tests/test_fiod.sh @@ -35,7 +35,7 @@ function functional_test_fio { fio_pod=$(get_pod "app=fiod-client-$uuid" 300) wait_for "kubectl wait --for=condition=Initialized pods/$fio_pod -n my-ripsaw --timeout=500s" "500s" $fio_pod wait_for "kubectl wait --for=condition=complete -l app=fiod-client-$uuid jobs -n my-ripsaw --timeout=700s" "700s" $fio_pod - + kubectl -n my-ripsaw logs $fio_pod > /tmp/$fio_pod.log indexes="ripsaw-fio-results ripsaw-fio-log ripsaw-fio-analyzed-result" if check_es "${long_uuid}" "${indexes}" then @@ -50,5 +50,17 @@ function functional_test_fio { figlet $(basename $0) kubectl label nodes -l node-role.kubernetes.io/worker= kernel-cache-dropper=yes --overwrite functional_test_fio "Fio distributed" tests/test_crs/valid_fiod.yaml +openshift_storage_present=$(oc get namespace | awk '/openshift-storage/' | wc -l) +if [ $openshift_storage_present -gt 0 ] ; then + oc patch OCSInitialization ocsinit -n openshift-storage --type json --patch \ + '[{ "op": "replace", "path": "/spec/enableCephTools", "value": true }]' + drop_cache_pods=$(oc -n openshift-storage get pod | awk '/drop/' | awk '/unning/' | wc -l) + if [ $drop_cache_pods -eq 0 ] ; then + oc create -f roles/ceph_osd_cache_drop/rook_ceph_drop_cache_pod.yaml + kubectl wait --for=condition=Initialized pods/rook-ceph-osd-cache-drop -n openshift-storage --timeout=100s + fi + sleep 5 + functional_test_fio "Fio cache drop" tests/test_crs/valid_fiod_ocs_cache_drop.yaml +fi functional_test_fio "Fio distributed - bsrange" tests/test_crs/valid_fiod_bsrange.yaml functional_test_fio "Fio hostpath distributed" tests/test_crs/valid_fiod_hostpath.yaml
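One design note on verifying the dropper: the `test IP` task in `roles/ceph_osd_cache_drop/tasks/main.yml` shells out to `curl` against the root path of the cache dropper web service. Below is a minimal sketch of the same reachability check written with Ansible's built-in `uri` module; the endpoint path and port variable are taken from that task, while the actual cache-drop request is issued later by the benchmark wrapper running inside the workload pods (via the `ceph_osd_cache_drop_pod_ip` and `CEPH_CACHE_DROP_PORT_NUM` environment variables) and is not part of this patch.

```
# sketch only: reachability check for the OSD cache dropper web service,
# equivalent to the curl-based "test IP" task in the ceph_osd_cache_drop role
- name: check that the Ceph OSD cache dropper web service answers
  uri:
    url: "http://{{ ceph_osd_cache_drop_pod_ip }}:{{ ceph_cache_drop_svc_port }}/"
    method: GET
    return_content: true
    status_code: 200
  register: ceph_cache_drop_response
  when: ceph_osd_cache_drop_pod_ip is defined

- debug:
    var: ceph_cache_drop_response.content
```

Using `uri` keeps the check inside Ansible (no dependency on `curl` being available in the operator image) and fails the play with a clear error if the dropper pod is not yet serving requests.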