From 4f87bdaae0c23583716089a0b62cd80ab732f3c9 Mon Sep 17 00:00:00 2001
From: Edwin Chong <5728235+ewchong@users.noreply.github.com>
Date: Fri, 15 Oct 2021 11:27:52 -0400
Subject: [PATCH] add coremark pro support

---
 ci/common.sh                                |   1 +
 snafu/benchmarks/coremarkpro/Dockerfile     |  13 +
 snafu/benchmarks/coremarkpro/README.md      | 207 ++++++++++++++++
 snafu/benchmarks/coremarkpro/__init__.py    |   5 +
 snafu/benchmarks/coremarkpro/ci_test.sh     |  11 +
 snafu/benchmarks/coremarkpro/coremarkpro.py | 261 ++++++++++++++++++++
 6 files changed, 498 insertions(+)
 create mode 100644 snafu/benchmarks/coremarkpro/Dockerfile
 create mode 100644 snafu/benchmarks/coremarkpro/README.md
 create mode 100644 snafu/benchmarks/coremarkpro/__init__.py
 create mode 100755 snafu/benchmarks/coremarkpro/ci_test.sh
 create mode 100644 snafu/benchmarks/coremarkpro/coremarkpro.py

diff --git a/ci/common.sh b/ci/common.sh
index f3cf5a38a..4a499257b 100644
--- a/ci/common.sh
+++ b/ci/common.sh
@@ -27,6 +27,7 @@ function update_operator_image() {
     sed -i "s#${default_ripsaw_image_prefix}/pgbench:latest#${SNAFU_WRAPPER_IMAGE_PREFIX}/pgbench:${SNAFU_IMAGE_TAG}#g" roles/pgbench/defaults/main.yml
     sed -i "s#${default_ripsaw_image_prefix}/smallfile:master#${SNAFU_WRAPPER_IMAGE_PREFIX}/smallfile:${SNAFU_IMAGE_TAG}#g" roles/smallfile/templates/* roles/smallfile/tasks/*
     sed -i "s#${default_ripsaw_image_prefix}/sysbench:latest#${SNAFU_WRAPPER_IMAGE_PREFIX}/sysbench:${SNAFU_IMAGE_TAG}#g" roles/sysbench/templates/*
+    sed -i "s#${default_ripsaw_image_prefix}/coremark-pro:latest#${SNAFU_WRAPPER_IMAGE_PREFIX}/coremark-pro:${SNAFU_IMAGE_TAG}#g" roles/coremarkpro/templates/*
     sed -i "s#${default_ripsaw_image_prefix}/uperf:latest#${SNAFU_WRAPPER_IMAGE_PREFIX}/uperf:${SNAFU_IMAGE_TAG}#g" roles/uperf/templates/*
     sed -i "s#${default_ripsaw_image_prefix}/ycsb-server:latest#${SNAFU_WRAPPER_IMAGE_PREFIX}/ycsb-server:${SNAFU_IMAGE_TAG}#g" roles/ycsb/templates/*
     sed -i "s#${default_ripsaw_image_prefix}/vegeta:latest#${SNAFU_WRAPPER_IMAGE_PREFIX}/vegeta:${SNAFU_IMAGE_TAG}#g" roles/vegeta/templates/*
diff --git a/snafu/benchmarks/coremarkpro/Dockerfile b/snafu/benchmarks/coremarkpro/Dockerfile
new file mode 100644
index 000000000..f23d1d5e9
--- /dev/null
+++ b/snafu/benchmarks/coremarkpro/Dockerfile
@@ -0,0 +1,13 @@
FROM registry.access.redhat.com/ubi8:latest
MAINTAINER Sai Sindhur Malleni

RUN dnf install -y --nodocs make git gcc && dnf clean all
RUN git clone https://github.com/eembc/coremark-pro.git && cd coremark-pro && make TARGET=linux64 build
WORKDIR /output/

RUN dnf install -y --nodocs python3.8 python38-devel procps-ng iproute net-tools ethtool nmap iputils && dnf clean all
RUN ln -s /usr/bin/python3 /usr/bin/python
RUN pip3 install --upgrade pip
COPY . /opt/snafu
RUN pip3 install -r /opt/snafu/requirements/py38-reqs/install.txt -e /opt/snafu
diff --git a/snafu/benchmarks/coremarkpro/README.md b/snafu/benchmarks/coremarkpro/README.md
new file mode 100644
index 000000000..6d1c689cc
--- /dev/null
+++ b/snafu/benchmarks/coremarkpro/README.md
@@ -0,0 +1,207 @@
# CoreMark-Pro

A wrapper for [CoreMark-Pro](https://github.com/eembc/coremark-pro), a CPU benchmarking tool that produces a single-number score for easy comparison across runs.

## Overview of Operations

- A path to where CoreMark-Pro has been cloned must be provided to benchmark-wrapper, since there is
no install mechanism.
- The benchmark is executed with `make`, which also compiles the benchmark if that has not been done
yet (see the sketch after this list).
- The run creates a log folder, of which only the `.log` and `.mark` files are processed:

  ```
  coremark-pro/builds/linux64/gcc64/logs
  ├── linux64.gcc64.log   # Raw logs of the CoreMark-Pro run
  ├── linux64.gcc64.mark  # Results: both individual workloads and overall score
  ├── progress.log        # Not processed
  ├── zip-test.run.log    # Not processed
  └── zip-test.size.log   # Not processed
  ```
- The results are ingested into two different Elasticsearch indexes:
  - `*-coremark-pro-summary`: Results from the `.mark` file. Provides the calculated results from CoreMark-Pro.
  - `*-coremark-pro-raw`: Raw logs from the `.log` file. Intended for analyzing the logs manually.
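For reference, the wrapper drives CoreMark-Pro roughly as follows. This is a condensed sketch of `build_workload_cmd()` from `coremarkpro.py` below; the CoreMark-Pro path is a placeholder:

```
import shlex
import subprocess

# Condensed from build_workload_cmd(): run the certified suite with the
# user-supplied context (-c) and workload (-w) values passed via XCMD.
context, workload = 1, 0  # the wrapper's defaults
cmd = shlex.split(f"make TARGET=linux64 certify-all XCMD=' -c{context} -w{workload}'")
subprocess.run(cmd, cwd="/path/to/coremark-pro", check=True)  # placeholder path
```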
## Arguments

### Required

- `-p` / `--path`: Directory where CoreMark-Pro is located.

### Optional

- `-c` / `--context`: CoreMark-Pro's context argument. Defaults to `1`.
- `-w` / `--workloads`: CoreMark-Pro's workload argument. Defaults to `0`.
- `-s` / `--sample`: Number of samples to run. Defaults to `1`.
- `-r` / `--result-name`: The name of CoreMark-Pro's result files. This includes the path relative to `--path` and does not include the extension. Defaults to `builds/linux64/gcc64/logs/linux64.gcc64`.
- `-u` / `--upload`: Parses existing results in a CoreMark-Pro log directory. There is no support for multiple samples, and `sample_starttime` is based on when benchmark-wrapper is run. Mainly used for debugging.


## Running inside a container

The Dockerfile has CoreMark-Pro pre-built and located at `/coremark-pro/`. This path needs to be
passed to benchmark-wrapper with the `--path` argument.

The rest of this section covers common use cases that need additional parameters.

### Archive file

To create an archive file and make it accessible to the host system, the `WORKDIR` is set to
`/output/`, which can be mounted to the host system. Example:

```
podman run -it \
-v ./FOLDER_TO_SAVE_ARCHIVE/:/output/ \
coremark-pro run_snafu -t coremark-pro --path /coremark-pro/ --create-archive \
--archive coremarkpro.archive
```

### Raw logs

To retrieve the raw logs, mount the log folder from CoreMark-Pro to a **dedicated** folder on the host system; otherwise, all contents of the folder will be **deleted** when CoreMark-Pro is executed. The default folder is
`/coremark-pro/builds/linux64/gcc64/logs/`.

Example of the logs folder being saved to an `output` folder in the current directory:

```
podman run -it \
-w /coremark-pro/builds/linux64/gcc64/logs/ \
-v ./output/:/coremark-pro/builds/linux64/gcc64/logs/ \
coremark-pro run_snafu -t coremark-pro -p /coremark-pro/
```

## Parsing

This section gives a general idea of how CoreMark-Pro's output maps onto the Elasticsearch fields.

### Results

These results are calculated by CoreMark-Pro and read from the `.mark` file. Each row of the table is ingested as its own record (a condensed sketch of the conversion follows the example).

#### Example `.mark` file

```
WORKLOAD RESULTS TABLE

                                                 MultiCore SingleCore
Workload Name                                     (iter/s)   (iter/s)    Scaling
----------------------------------------------- ---------- ---------- ----------
cjpeg-rose7-preset                                  178.57     192.31       0.93

.... truncated rest of the workloads ...

MARK RESULTS TABLE

Mark Name                                        MultiCore SingleCore    Scaling
----------------------------------------------- ---------- ---------- ----------
CoreMark-PRO                                       5708.35    5714.89       1.00
```
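Each data row is whitespace-split and coerced into a typed record. A condensed sketch of the conversion done by `create_summary_results()` in `coremarkpro.py` below:

```
import re

# Mirrors create_summary_results(): split a table row on whitespace and
# coerce each column to its type.
headers = ["name", "multicore", "singlecore", "scaling"]
types = [str, float, float, float]

line = "cjpeg-rose7-preset                                  178.57     192.31       0.93"
cols = re.split(r"\s+", line.rstrip())
record = dict(zip(headers, [func(val) for func, val in zip(types, cols)]))
# -> {'name': 'cjpeg-rose7-preset', 'multicore': 178.57, 'singlecore': 192.31, 'scaling': 0.93}
```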
#### Benchmark-wrapper's archive file output

```
{
    "_source": {
        "test_config": {
            "workload": 0,                    # `--workloads`
            "context": 1                      # `--context`
        },
        "sample_starttime": "2021-10.1..",    # Time when benchmark-wrapper was executed.
        "sample": 1,                          # `--sample`
        "name": "cjpeg-rose7-preset",         # Name of the CoreMark-Pro workload
        "multicore": 178.57,                  # MultiCore result
        "singlecore": 192.31,                 # SingleCore result
        "scaling": 0.93,                      # Scaling result
        "type": "workload",                   # Type of result, determined by the table header:
                                              # - `workload`: Data from 'WORKLOAD RESULTS TABLE'
                                              # - `mark`: Data from 'MARK RESULTS TABLE'
        "cluster_name": "laptop",
        "user": "ed",
        "uuid": "3cc2e4a9-bd7f-4394-8d8c-66415ceeb02f",
        "workload": "coremark-pro",
        "run_id": "NA"
    },
}

... The above is repeated for the rest of the workloads and the mark result ...

```

### Raw logs

These are the raw logs parsed from the `.log` file. The median results are dropped since they can be derived using Elasticsearch (a sketch follows the run-structure diagram below). Each row of results is ingested as its own record.


#### Excerpt of a log file

```
#UID        Suite Name                       Ctx Wrk Fails     t(s)   Iter   Iter/s Codesize Datasize
#Results for verification run started at 21285:10:58:22 XCMD=-c1 -w0
236760500   MLT   cjpeg-rose7-preset           1   1     0    0.010      1   100.00   105616   267544
#Results for performance runs started at 21285:10:58:23 XCMD=-c1 -w0
236760500   MLT   cjpeg-rose7-preset           1   1     0    0.081     10   123.46   105616   267544
... truncated rest of the log ...
```

Timestamps such as `21285:10:58:22` encode the two-digit year and the day of year (`%y%j:%H:%M:%S`), i.e. day 285 of 2021 at 10:58:22.

CoreMark-Pro performs two sets of runs for each workload; the runs within a set are marked with the same `uid`. Each set consists of a single verification run and three performance runs, and the number of runs is not configurable. Structure of the runs:
```
Set 1: Context = 1, Workload = 1
├─── Workload verification run
├─── Performance run #1
├─── Performance run #2
└─── Performance run #3

... Repeat for all workloads ...

Set 2: Context and workload specified by the user through -c / -w
├─── Workload verification run
├─── Performance run #1
├─── Performance run #2
└─── Performance run #3

... Repeat for all workloads ...

```
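Since the median rows are dropped at ingest time, a median can be recomputed from the raw index. Below is a minimal sketch using the Python `elasticsearch` client (v8-style API); the endpoint and the `snafu-coremark-pro-raw` index name are placeholder assumptions, and the `.keyword` sub-fields assume the default dynamic mapping:

```
from elasticsearch import Elasticsearch  # assumes the elasticsearch-py client, v8-style API

es = Elasticsearch("http://localhost:9200")  # placeholder endpoint
resp = es.search(
    index="snafu-coremark-pro-raw",  # placeholder name for the *-coremark-pro-raw index
    size=0,
    query={"term": {"type.keyword": "performance"}},  # only the three performance runs per set
    aggs={
        "per_workload": {
            "terms": {"field": "name.keyword"},
            "aggs": {"median_iters": {"percentiles": {"field": "iter/s", "percents": [50]}}},
        }
    },
)
for bucket in resp["aggregations"]["per_workload"]["buckets"]:
    print(bucket["key"], "median iter/s:", bucket["median_iters"]["values"]["50.0"])
```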
#### Benchmark-wrapper's archive file output

A `run_index` field is added to ensure that performance runs with identical results are not flagged as duplicates.

```
{
    "_source": {
        ## Same as for the `.mark` file
        "test_config": {
            "workload": 0,
            "context": 1
        },
        "sample_starttime": "2021-10.1..",  # Time when benchmark-wrapper was executed.
        "sample": 1,

        ## Results from the logs
        "uid": "236760500",         # A UID generated per workload by CoreMark-Pro
        "suite": "MLT",
        "name": "cjpeg-rose7-preset",
        "ctx": 1,
        "wrk": 1,
        "fails": 0,
        "t(s)": 0.01,
        "iter": 1,
        "iter/s": 100.0,
        "codesize": 105616,
        "datasize": 267544,
        "type": "verification",     # Possible types: verification / performance
        "starttime": "2021-10....", # Start time of the runs as recorded by CoreMark-Pro
        "run_index": 0,             # Sequence number for runs of the same type: always 0 for
                                    # verification, 0-2 for performance runs.

        ## Same as for the `.mark` file
        "cluster_name": "laptop",
        "user": "ed",
        "uuid": "816f7fe9-ab04-45a4-8a1f-ce61c2fe11e6",
        "workload": "coremark-pro",
        "run_id": "NA"
    },
}
```
## Limitations

- Visualizing the data from `*-coremark-pro-raw` is limited: additional fields are needed to aggregate the runs (see the sketch below for a local workaround).
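As a local workaround, the raw records can be regrouped into their run sets. A sketch that assumes a hypothetical `records` list of documents shaped like the example above (the values are made up for illustration):

```
from collections import defaultdict

# Hypothetical input: raw-index documents shaped like the example above.
records = [
    {"uid": "236760500", "type": "performance", "run_index": 0, "iter/s": 123.46},
    {"uid": "236760500", "type": "performance", "run_index": 1, "iter/s": 124.10},
    {"uid": "236760500", "type": "performance", "run_index": 2, "iter/s": 122.90},
]

# (uid, type) identifies one set of runs for a workload; run_index orders them.
runs = defaultdict(list)
for rec in records:
    runs[(rec["uid"], rec["type"])].append(rec)

for (uid, run_type), recs in runs.items():
    rates = sorted(r["iter/s"] for r in recs)
    print(uid, run_type, "median iter/s:", rates[len(rates) // 2])
```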
+ """ + + headers = [ + "uid", + "suite", + "name", + "ctx", + "wrk", + "fails", + "t(s)", + "iter", + "iter/s", + "codesize", + "datasize", + ] + types = [str, str, str, int, int, int, float, int, float, int, int] + + with open(self.config.path + self.config.result_name + ".log", "rt", encoding="utf-8") as file: + results = [] + prev_run_type = "" + run_type = "" + run_index = 0 + run_starttime = "" + for line in file: + # Look for the following string in the logs: + # Results for `run_type` started at `timestamp` + result = re.search(r"^#Results for (\w+) .* (\d+:\d+:\d+:\d+) XCMD", line) + if result: + prev_run_type = run_type + (run_type, run_starttime) = result.group(1, 2) + continue + + # Ignore median results since it can be derived + if "median" not in line: + if re.search(r"^\d+", line): + # Adds a basic sequence number for the runs to avoid performance + # runs with the same result from being flagged as a duplicate. + if prev_run_type != run_type: + run_index = 0 + prev_run_type = run_type + cols = re.split(r"\s+", line.rstrip()) + converted_cols = [func(val) for func, val in zip(types, cols)] + record = dict(zip(headers, converted_cols)) + record["type"] = run_type + record["starttime"] = self.convert_coremark_timestamp(run_starttime) + record["run_index"] = run_index + run_index += 1 + results.append(record) + yield self.create_new_result( + data=record, + config=self.result_config, + tag="raw", + ) + + def create_summary_results(self) -> Iterable[BenchmarkResult]: + """ + Parses the CoreMark Pro's 'mark' file which has the scores calculated + """ + + headers = ["name", "multicore", "singlecore", "scaling"] + types = [str, float, float, float] + + with open(self.config.path + self.config.result_name + ".mark", "rt", encoding="utf-8") as file: + table_name = "" + for line in file: + line = line.rstrip() + if not line: + continue + + # Find where the table starts and skips the fluff + if "RESULTS TABLE" in line: + table_name = line.split(" ")[0].lower() + while True: + # Exit out of loop once it finds the table delimiter + # Disable pylint false positive, doesn't impact the generator + # pylint: disable=stop-iteration-return + if "---" in next(file): + break + continue + + cols = re.split(r"\s+", line.rstrip()) + converted_cols = [func(val) for func, val in zip(types, cols)] + record = dict(zip(headers, converted_cols)) + record["type"] = table_name + + yield self.create_new_result( + data=record, + config=self.result_config, + tag="summary", + ) + + @staticmethod + def convert_coremark_timestamp(timestamp) -> str: + """ + Converts CoreMark Pro's timestamp in the raw logs into a ES friendly date format + """ + + time_obj = datetime.strptime(timestamp, "%y%j:%H:%M:%S") + utc_tz = tz.gettz("UTC") + + return (time_obj.astimezone(utc_tz)).strftime("%Y-%m-%dT%H:%M:%S.%fZ") + + def setup(self) -> bool: + + # Parse the command line args + self.config.parse_args() + + self.logger.info("Building CoreMark Pro") + build = sample_process( + ["make", "build"], + self.logger, + retries=2, + expected_rc=0, + cwd=self.config.path, + env=self.config.get_env(), + ) + result = next(iter(build)) + if not result.success: + self.logger.critical(f"Failed to buiild CoreMark Pro! 
            return False

        # Set up defaults for the required variables
        self.config.uuid = os.getenv("uuid", str(uuid.uuid4()))
        self.config.user = os.getenv("test_user", "myuser")
        if "clustername" not in os.environ:
            self.config.cluster_name = "mycluster"

        self.result_config["test_config"] = {
            "workload": self.config.workload,
            "context": self.config.context,
        }

        return True

    def collect(self) -> Iterable[BenchmarkResult]:

        cmd = self.build_workload_cmd()

        if not self.config.upload:
            samples = sample_process(
                cmd,
                self.logger,
                num_samples=self.config.sample,
                retries=2,
                expected_rc=0,
                cwd=self.config.path,
                env=self.config.get_env(),
            )

            for sample_num, sample in enumerate(samples, start=1):
                self.logger.info(f"Starting coremark-pro sample number {sample_num}")

                self.result_config["sample_starttime"] = datetime.utcnow().strftime("%Y-%m-%dT%H:%M:%S.%fZ")
                self.result_config["sample"] = self.config.sample
                if not sample.success:
                    self.logger.critical(f"Failed to run! Got results: {sample}")
                else:
                    yield from self.create_raw_results()
                    yield from self.create_summary_results()
        else:
            self.result_config["sample_starttime"] = datetime.utcnow().strftime("%Y-%m-%dT%H:%M:%S.%fZ")
            self.result_config["sample"] = self.config.sample
            yield from self.create_raw_results()
            yield from self.create_summary_results()

    def cleanup(self):
        return True