Skip to content

Commit

Permalink
feat,ci: move check scripts to separate files
Browse files Browse the repository at this point in the history
- Easier to run and debug as separate files
- Can be used locally, too
- Use GraalVM to compile native binary for accents removal with same Java code as used in application (also uses JBang as build system)
- "Just" using JBang is not fast enough, JVM startup times are making it sluggish!
  • Loading branch information
poikilotherm committed Jul 2, 2024
1 parent 07d67ac commit f4b61bf
Show file tree
Hide file tree
Showing 3 changed files with 151 additions and 71 deletions.
88 changes: 17 additions & 71 deletions .github/workflows/check_property_files.yml
Original file line number Diff line number Diff line change
@@ -1,88 +1,34 @@
name: "Properties Check"
on:
pull_request:
#paths:
# - "**/*.properties"
# - "scripts/api/data/metadatablocks/*"
paths:
- "src/**/*.properties"
- "scripts/api/data/metadatablocks/*"
jobs:
duplicate_keys:
name: Duplicate Keys
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
- uses: actions/checkout@v4
- name: Run duplicates detection script
shell: bash
run: |
FAIL=0
for PF in $(find . -wholename '*/src/*.properties'); do
FILTER=$(grep -a -v -E "^(#.*|\s*$)" "$PF" | cut -d"=" -f1 | sort | uniq -c | tr -s " " | { grep -vs "^ 1 " || true; })
if [ -n "$FILTER" ]; then
FAIL=1
echo "::group::$PF"
for KEY in $(echo "$FILTER" | cut -d" " -f3); do
for LINE in $(grep -n -E -e "^$KEY=" "$PF" | cut -d":" -f1); do
echo "::error file=$PF,line=$LINE::Found duplicate for key '$KEY' in line $LINE"
done
done
echo "::endgroup::"
fi
done
if [ "$FAIL" -eq 1 ]; then
exit 1
fi
run: tests/check_duplicate_properties.sh

metadata_blocks_properties:
name: Metadata Blocks Properties
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
- name: Run metadata block properties verification script
- uses: actions/checkout@v4
- name: Install JBang
shell: bash
run: |
for MDB in $(find scripts/api/data/metadatablocks -name '*.tsv'); do
BLOCK_NAME=$(sed -n "2p" "$MDB" | cut -f2)
BLOCK_DISPLAYNAME=$(sed -n "2p" "$MDB" | cut -f4)
PROPERTIES_FILE="src/main/java/propertyFiles/$BLOCK_NAME.properties"
# Check correct file exists
if [ ! -r "$PROPERTIES_FILE" ]; then
echo "::error::Missing properties file for metadata block '$BLOCK_NAME', expected at '$PROPERTIES_FILE'"
continue
fi
# Check metadata block properties exist and are equal to TSV source
if ! grep -a -q -e "^metadatablock.name=$BLOCK_NAME$" "$PROPERTIES_FILE"; then
echo "::error::Missing 'metadatablock.name=$BLOCK_NAME' or different from TSV source"
fi
if ! grep -a -q -e "^metadatablock.displayName=$BLOCK_DISPLAYNAME$" "$PROPERTIES_FILE"; then
echo "::error::Missing 'metadatablock.displayName=$BLOCK_DISPLAYNAME' or different from TSV source"
fi
if ! grep -a -q -e "^metadatablock.displayFacet=" "$PROPERTIES_FILE"; then
echo "::error::Missing 'metadatablock.displayFacet=...'"
fi
# Check dataset fields
for FIELD in $(grep -a -A1000 "^#datasetField" "$MDB" | tail -n+2 | grep -a -B1000 "^#controlledVocabulary" | head -n-1 | cut -f2); do
for ENTRY in title description watermark; do
if ! grep -a -q -e "^datasetfieldtype.$FIELD.$ENTRY=" "$PROPERTIES_FILE"; then
echo "::error::Missing key 'datasetfieldtype.$FIELD.$ENTRY=...'"
fi
done
done
# Check CV entries
grep -a -A1000 "^#controlledVocabulary" "$MDB" | tail -n+2 |
{
while read LINE; do
FIELD_NAME=$(echo "$LINE" | cut -f1)
# TODO: needs to replace UTF-8 chars with nearest ascii here!
FIELD_VALUE=$(echo "$LINE" | cut -f2 | tr '[:upper:]' '[:lower:]' | tr " " "_")
if ! grep -q -a -e "^controlledvocabulary.$FIELD_NAME.$FIELD_VALUE=" "$PROPERTIES_FILE"; then
echo "::error::Missing key 'controlledvocabulary.$FIELD_NAME.$FIELD_VALUE=...'"
fi
done
};
done
curl -Ls https://sh.jbang.dev | bash -s - app setup
- name: Install GraalVM + Native Image
uses: graalvm/setup-graalvm@v1
with:
github-token: ${{ secrets.GITHUB_TOKEN }}
java-version: '21'
distribution: 'graalvm-community'
- name: Run metadata block properties verification script
shell: bash
run: tests/verify_mdb_properties.sh
37 changes: 37 additions & 0 deletions tests/check_duplicate_properties.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
#!/bin/bash

# This script checks the Java *.properties files below any "src" directory of the
# repository for keys that are defined more than once. For every duplicate it prints
# GitHub workflow commands: a log group per file and one file/line error annotation
# per offending line.
#
# Usage:       tests/check_duplicate_properties.sh
# Exit status: 1 if at least one duplicated key was found, 0 otherwise.

set -euo pipefail

#######################################
# List the duplicated keys of a properties file.
# Arguments: $1 - path to the properties file
# Outputs:   one line per duplicated key, in order of first appearance:
#            "<line>,<line>,...<TAB><key>"
# Returns:   awk's exit status (0 also when there are no duplicates)
#######################################
list_duplicates() {
  local file=$1
  # LC_ALL=C makes awk work byte-wise, so files that are not valid UTF-8 are fine
  # (the grep based predecessor of this check used "grep -a" for the same reason).
  # The file is passed on stdin so that an unusual file name can never be mistaken
  # for an awk "var=value" operand.
  LC_ALL=C awk '
    # Comment lines (leading "#") and blank lines never define a key.
    /^#/ || /^[ \t\r\f\v]*$/ { next }
    {
      pos = index($0, "=")
      # Lines without "=" (e.g. continuation lines of a multi-line value) are no
      # key definitions and must not be counted.
      if (pos == 0) next
      key = substr($0, 1, pos - 1)
      # "key = value" and "key=value" define the same key.
      gsub(/^[ \t]+|[ \t]+$/, "", key)
      if (key in lines) {
        lines[key] = lines[key] "," NR
        duplicated[key] = 1
      } else {
        lines[key] = NR
        order[++count] = key
      }
    }
    END {
      for (i = 1; i <= count; i++) {
        if (order[i] in duplicated) {
          printf "%s\t%s\n", lines[order[i]], order[i]
        }
      }
    }
  ' < "$file"
}

FAIL=0

# "git rev-parse --show-cdup" prints the relative path up to the repository root. That
# is the EMPTY string when we already are at the root, and find(1) refuses an empty
# start directory - which would silently skip every file. Fall back to "." in that
# case (and when we are not inside a git work tree at all).
ROOT=$(git rev-parse --show-cdup 2> /dev/null || true)

while IFS= read -r -d '' FILE; do

  # Scan the whole file for duplicates
  DUPLICATES=$(list_duplicates "$FILE")
  if [ -z "$DUPLICATES" ]; then
    continue
  fi

  # There are duplicates present, point people to the source
  FAIL=1

  echo "::group::$FILE"
  # The key comes last so that it may contain any character, including tabs.
  while IFS=$'\t' read -r DUPLICATE_NUMBERS KEY; do
    IFS=',' read -r -a LINE_NUMBERS <<< "$DUPLICATE_NUMBERS"

    # This form will make Github annotate the lines in the PR that changes the properties file
    for LINE_NUMBER in "${LINE_NUMBERS[@]}"; do
      echo "::error file=$FILE,line=$LINE_NUMBER::Found duplicate for key '$KEY' in lines $DUPLICATE_NUMBERS"
    done
  done <<< "$DUPLICATES"
  echo "::endgroup::"

done < <(find "${ROOT:-.}" -wholename "*/src/*.properties" -print0)

if [ "$FAIL" -eq 1 ]; then
  exit 1
fi
97 changes: 97 additions & 0 deletions tests/verify_mdb_properties.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,97 @@
#!/bin/bash

# This script will check our metadata block files and scan if the properties files contain all the matching keys.
#
# Requirements: jbang and GraalVM native-image on the PATH, must be run inside the git work tree.
# Output:       GitHub workflow commands (one log group per TSV file, "::error::" per finding).
# Exit status:  1 if any check failed or a required tool is missing, 0 otherwise.

set -euo pipefail

if ! command -v jbang > /dev/null 2>&1; then
  echo "Cannot find jbang on path. Did you install it?" >&2
  exit 1
fi
if ! command -v native-image > /dev/null 2>&1; then
  echo "Cannot find GraalVM native-image on path. Did you install it?" >&2
  exit 1
fi

FAIL=0

# Relative path from the current directory up to the repository root (empty when we
# already are at the root). Only ever used as a prefix, so the empty value is fine.
REPO_ROOT=$(git rev-parse --show-cdup)

# We need a small Java app to replace UTF-8 chars with nearest ascii / strip accents because of
# https://github.com/IQSS/dataverse/blob/dddcf29188a5c35174f3c94ffc1c4cb1d7fc0552/src/main/java/edu/harvard/iq/dataverse/ControlledVocabularyValue.java#L140
# This cannot be replaced by another tool, as it behaves rather individually.
DIR=$(mktemp -d)
SOURCE="$DIR/stripaccents.java"
STRIP_BIN="$(dirname "$0")/stripaccents"

# Remove the temporary sources and the compiled helper on every exit path, including
# aborts caused by "set -e".
cleanup() {
  rm -rf -- "$DIR"
  rm -f -- "$STRIP_BIN"
}
trap cleanup EXIT

# Quoted delimiter: the Java source is written out literally, nothing is expanded by the shell.
cat > "$SOURCE" << 'EOF'
///usr/bin/env jbang "$0" "$@" ; exit $?
//JAVA 11+
//DEPS org.apache.commons:commons-lang3:3.12.0
import org.apache.commons.lang3.StringUtils;
import java.nio.charset.StandardCharsets;
import java.io.IOException;
class stripaccents {
    public static void main(String[] args) throws IOException {
        String input = new String(System.in.readAllBytes(), StandardCharsets.UTF_8);
        System.out.println(StringUtils.stripAccents(input));
    }
}
EOF
jbang export native --force --fresh -O "$STRIP_BIN" "$SOURCE"

#######################################
# Check whether a file contains a line starting with a given literal string.
# The prefix is compared byte for byte and never interpreted as a regular expression,
# so names containing ".", "[" or "*" are matched exactly.
# Arguments: $1 - literal prefix, $2 - file to search
# Returns:   0 if such a line exists, 1 otherwise
#######################################
has_line_starting_with() {
  # The prefix travels via the environment: "awk -v" would interpret backslash escapes.
  PREFIX=$1 LC_ALL=C awk '
    index($0, ENVIRON["PREFIX"]) == 1 { found = 1; exit }
    END { exit !found }
  ' < "$2"
}

while IFS= read -r -d '' MDB; do

  echo "::group::$MDB"
  BLOCK_NAME=$(sed -n "2p" "$MDB" | cut -f2)
  BLOCK_DISPLAY_NAME=$(sed -n "2p" "$MDB" | cut -f4)
  PROPERTIES_FILE="${REPO_ROOT}src/main/java/propertyFiles/$BLOCK_NAME.properties"

  # Check correct file exists
  if [ ! -r "$PROPERTIES_FILE" ]; then
    echo "::error::Missing properties file for metadata block '$BLOCK_NAME', expected at '$PROPERTIES_FILE'"
    FAIL=1
    # Close the log group before skipping this file, otherwise all following output gets nested in it.
    echo "::endgroup::"
    continue
  fi

  # Check metadata block properties exist and are equal to TSV source
  # (-F -x: the whole line must equal the literal string)
  if ! grep -a -q -F -x -e "metadatablock.name=$BLOCK_NAME" -- "$PROPERTIES_FILE"; then
    echo "::error::Missing 'metadatablock.name=$BLOCK_NAME' or different from TSV source in $PROPERTIES_FILE"
    FAIL=1
  fi
  if ! grep -a -q -F -x -e "metadatablock.displayName=$BLOCK_DISPLAY_NAME" -- "$PROPERTIES_FILE"; then
    echo "::error::Missing 'metadatablock.displayName=$BLOCK_DISPLAY_NAME' or different from TSV source in $PROPERTIES_FILE"
    FAIL=1
  fi
  if ! has_line_starting_with "metadatablock.displayFacet=" "$PROPERTIES_FILE"; then
    echo "::error::Missing 'metadatablock.displayFacet=...' in $PROPERTIES_FILE"
    FAIL=1
  fi

  # Check dataset fields: second column of every row between the "#datasetField" header
  # and the "#controlledVocabulary" header (or the end of the file). awk walks the whole
  # file, so sections of any length are covered.
  while IFS= read -r FIELD; do
    [ -n "$FIELD" ] || continue
    for ENTRY in title description watermark; do
      if ! has_line_starting_with "datasetfieldtype.$FIELD.$ENTRY=" "$PROPERTIES_FILE"; then
        echo "::error::Missing key 'datasetfieldtype.$FIELD.$ENTRY=...' in $PROPERTIES_FILE"
        FAIL=1
      fi
    done
  done < <(LC_ALL=C awk -F'\t' '
    /^#controlledVocabulary/ { inside = 0 }
    inside { print $2 }
    /^#datasetField/ { inside = 1 }
  ' < "$MDB")

  # Check CV entries: every row after the "#controlledVocabulary" header.
  # NOTE: "read" without "IFS=" trims leading and trailing whitespace, tabs included, and
  # the "cut -f1" / "cut -f2" below rely on that. Presumably the data rows start with an
  # empty first column - TODO confirm against the TSV format before changing this.
  while read -r LINE; do
    # Blank lines (e.g. at the end of the file) carry no entry.
    [ -n "$LINE" ] || continue
    FIELD_NAME=$(printf '%s\n' "$LINE" | cut -f1)
    # Accents are stripped BEFORE lower-casing: a tr that works byte-wise (e.g. GNU
    # coreutils) cannot lower-case multi-byte letters like "Ä", but it can lower-case
    # the plain "A" that is left after stripping.
    FIELD_VALUE=$(printf '%s\n' "$LINE" | cut -f2 | "$STRIP_BIN" | tr '[:upper:]' '[:lower:]' | tr " " "_")

    if ! has_line_starting_with "controlledvocabulary.$FIELD_NAME.$FIELD_VALUE=" "$PROPERTIES_FILE"; then
      echo "::error::Missing key 'controlledvocabulary.$FIELD_NAME.$FIELD_VALUE=...' in $PROPERTIES_FILE"
      FAIL=1
    fi
  done < <(LC_ALL=C awk '
    inside { print }
    /^#controlledVocabulary/ { inside = 1 }
  ' < "$MDB")

  echo "::endgroup::"

done < <(find "${REPO_ROOT}scripts/api/data/metadatablocks" -name '*.tsv' -print0)

# The temporary files and the helper binary are removed by the EXIT trap.
if [ "$FAIL" -eq 1 ]; then
  exit 1
fi

0 comments on commit f4b61bf

Please sign in to comment.