From 27a6ed9787cd086fd2908805d2c0ef0637d998b8 Mon Sep 17 00:00:00 2001 From: Shelly Belsky <71195502+sbelsk@users.noreply.github.com> Date: Mon, 8 Jul 2024 13:54:41 -0400 Subject: [PATCH] Improve the submission XML validation script (#2335) This PR expands on the initial implementation of the submission XML validator introduced in https://github.com/Kitware/CDash/pull/2277. The validator now supports parsing bigger XML files, can process multiple files per invocation, and provides better error handling. The logic for determining the submission XML file type has been pulled out to be used by both the submission parsers and the XML validation code. To use the validator from a CDash container, run: ``` php /artisan submission:validate ... ``` where `` is the path to the application root directory (e.g., `/cdash`), and `...` are one or more file paths to to be validated. --- app/Console/Commands/ValidateXml.php | 94 ++++++++++++++++++++-------- app/Utils/SubmissionUtils.php | 78 +++++++++++++++++++++++ app/cdash/include/ctestparser.php | 69 +++----------------- phpstan-baseline.neon | 52 +++++++++++++-- 4 files changed, 200 insertions(+), 93 deletions(-) create mode 100644 app/Utils/SubmissionUtils.php diff --git a/app/Console/Commands/ValidateXml.php b/app/Console/Commands/ValidateXml.php index 0cb2dbd99f..02278e609a 100644 --- a/app/Console/Commands/ValidateXml.php +++ b/app/Console/Commands/ValidateXml.php @@ -2,6 +2,7 @@ namespace App\Console\Commands; +use App\Utils\SubmissionUtils; use Illuminate\Console\Command; use DOMDocument; @@ -9,42 +10,85 @@ class ValidateXml extends Command { /** * The name and signature of the console command. - * - * @var string */ protected $signature = 'submission:validate - { xml_file : the XML file to be validated } - { xsd_file : the schema file to validate against }'; + { xml_file* : the XML file(s) to be validated }'; /** * The console command description. - * - * @var string|null */ protected $description = 'Validate XML submission files'; /** * Execute the console command. - * - * @return int */ - public function handle() + public function handle(): int { - $input_xml_file = $this->argument('xml_file'); - $schema_file = $this->argument('xsd_file'); - - // load the input files to be validated - $xml = new DOMDocument(); - $xml->load($input_xml_file); - - // run the validator. let it throw errors if there - // are any, since it prints nice error messages. - // FIXME: this might crash if the file is too big... - // change this to a streaming parser as opposed to - // loading the whole file into memory! - $xml->schemaValidate($schema_file); - - // if the validation succeeded, return 0 - return Command::SUCCESS; + // parse all input files from command line + $xml_files_args = $this->argument('xml_file'); + $schemas_dir = base_path()."/app/Validators/Schemas"; + + // process each of the input files + $has_errors = false; + foreach ($xml_files_args as $input_xml_file) { + // determine the file type by peeking at its contents + $xml_file_handle = fopen($input_xml_file, 'r'); + if ($xml_file_handle === false) { + $this->error("ERROR: Could not open file: '{$input_xml_file}'"); + $has_errors = true; + continue; + } + $xml_type = SubmissionUtils::get_xml_type($xml_file_handle)['xml_type']; + fclose($xml_file_handle); + + // verify we identified a valid xml type + if ($xml_type === '') { + $this->error("ERROR: Could not determine submission" + ." file type for: '{$input_xml_file}'"); + $has_errors = true; + continue; + } + + // verify we can find a corresponding schema file + $schema_file = "{$schemas_dir}/{$xml_type}.xsd"; + if (!file_exists($schema_file)) { + $this->error("ERROR: Could not find schema file '{$schema_file}'" + ." corresonding to input: '{$input_xml_file}'"); + $has_errors = true; + continue; + } + + // let us control the failures so we can continue + // parsing all the files instead of crashing midway + libxml_use_internal_errors(true); + + // load the input file to be validated + $xml = new DOMDocument(); + $xml->load($input_xml_file, LIBXML_PARSEHUGE); + + // run the validator and collect errors if there are any + if (!$xml->schemaValidate($schema_file)) { + $errors = libxml_get_errors(); + foreach ($errors as $error) { + if ($error->level > 2) { + $this->error("ERROR: {$error->message} in {$error->file}," + ." line: {$error->line}, column: {$error->column}"); + } + } + libxml_clear_errors(); + $has_errors = true; + continue; + } + $this->line("Validated file: {$input_xml_file}."); + } + + // finally, report the results + if ($has_errors) { + $this->error("FAILED: Some XML file checks did not pass!"); + return Command::FAILURE; + } else { + $this->line("SUCCESS: All XML file checks passed."); + return Command::SUCCESS; + } } } diff --git a/app/Utils/SubmissionUtils.php b/app/Utils/SubmissionUtils.php new file mode 100644 index 0000000000..99714e196b --- /dev/null +++ b/app/Utils/SubmissionUtils.php @@ -0,0 +1,78 @@ + + */ + public static function get_xml_type(mixed $filehandle): array + { + $file = ''; + $handler = null; + // read file contents until we recognize its elements + while ($file === '' && !feof($filehandle)) { + $content = fread($filehandle, 8192); + if ($content === false) { + // if read failed, fallback onto default null values + break; + } + if (str_contains($content, ' $filehandle, + 'xml_handler' => $handler, + 'xml_type' => $file, + ]; + } +} diff --git a/app/cdash/include/ctestparser.php b/app/cdash/include/ctestparser.php index c40526c698..00cbb3ad51 100644 --- a/app/cdash/include/ctestparser.php +++ b/app/cdash/include/ctestparser.php @@ -19,6 +19,7 @@ use CDash\Model\Project; use Illuminate\Support\Facades\Log; use Illuminate\Support\Facades\Storage; +use App\Utils\SubmissionUtils; class CDashParseException extends RuntimeException { @@ -103,27 +104,13 @@ function parse_put_submission($filehandler, $projectid, $expected_md5) $include_file = 'xml_handlers/' . $buildfile->type . '_handler.php'; $valid_types = [ 'BazelJSON', - 'build', 'BuildPropertiesJSON', - 'configure', - 'coverage', - 'coverage_junit', - 'coverage_log', - 'done', - 'dynamic_analysis', 'GcovTar', 'JavaJSONTar', 'JSCoverTar', - 'note', 'OpenCoverTar', - 'project', 'retry', - 'sax', 'SubProjectDirectories', - 'testing', - 'testing_junit', - 'update', - 'upload', ]; if (stream_resolve_include_path($include_file) === false || !in_array($buildfile->type, $valid_types, true)) { Log::error("Project: $projectid. No handler include file for {$buildfile->type} (tried $include_file)"); @@ -182,54 +169,12 @@ function ctest_parse($filehandle, $projectid, $expected_md5 = '') } // Figure out what type of XML file this is. - $handler = null; - $file = ''; - while (is_null($handler) && !feof($filehandle)) { - $content = fread($filehandle, 8192); - if (str_contains($content, '