Skip to content

Commit

Permalink
Improve the submission XML validation script (#2335)
Browse files Browse the repository at this point in the history
This PR expands on the initial implementation of the submission XML
validator introduced in #2277. The
validator now supports parsing bigger XML files, can process multiple
files per invocation, and provides better error handling. The logic for
determining the submission XML file type has been pulled out to be used
by both the submission parsers and the XML validation code.

To use the validator from a CDash container, run:
```
php <cdash_base>/artisan submission:validate <xml_file>...
```
where `<cdash_base>` is the path to the application root directory
(e.g., `/cdash`), and `<xml_file>...` are one or more file paths to
be validated.
  • Loading branch information
sbelsk committed Jul 8, 2024
1 parent 4e27d50 commit 27a6ed9
Show file tree
Hide file tree
Showing 4 changed files with 200 additions and 93 deletions.
94 changes: 69 additions & 25 deletions app/Console/Commands/ValidateXml.php
Original file line number Diff line number Diff line change
Expand Up @@ -2,49 +2,93 @@

namespace App\Console\Commands;

use App\Utils\SubmissionUtils;
use Illuminate\Console\Command;
use DOMDocument;

class ValidateXml extends Command
{
/**
* The name and signature of the console command.
*
* @var string
*/
protected $signature = 'submission:validate
{ xml_file : the XML file to be validated }
{ xsd_file : the schema file to validate against }';
{ xml_file* : the XML file(s) to be validated }';

/**
* The console command description.
*
* @var string|null
*/
protected $description = 'Validate XML submission files';

/**
* Execute the console command.
*
* @return int
*/
public function handle()
public function handle(): int
{
$input_xml_file = $this->argument('xml_file');
$schema_file = $this->argument('xsd_file');

// load the input files to be validated
$xml = new DOMDocument();
$xml->load($input_xml_file);

// run the validator. let it throw errors if there
// are any, since it prints nice error messages.
// FIXME: this might crash if the file is too big...
// change this to a streaming parser as opposed to
// loading the whole file into memory!
$xml->schemaValidate($schema_file);

// if the validation succeeded, return 0
return Command::SUCCESS;
// parse all input files from command line
$xml_files_args = $this->argument('xml_file');
$schemas_dir = base_path()."/app/Validators/Schemas";

// process each of the input files
$has_errors = false;
foreach ($xml_files_args as $input_xml_file) {
// determine the file type by peeking at its contents
$xml_file_handle = fopen($input_xml_file, 'r');
if ($xml_file_handle === false) {
$this->error("ERROR: Could not open file: '{$input_xml_file}'");
$has_errors = true;
continue;
}
$xml_type = SubmissionUtils::get_xml_type($xml_file_handle)['xml_type'];
fclose($xml_file_handle);

// verify we identified a valid xml type
if ($xml_type === '') {
$this->error("ERROR: Could not determine submission"
." file type for: '{$input_xml_file}'");
$has_errors = true;
continue;
}

// verify we can find a corresponding schema file
$schema_file = "{$schemas_dir}/{$xml_type}.xsd";
if (!file_exists($schema_file)) {
$this->error("ERROR: Could not find schema file '{$schema_file}'"
." corresonding to input: '{$input_xml_file}'");
$has_errors = true;
continue;
}

// let us control the failures so we can continue
// parsing all the files instead of crashing midway
libxml_use_internal_errors(true);

// load the input file to be validated
$xml = new DOMDocument();
$xml->load($input_xml_file, LIBXML_PARSEHUGE);

// run the validator and collect errors if there are any
if (!$xml->schemaValidate($schema_file)) {
$errors = libxml_get_errors();
foreach ($errors as $error) {
if ($error->level > 2) {
$this->error("ERROR: {$error->message} in {$error->file},"
." line: {$error->line}, column: {$error->column}");
}
}
libxml_clear_errors();
$has_errors = true;
continue;
}
$this->line("Validated file: {$input_xml_file}.");
}

// finally, report the results
if ($has_errors) {
$this->error("FAILED: Some XML file checks did not pass!");
return Command::FAILURE;
} else {
$this->line("SUCCESS: All XML file checks passed.");
return Command::SUCCESS;
}
}
}
78 changes: 78 additions & 0 deletions app/Utils/SubmissionUtils.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
<?php

declare(strict_types=1);

namespace App\Utils;

class SubmissionUtils
{
    /**
     * Ordered table of XML element markers and what they identify.
     *
     * Each key is the text searched for in the file contents; each value is
     * a pair of [handler class name, submission type name].
     *
     * Entries are tested in order and the first one found wins, so the order
     * is significant: '<Update' is tested first so that it is not confused
     * with Build, and '<CoverageLog' is tested before its prefix '<Coverage'.
     *
     * @var array<string,array{0:string,1:string}>
     */
    private const XML_TYPE_MARKERS = [
        '<Update' => [\UpdateHandler::class, 'Update'],
        '<Build' => [\BuildHandler::class, 'Build'],
        '<Configure' => [\ConfigureHandler::class, 'Configure'],
        '<Testing' => [\TestingHandler::class, 'Test'],
        '<CoverageLog' => [\CoverageLogHandler::class, 'CoverageLog'],
        '<Coverage' => [\CoverageHandler::class, 'Coverage'],
        '<report' => [\CoverageJUnitHandler::class, 'CoverageJUnit'],
        '<Notes' => [\NoteHandler::class, 'Notes'],
        '<DynamicAnalysis' => [\DynamicAnalysisHandler::class, 'DynamicAnalysis'],
        '<Project' => [\ProjectHandler::class, 'Project'],
        '<Upload' => [\UploadHandler::class, 'Upload'],
        '<testsuite' => [\TestingJUnitHandler::class, 'TestJUnit'],
        '<Done' => [\DoneHandler::class, 'Done'],
    ];

    /**
     * Figure out what type of XML file this is.
     *
     * Reads the stream in 8192-byte chunks until one of the known element
     * markers is found (or the stream is exhausted / a read fails), then
     * rewinds the stream so the caller can parse it from the start.
     *
     * @param mixed $filehandle an open, readable and seekable stream resource
     *
     * @return array<string,mixed> with keys:
     *   - 'file_handle': the same handle that was passed in, rewound;
     *   - 'xml_handler': handler class name, or null if no marker was found;
     *   - 'xml_type':    submission type name, or '' if no marker was found
     */
    public static function get_xml_type(mixed $filehandle): array
    {
        $file = '';
        $handler = null;

        // A marker may be split across two reads (e.g. "...<B" | "uild>").
        // Carrying the last (longest marker - 1) bytes of what was already
        // scanned into the next scan makes such a marker visible as a whole.
        // The carried bytes can never hold a complete marker themselves: if
        // they did, the previous iteration would already have matched it.
        $overlap = max(array_map('strlen', array_keys(self::XML_TYPE_MARKERS))) - 1;
        $carry = '';

        // read file contents until we recognize its elements
        while ($file === '' && !feof($filehandle)) {
            $content = fread($filehandle, 8192);
            if ($content === false) {
                // if read failed, fallback onto default null values
                break;
            }

            $window = $carry . $content;
            foreach (self::XML_TYPE_MARKERS as $marker => [$handler_class, $xml_type]) {
                if (str_contains($window, $marker)) {
                    $handler = $handler_class;
                    $file = $xml_type;
                    break;
                }
            }

            // NOTE: a '<CoverageLog' marker whose '<Coverage' prefix ends
            // exactly at a read boundary is still reported as 'Coverage',
            // because the shorter marker matches before the rest is read.
            $carry = substr($window, -$overlap);
        }

        // restore the file descriptor to beginning of file
        rewind($filehandle);

        return [
            'file_handle' => $filehandle,
            'xml_handler' => $handler,
            'xml_type' => $file,
        ];
    }
}
69 changes: 7 additions & 62 deletions app/cdash/include/ctestparser.php
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
use CDash\Model\Project;
use Illuminate\Support\Facades\Log;
use Illuminate\Support\Facades\Storage;
use App\Utils\SubmissionUtils;

class CDashParseException extends RuntimeException
{
Expand Down Expand Up @@ -103,27 +104,13 @@ function parse_put_submission($filehandler, $projectid, $expected_md5)
$include_file = 'xml_handlers/' . $buildfile->type . '_handler.php';
$valid_types = [
'BazelJSON',
'build',
'BuildPropertiesJSON',
'configure',
'coverage',
'coverage_junit',
'coverage_log',
'done',
'dynamic_analysis',
'GcovTar',
'JavaJSONTar',
'JSCoverTar',
'note',
'OpenCoverTar',
'project',
'retry',
'sax',
'SubProjectDirectories',
'testing',
'testing_junit',
'update',
'upload',
];
if (stream_resolve_include_path($include_file) === false || !in_array($buildfile->type, $valid_types, true)) {
Log::error("Project: $projectid. No handler include file for {$buildfile->type} (tried $include_file)");
Expand Down Expand Up @@ -182,54 +169,12 @@ function ctest_parse($filehandle, $projectid, $expected_md5 = '')
}

// Figure out what type of XML file this is.
$handler = null;
$file = '';
while (is_null($handler) && !feof($filehandle)) {
$content = fread($filehandle, 8192);
if (str_contains($content, '<Update')) {
// Should be first otherwise confused with Build
$handler = new UpdateHandler($projectid);
$file = 'Update';
} elseif (str_contains($content, '<Build')) {
$handler = new BuildHandler($projectid);
$file = 'Build';
} elseif (str_contains($content, '<Configure')) {
$handler = new ConfigureHandler($projectid);
$file = 'Configure';
} elseif (str_contains($content, '<Testing')) {
$handler = new TestingHandler($projectid);
$file = 'Test';
} elseif (str_contains($content, '<CoverageLog')) {
// Should be before coverage

$handler = new CoverageLogHandler($projectid);
$file = 'CoverageLog';
} elseif (str_contains($content, '<Coverage')) {
$handler = new CoverageHandler($projectid);
$file = 'Coverage';
} elseif (str_contains($content, '<report')) {
$handler = new CoverageJUnitHandler($projectid);
$file = 'Coverage';
} elseif (str_contains($content, '<Notes')) {
$handler = new NoteHandler($projectid);
$file = 'Notes';
} elseif (str_contains($content, '<DynamicAnalysis')) {
$handler = new DynamicAnalysisHandler($projectid);
$file = 'DynamicAnalysis';
} elseif (str_contains($content, '<Project')) {
$handler = new ProjectHandler($projectid);
$file = 'Project';
} elseif (str_contains($content, '<Upload')) {
$handler = new UploadHandler($projectid);
$file = 'Upload';
} elseif (str_contains($content, '<testsuite')) {
$handler = new TestingJUnitHandler($projectid);
$file = 'Test';
} elseif (str_contains($content, '<Done')) {
$handler = new DoneHandler($projectid);
$file = 'Done';
}
}
$xml_info = SubmissionUtils::get_xml_type($filehandle);
$filehandle = $xml_info['file_handle'];
$handler_ref = $xml_info['xml_handler'];
$file = $xml_info['xml_type'];

$handler = isset($handler_ref) ? new $handler_ref($projectid) : null;

rewind($filehandle);
$content = fread($filehandle, 8192);
Expand Down
52 changes: 46 additions & 6 deletions phpstan-baseline.neon
Original file line number Diff line number Diff line change
Expand Up @@ -13296,7 +13296,7 @@ parameters:
path: app/cdash/include/common.php

-
message: "#^Access to an undefined property BuildHandler\\|ConfigureHandler\\|CoverageHandler\\|CoverageJUnitHandler\\|CoverageLogHandler\\|DoneHandler\\|DynamicAnalysisHandler\\|NoteHandler\\|ProjectHandler\\|TestingHandler\\|TestingJUnitHandler\\|UpdateHandler\\|UploadHandler\\:\\:\\$backupFileName\\.$#"
message: "#^Access to an undefined property BazelJSONHandler\\|BuildPropertiesJSONHandler\\|GCovTarHandler\\|JavaJSONTarHandler\\|JSCoverTarHandler\\|OpenCoverTarHandler\\|RetryHandler\\|SubProjectDirectoriesHandler\\:\\:\\$backupFileName\\.$#"
count: 1
path: app/cdash/include/ctestparser.php

Expand All @@ -13306,7 +13306,27 @@ parameters:
path: app/cdash/include/ctestparser.php

-
message: "#^Call to an undefined method object\\:\\:Parse\\(\\)\\.$#"
message: "#^Call to an undefined method BazelJSONHandler\\|BuildPropertiesJSONHandler\\|GCovTarHandler\\|JavaJSONTarHandler\\|JSCoverTarHandler\\|OpenCoverTarHandler\\|RetryHandler\\|SubProjectDirectoriesHandler\\:\\:Parse\\(\\)\\.$#"
count: 1
path: app/cdash/include/ctestparser.php

-
message: "#^Call to an undefined method object\\:\\:getBuildName\\(\\)\\.$#"
count: 1
path: app/cdash/include/ctestparser.php

-
message: "#^Call to an undefined method object\\:\\:getBuildStamp\\(\\)\\.$#"
count: 1
path: app/cdash/include/ctestparser.php

-
message: "#^Call to an undefined method object\\:\\:getSiteName\\(\\)\\.$#"
count: 1
path: app/cdash/include/ctestparser.php

-
message: "#^Call to an undefined method object\\:\\:getSubProjectName\\(\\)\\.$#"
count: 1
path: app/cdash/include/ctestparser.php

Expand Down Expand Up @@ -13334,6 +13354,16 @@ parameters:
count: 3
path: app/cdash/include/ctestparser.php

-
message: "#^Class GCovTarHandler referenced with incorrect case\\: GcovTarHandler\\.$#"
count: 1
path: app/cdash/include/ctestparser.php

-
message: "#^Class RetryHandler referenced with incorrect case\\: retryHandler\\.$#"
count: 1
path: app/cdash/include/ctestparser.php

-
message: "#^Construct empty\\(\\) is not allowed\\. Use more strict comparison\\.$#"
count: 4
Expand Down Expand Up @@ -13435,13 +13465,23 @@ parameters:
path: app/cdash/include/ctestparser.php

-
message: "#^Parameter \\#1 \\$haystack of function str_contains expects string, string\\|false given\\.$#"
count: 13
message: "#^Parameter \\#2 \\$data of function xml_parse expects string, string\\|false given\\.$#"
count: 2
path: app/cdash/include/ctestparser.php

-
message: "#^Parameter \\#2 \\$data of function xml_parse expects string, string\\|false given\\.$#"
count: 2
message: "#^Parameter \\#2 \\$handler of function xml_set_character_data_handler expects callable\\(\\)\\: mixed, array\\{object, 'text'\\} given\\.$#"
count: 1
path: app/cdash/include/ctestparser.php

-
message: "#^Parameter \\#2 \\$start_handler of function xml_set_element_handler expects callable\\(\\)\\: mixed, array\\{object, 'startElement'\\} given\\.$#"
count: 1
path: app/cdash/include/ctestparser.php

-
message: "#^Parameter \\#3 \\$end_handler of function xml_set_element_handler expects callable\\(\\)\\: mixed, array\\{object, 'endElement'\\} given\\.$#"
count: 1
path: app/cdash/include/ctestparser.php

-
Expand Down

0 comments on commit 27a6ed9

Please sign in to comment.