From f5d6b347bb3aa397c29d1773f21ee92a21a9c9dc Mon Sep 17 00:00:00 2001
From: Darin Spivey
Date: Tue, 9 Apr 2024 09:30:17 -0400
Subject: [PATCH] fix(classification): Use the correct SYSLOG pattern order

The pattern for `SYSLOGPAMSESSION` is similar enough to `SYSLOGLINE` that
it was getting clobbered. Rearrange the order of the syslog grok patterns
to place `SYSLOGLINE` last as a catch-all for syslog.

Ref: LOG-19650
---
 src/transforms/mezmo_log_classification.rs | 55 +++++++++++++++++++++-
 1 file changed, 54 insertions(+), 1 deletion(-)

diff --git a/src/transforms/mezmo_log_classification.rs b/src/transforms/mezmo_log_classification.rs
index baabf0194..5e2fae1a7 100644
--- a/src/transforms/mezmo_log_classification.rs
+++ b/src/transforms/mezmo_log_classification.rs
@@ -39,8 +39,8 @@ const DEFAULT_LOG_EVENT_TYPES: [&str; 67] = [
     "HTTPD_COMMONLOG",
     "HTTPD_ERRORLOG",
     "SYSLOG5424LINE",
-    "SYSLOGLINE", // This is overridden by a custom pattern. The default is not strict enough and is causing false positives.
     "SYSLOGPAMSESSION",
+    "SYSLOGLINE", // This is overridden by a custom pattern. The default is not strict enough and is causing false positives.
     "CRONLOG",
     "MONGO3_LOG",
     "NAGIOSLOGLINE",
@@ -519,6 +519,59 @@ mod tests {
         );
     }
 
+    #[tokio::test]
+    async fn syslog_pattern_order() {
+        // Syslog variants can match more than one grok pattern, so the result depends on pattern
+        // order. Check that `SYSLOGPAMSESSION` is not clobbered by the `SYSLOGLINE` catch-all.
+        // Each line gets its own transform call so we don't have a "first one wins" situation.
+
+        let message_key = "message".to_string();
+
+        let syslog_line = r#"2024-04-08T22:01:18Z <2897.2454> 10.8.201.201 oldsmsdashboard[4972]: Decade impact various door few look important."#;
+        let syslog_event = Event::Log(LogEvent::from(Value::Object(
+            btreemap!(message_key.clone() => Value::Bytes(syslog_line.into())),
+        )));
+
+        let pam_session_line = r#"Apr 08 22:01:18 <7106.8433> 10.66.13.97 bigqueryprocessor[1118]: conference(legacy_bigquery): session open for user robertssharon by (uid=8764)"#;
+        let pam_session_event = Event::Log(LogEvent::from(Value::Object(
+            btreemap!(message_key.clone() => Value::Bytes(pam_session_line.into())),
+        )));
+
+        let config = LogClassificationConfig {
+            line_fields: None,
+            grok_patterns: default_grok_patterns(),
+            app_fields: default_app_fields(),
+            host_fields: default_host_fields(),
+            level_fields: default_level_fields(),
+        };
+        let output_syslog = do_transform(config.clone(), syslog_event.clone().into())
+            .await
+            .unwrap();
+        let annotations_syslog =
+            make_expected_annotations(&syslog_event, None, vec!["SYSLOGLINE".to_string()]);
+
+        assert_eq!(
+            output_syslog.as_log().get(log_schema().annotations_key()),
+            Some(&annotations_syslog)
+        );
+
+        let output_pam_session = do_transform(config.clone(), pam_session_event.clone().into())
+            .await
+            .unwrap();
+        let annotations_pam_session = make_expected_annotations(
+            &pam_session_event,
+            None,
+            vec!["SYSLOGPAMSESSION".to_string()],
+        );
+
+        assert_eq!(
+            output_pam_session
+                .as_log()
+                .get(log_schema().annotations_key()),
+            Some(&annotations_pam_session),
+        );
+    }
+
     #[tokio::test]
     async fn event_with_json_string_message() {
         let line = r#"47.29.201.179 - - [28/Feb/2019:13:17:10 +0000] "GET /?p=1 HTTP/2.0" 200 5316 "https://domain1.com/?p=1" "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.119 Safari/537.36" "2.75"#;