Skip to content

Commit

Permalink
Create a custom parser for parsing ISO8601 datetime variants (#106486)
Browse files Browse the repository at this point in the history
This adds a hand-written parser for parsing fixed ISO8601 datetime strings, for the `iso8601`, `strict_date_optional_time`, and `strict_date_optional_time_nanos` date formats. If the new parser fails to parse a string, the existing parsers are then tried, so existing behaviour is maintained. There is a new JVM option added that can force use of the existing parsers, if that is needed for any reason.
  • Loading branch information
thecoop authored May 14, 2024
1 parent 3fe4785 commit e01600d
Show file tree
Hide file tree
Showing 8 changed files with 1,371 additions and 44 deletions.
17 changes: 17 additions & 0 deletions docs/changelog/106486.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
pr: 106486
summary: Create custom parser for ISO-8601 datetimes
area: Infra/Core
type: enhancement
issues:
- 102063
highlight:
title: New custom parser for ISO-8601 datetimes
body: |-
This introduces a new custom parser for ISO-8601 datetimes, for the `iso8601`, `strict_date_optional_time`, and
`strict_date_optional_time_nanos` built-in date formats. This provides a performance improvement over the
default Java date-time parsing. Whilst it maintains much of the same behaviour,
the new parser does not accept nonsensical date-time strings that have multiple fractional seconds fields
or multiple timezone specifiers. If the new parser fails to parse a string, it will then use the previous parser
to parse it. If a large proportion of the input data consists of these invalid strings, this may cause
a small performance degradation. If you wish to force the use of the old parsers regardless,
set the JVM property `es.datetime.java_time_parsers=true` on all ES nodes.
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0 and the Server Side Public License, v 1; you may not use this file except
* in compliance with, at your election, the Elastic License 2.0 or the Server
* Side Public License, v 1.
*/

package org.elasticsearch.common.time;

import java.util.stream.IntStream;

/**
 * A read-only view over a contiguous region of another {@link CharSequence}.
 * <p>
 * Unlike {@link String#substring(int, int)}, creating the view does not allocate a new backing array:
 * characters are read straight from the wrapped sequence, shifted by {@code startOffset}.
 * The view does not copy the wrapped sequence, so it reflects whatever the wrapped sequence returns
 * at the time of each call.
 */
class CharSubSequence implements CharSequence {
    private final CharSequence wrapped;
    private final int startOffset; // inclusive
    private final int endOffset;   // exclusive

    /**
     * Creates a view of {@code wrapped} covering {@code [startOffset, endOffset)}.
     *
     * @param wrapped     the sequence to expose a region of
     * @param startOffset index in {@code wrapped} of the first character of the view (inclusive)
     * @param endOffset   index in {@code wrapped} just past the last character of the view (exclusive)
     * @throws IllegalArgumentException if the offsets are negative, reversed, or extend past the end of {@code wrapped}
     */
    CharSubSequence(CharSequence wrapped, int startOffset, int endOffset) {
        if (startOffset < 0) {
            throw new IllegalArgumentException("startOffset must not be negative, got " + startOffset);
        }
        if (endOffset > wrapped.length()) {
            throw new IllegalArgumentException(
                "endOffset " + endOffset + " is beyond the wrapped sequence length " + wrapped.length()
            );
        }
        if (endOffset < startOffset) {
            throw new IllegalArgumentException("endOffset " + endOffset + " is before startOffset " + startOffset);
        }

        this.wrapped = wrapped;
        this.startOffset = startOffset;
        this.endOffset = endOffset;
    }

    /** Returns the number of characters in the view. */
    @Override
    public int length() {
        return endOffset - startOffset;
    }

    /**
     * Returns the character at {@code index}, relative to the start of the view.
     *
     * @throws IndexOutOfBoundsException if {@code index} is negative or not less than {@link #length()}
     */
    @Override
    public char charAt(int index) {
        // validate the caller's index directly rather than the shifted one, so the check cannot be affected by int overflow
        if (index < 0 || index >= length()) {
            throw new IndexOutOfBoundsException("index " + index + " is out of bounds for length " + length());
        }
        return wrapped.charAt(startOffset + index);
    }

    /** Returns {@code true} if the view covers no characters. */
    @Override
    public boolean isEmpty() {
        return startOffset == endOffset;
    }

    /**
     * Returns the region {@code [start, end)} of this view, with both indices relative to the start of the view.
     * The result is produced by the wrapped sequence's own {@code subSequence}, so its concrete type
     * (and whether it copies) is whatever the wrapped sequence chooses.
     *
     * @throws IndexOutOfBoundsException if {@code start} is negative, {@code end} is greater than
     *                                   {@link #length()}, or {@code start} is greater than {@code end}
     */
    @Override
    public CharSequence subSequence(int start, int end) {
        // checks are made on the caller's indices before shifting, so they cannot be affected by int overflow
        if (start < 0) {
            throw new IndexOutOfBoundsException("start " + start + " must not be negative");
        }
        if (end > length()) {
            throw new IndexOutOfBoundsException("end " + end + " is out of bounds for length " + length());
        }
        if (start > end) {
            throw new IndexOutOfBoundsException("start " + start + " is after end " + end);
        }

        return wrapped.subSequence(startOffset + start, startOffset + end);
    }

    /** Returns a stream of the {@code char} values in the view, taken from the wrapped sequence's own stream. */
    @Override
    public IntStream chars() {
        return wrapped.chars().skip(startOffset).limit(endOffset - startOffset);
    }

    /** Returns the characters of the view as a {@link String}. */
    @Override
    public String toString() {
        return wrapped.subSequence(startOffset, endOffset).toString();
    }
}
135 changes: 91 additions & 44 deletions server/src/main/java/org/elasticsearch/common/time/DateFormatters.java
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,10 @@
package org.elasticsearch.common.time;

import org.elasticsearch.common.Strings;
import org.elasticsearch.core.Booleans;
import org.elasticsearch.core.SuppressForbidden;
import org.elasticsearch.core.UpdateForV9;
import org.elasticsearch.logging.internal.spi.LoggerFactory;

import java.time.Instant;
import java.time.LocalDate;
Expand All @@ -30,6 +33,7 @@
import java.time.temporal.TemporalQuery;
import java.time.temporal.WeekFields;
import java.util.Locale;
import java.util.Set;
import java.util.stream.Stream;

import static java.time.temporal.ChronoField.DAY_OF_MONTH;
Expand All @@ -43,6 +47,24 @@

public class DateFormatters {

/**
* Whether only the java.time based parsers should be used, leaving out the hand-written ISO8601 parser.
* <p>
* The ISO8601 parser is as close as possible to the java.time based parsers, but it rejects some strings
* that those parsers accept (multiple fractional seconds, or multiple timezones).
* If a string cannot be parsed by the ISO parser, the java.time one is tried next.
* If there are many such strings, trying the ISO parser and then the java.time parser for each of them
* might cause a performance drop, so this JVM option lets users go straight to the java.time parsers
* if they really need to.
* <p>
* The value is read from the {@code es.datetime.java_time_parsers} system property; {@code false} is passed
* to {@code Booleans.parseBoolean} as its second argument (presumably the default when the property is unset).
*/
@UpdateForV9 // evaluate if we need to deprecate/remove this
private static final boolean JAVA_TIME_PARSERS_ONLY = Booleans.parseBoolean(System.getProperty("es.datetime.java_time_parsers"), false);

static {
    // Announce the override once, at class initialization, but only when it is actually switched on.
    if (JAVA_TIME_PARSERS_ONLY) {
        // ES logging may not have been initialized yet when this class is used directly in tests,
        // in which case no provider is available and the message is dropped
        LoggerFactory provider = LoggerFactory.provider();
        if (provider != null) {
            provider.getLogger(DateFormatters.class).info("Using java.time datetime parsers only");
        }
    }
}

/**
 * Builds a {@link DateFormatter} that uses the same java.time formatter for both printing and parsing.
 *
 * @param format    the name the resulting formatter is created with
 * @param formatter the java.time formatter wrapped by both the printer and the parser
 * @return a new {@code JavaDateFormatter} built from the two wrappers
 */
private static DateFormatter newDateFormatter(String format, DateTimeFormatter formatter) {
    JavaTimeDateTimePrinter printer = new JavaTimeDateTimePrinter(formatter);
    JavaTimeDateTimeParser parser = new JavaTimeDateTimeParser(formatter);
    return new JavaDateFormatter(format, printer, parser);
}
Expand Down Expand Up @@ -168,11 +190,18 @@ private static DateFormatter newDateFormatter(String format, DateTimeFormatter p
/**
* A generic ISO datetime formatter where the date is mandatory and the time is optional.
* It is created under the format name {@code strict_date_optional_time}.
* NOTE(review): the second argument is presumably the formatter used for printing and the third the one
* used for parsing - confirm against the three-argument {@code newDateFormatter} overload.
*/
private static final DateFormatter STRICT_DATE_OPTIONAL_TIME = newDateFormatter(
"strict_date_optional_time",
STRICT_DATE_OPTIONAL_TIME_PRINTER,
STRICT_DATE_OPTIONAL_TIME_FORMATTER
);
private static final DateFormatter STRICT_DATE_OPTIONAL_TIME;
static {
    // java.time based parser; always part of the parser list
    DateTimeParser fallbackParser = new JavaTimeDateTimeParser(STRICT_DATE_OPTIONAL_TIME_FORMATTER);
    JavaTimeDateTimePrinter printer = new JavaTimeDateTimePrinter(STRICT_DATE_OPTIONAL_TIME_PRINTER);

    DateTimeParser[] parsers;
    if (JAVA_TIME_PARSERS_ONLY) {
        parsers = new DateTimeParser[] { fallbackParser };
    } else {
        // the hand-written ISO8601 parser is listed first, ahead of the java.time parser
        DateTimeParser isoParser = new Iso8601DateTimeParser(Set.of(), false).withLocale(Locale.ROOT);
        parsers = new DateTimeParser[] { isoParser, fallbackParser };
    }

    STRICT_DATE_OPTIONAL_TIME = new JavaDateFormatter("strict_date_optional_time", printer, parsers);
}

private static final DateTimeFormatter STRICT_DATE_OPTIONAL_TIME_FORMATTER_WITH_NANOS = new DateTimeFormatterBuilder().append(
STRICT_YEAR_MONTH_DAY_FORMATTER
Expand Down Expand Up @@ -224,51 +253,69 @@ private static DateFormatter newDateFormatter(String format, DateTimeFormatter p
/**
* A generic ISO datetime formatter where the date is mandatory and the time is optional, with nanosecond resolution.
* It is created under the format name {@code strict_date_optional_time_nanos}.
* NOTE(review): the second argument is presumably the formatter used for printing and the third the one
* used for parsing - confirm against the three-argument {@code newDateFormatter} overload.
*/
private static final DateFormatter STRICT_DATE_OPTIONAL_TIME_NANOS = newDateFormatter(
"strict_date_optional_time_nanos",
STRICT_DATE_OPTIONAL_TIME_PRINTER_NANOS,
STRICT_DATE_OPTIONAL_TIME_FORMATTER_WITH_NANOS
);
private static final DateFormatter STRICT_DATE_OPTIONAL_TIME_NANOS;
static {
    // java.time based parser; always part of the parser list
    DateTimeParser fallbackParser = new JavaTimeDateTimeParser(STRICT_DATE_OPTIONAL_TIME_FORMATTER_WITH_NANOS);
    JavaTimeDateTimePrinter printer = new JavaTimeDateTimePrinter(STRICT_DATE_OPTIONAL_TIME_PRINTER_NANOS);

    DateTimeParser[] parsers;
    if (JAVA_TIME_PARSERS_ONLY) {
        parsers = new DateTimeParser[] { fallbackParser };
    } else {
        // the hand-written ISO8601 parser is listed first, ahead of the java.time parser;
        // unlike the non-nanos variant it is configured with the hour, minute and second fields and a true flag
        DateTimeParser isoParser = new Iso8601DateTimeParser(Set.of(HOUR_OF_DAY, MINUTE_OF_HOUR, SECOND_OF_MINUTE), true).withLocale(
            Locale.ROOT
        );
        parsers = new DateTimeParser[] { isoParser, fallbackParser };
    }

    STRICT_DATE_OPTIONAL_TIME_NANOS = new JavaDateFormatter("strict_date_optional_time_nanos", printer, parsers);
}

/**
* An ISO 8601 compatible date time formatter and parser.
* This is not fully compatible with the ISO 8601 spec, which would require handling far more edge cases;
* it is merely compatible with the existing legacy joda time ISO date formatter.
* <p>
* The parsing pattern is a mandatory date followed by nested optional sections: everything after the date,
* including the zone, sits inside the optional section opened by the {@code 'T'} literal.
*/
private static final DateFormatter ISO_8601 = newDateFormatter(
"iso8601",
STRICT_DATE_OPTIONAL_TIME_PRINTER,
new DateTimeFormatterBuilder().append(STRICT_YEAR_MONTH_DAY_FORMATTER)
// optional section opened by the 'T' literal; closed by the last optionalEnd() of the chain
.optionalStart()
.appendLiteral('T')
// optional two-digit hour
.optionalStart()
.appendValue(HOUR_OF_DAY, 2, 2, SignStyle.NOT_NEGATIVE)
// optional ':' and two-digit minute
.optionalStart()
.appendLiteral(':')
.appendValue(MINUTE_OF_HOUR, 2, 2, SignStyle.NOT_NEGATIVE)
// optional ':' and two-digit second
.optionalStart()
.appendLiteral(':')
.appendValue(SECOND_OF_MINUTE, 2, 2, SignStyle.NOT_NEGATIVE)
// optional fraction of 1 to 9 digits, with the decimal point handled by appendFraction
.optionalStart()
.appendFraction(NANO_OF_SECOND, 1, 9, true)
.optionalEnd()
// optional fraction of 1 to 9 digits introduced by a literal comma instead
.optionalStart()
.appendLiteral(",")
.appendFraction(NANO_OF_SECOND, 1, 9, false)
.optionalEnd()
// close the second, minute and hour sections, in that order
.optionalEnd()
.optionalEnd()
.optionalEnd()
// optional zone or offset id
.optionalStart()
.appendZoneOrOffsetId()
.optionalEnd()
// optional zone as matched by TIME_ZONE_FORMATTER_NO_COLON
.optionalStart()
.append(TIME_ZONE_FORMATTER_NO_COLON)
.optionalEnd()
// close the section opened by the 'T' literal
.optionalEnd()
.toFormatter(Locale.ROOT)
.withResolverStyle(ResolverStyle.STRICT)
);
private static final DateFormatter ISO_8601;
static {
    // Mandatory date, then nested optional sections: 'T', hour, minute, second, a fraction
    // (either via appendFraction's own decimal point or after a literal comma), and finally the zone.
    DateTimeFormatter isoFormatter = new DateTimeFormatterBuilder().append(STRICT_YEAR_MONTH_DAY_FORMATTER)
        .optionalStart()
        .appendLiteral('T')
        .optionalStart()
        .appendValue(HOUR_OF_DAY, 2, 2, SignStyle.NOT_NEGATIVE)
        .optionalStart()
        .appendLiteral(':')
        .appendValue(MINUTE_OF_HOUR, 2, 2, SignStyle.NOT_NEGATIVE)
        .optionalStart()
        .appendLiteral(':')
        .appendValue(SECOND_OF_MINUTE, 2, 2, SignStyle.NOT_NEGATIVE)
        .optionalStart()
        .appendFraction(NANO_OF_SECOND, 1, 9, true)
        .optionalEnd()
        .optionalStart()
        .appendLiteral(",")
        .appendFraction(NANO_OF_SECOND, 1, 9, false)
        .optionalEnd()
        .optionalEnd()
        .optionalEnd()
        .optionalEnd()
        .optionalStart()
        .appendZoneOrOffsetId()
        .optionalEnd()
        .optionalStart()
        .append(TIME_ZONE_FORMATTER_NO_COLON)
        .optionalEnd()
        .optionalEnd()
        .toFormatter(Locale.ROOT)
        .withResolverStyle(ResolverStyle.STRICT);

    // java.time based parser; always part of the parser list
    DateTimeParser fallbackParser = new JavaTimeDateTimeParser(isoFormatter);
    JavaTimeDateTimePrinter printer = new JavaTimeDateTimePrinter(STRICT_DATE_OPTIONAL_TIME_PRINTER);

    DateTimeParser[] parsers;
    if (JAVA_TIME_PARSERS_ONLY) {
        parsers = new DateTimeParser[] { fallbackParser };
    } else {
        // the hand-written ISO8601 parser is listed first, ahead of the java.time parser
        DateTimeParser isoParser = new Iso8601DateTimeParser(Set.of(), false).withLocale(Locale.ROOT);
        parsers = new DateTimeParser[] { isoParser, fallbackParser };
    }

    ISO_8601 = new JavaDateFormatter("iso8601", printer, parsers);
}

/////////////////////////////////////////
//
Expand Down
Loading

0 comments on commit e01600d

Please sign in to comment.