Skip to content

Commit

Permalink
Migrate ReferenceDocs resource to plain text (elastic#113866)
Browse files Browse the repository at this point in the history
Removes the dependency on `XContent` parsing so we can move this out of
`:server` and into `:libs:core`.
  • Loading branch information
DaveCTurner authored Oct 2, 2024
1 parent a5d033b commit eb9b897
Show file tree
Hide file tree
Showing 4 changed files with 233 additions and 176 deletions.
94 changes: 63 additions & 31 deletions server/src/main/java/org/elasticsearch/common/ReferenceDocs.java
Original file line number Diff line number Diff line change
Expand Up @@ -11,29 +11,26 @@

import org.elasticsearch.Build;
import org.elasticsearch.core.SuppressForbidden;
import org.elasticsearch.xcontent.XContentFactory;
import org.elasticsearch.xcontent.XContentParser;
import org.elasticsearch.xcontent.XContentParserConfiguration;
import org.elasticsearch.xcontent.XContentType;

import java.io.BufferedReader;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.URL;
import java.util.LinkedHashMap;
import java.nio.charset.StandardCharsets;
import java.util.Map;
import java.util.regex.Pattern;

/**
* Encapsulates links to pages in the reference docs, so that for example we can include URLs in logs and API outputs. Each instance's
* {@link #toString()} yields (a string representation of) a URL for the relevant docs. Links are defined in the resource file
* {@code reference-docs-links.json} which must include definitions for exactly the set of values of this enum.
* {@code reference-docs-links.txt} which must include definitions for exactly the set of values of this enum.
*/
public enum ReferenceDocs {
/*
* Note that the docs subsystem parses {@code reference-docs-links.json} with regexes, not a JSON parser, so the whitespace in the file
* is important too. See {@code sub check_elasticsearch_links} in {@code https://github.com/elastic/docs/blob/master/build_docs.pl} for
* more details.
* Note that the docs subsystem parses {@code reference-docs-links.txt} with its own logic, not with this class. See
* {@code sub check_elasticsearch_links} in {@code https://github.com/elastic/docs/blob/master/build_docs.pl} for more details.
*
* Also note that the docs are built from the HEAD of each minor release branch, so in principle docs can move around independently of
* the ES release process. To avoid breaking any links that have been baked into earlier patch releases, you may only add links in a
Expand Down Expand Up @@ -89,7 +86,7 @@ public enum ReferenceDocs {
private static final Map<String, String> linksBySymbol;

static {
try (var resourceStream = readFromJarResourceUrl(ReferenceDocs.class.getResource("reference-docs-links.json"))) {
try (var resourceStream = readFromJarResourceUrl(ReferenceDocs.class.getResource("reference-docs-links.txt"))) {
linksBySymbol = Map.copyOf(readLinksBySymbol(resourceStream));
} catch (Exception e) {
assert false : e;
Expand All @@ -101,34 +98,69 @@ public enum ReferenceDocs {
static final String CURRENT_VERSION_COMPONENT = "current";
static final String VERSION_COMPONENT = getVersionComponent(Build.current().version(), Build.current().isSnapshot());

static Map<String, String> readLinksBySymbol(InputStream inputStream) throws Exception {
try (var parser = XContentFactory.xContent(XContentType.JSON).createParser(XContentParserConfiguration.EMPTY, inputStream)) {
final var result = parser.map(LinkedHashMap::new, XContentParser::text);
final var iterator = result.keySet().iterator();
for (int i = 0; i < values().length; i++) {
final var expected = values()[i].name();
if (iterator.hasNext() == false) {
throw new IllegalStateException("ran out of values at index " + i + ": expecting " + expected);
}
final var actual = iterator.next();
if (actual.equals(expected) == false) {
throw new IllegalStateException("mismatch at index " + i + ": found " + actual + " but expected " + expected);
}
static final int SYMBOL_COLUMN_WIDTH = 64; // increase as needed to accommodate yet longer symbols

static Map<String, String> readLinksBySymbol(InputStream inputStream) throws IOException {
final var padding = " ".repeat(SYMBOL_COLUMN_WIDTH);

record LinksBySymbolEntry(String symbol, String link) implements Map.Entry<String, String> {
@Override
public String getKey() {
return symbol;
}
if (iterator.hasNext()) {
throw new IllegalStateException("found unexpected extra value: " + iterator.next());

@Override
public String getValue() {
return link;
}

@Override
public String setValue(String value) {
assert false;
throw new UnsupportedOperationException();
}
}

// We must only link to anchors with fixed IDs (defined by [[fragment-name]] in the docs) because auto-generated fragment IDs
// depend on the heading text and are too easy to break inadvertently. Auto-generated fragment IDs begin with an underscore.
for (final var entry : result.entrySet()) {
if (entry.getValue().startsWith("_") || entry.getValue().contains("#_")) {
throw new IllegalStateException("found auto-generated fragment ID at " + entry.getKey());
final var symbolCount = values().length;
final var linksBySymbolEntries = new LinksBySymbolEntry[symbolCount];

try (var reader = new BufferedReader(new InputStreamReader(inputStream, StandardCharsets.UTF_8))) {
for (int i = 0; i < symbolCount; i++) {
final var currentLine = reader.readLine();
final var symbol = values()[i].name();
if (currentLine == null) {
throw new IllegalStateException("links resource truncated at line " + (i + 1));
}
if (currentLine.startsWith(symbol + " ") == false) {
throw new IllegalStateException(
"unexpected symbol at line " + (i + 1) + ": expected line starting with [" + symbol + " ]"
);
}
final var link = currentLine.substring(SYMBOL_COLUMN_WIDTH).trim();
if (Strings.hasText(link) == false) {
throw new IllegalStateException("no link found for [" + symbol + "] at line " + (i + 1));
}
final var expectedLine = (symbol + padding).substring(0, SYMBOL_COLUMN_WIDTH) + link;
if (currentLine.equals(expectedLine) == false) {
throw new IllegalStateException("unexpected content at line " + (i + 1) + ": expected [" + expectedLine + "]");
}

// We must only link to anchors with fixed IDs (defined by [[fragment-name]] in the docs) because auto-generated fragment
// IDs depend on the heading text and are too easy to break inadvertently. Auto-generated fragment IDs begin with "_"
if (link.startsWith("_") || link.contains("#_")) {
throw new IllegalStateException(
"found auto-generated fragment ID in link [" + link + "] for [" + symbol + "] at line " + (i + 1)
);
}
linksBySymbolEntries[i] = new LinksBySymbolEntry(symbol, link);
}

return result;
if (reader.readLine() != null) {
throw new IllegalStateException("unexpected trailing content at line " + (symbolCount + 1));
}
}

return Map.ofEntries(linksBySymbolEntries);
}

/**
Expand Down
Original file line number Diff line number Diff line change
@@ -1,46 +1,5 @@
{
"INITIAL_MASTER_NODES": "important-settings.html#initial_master_nodes",
"DISCOVERY_TROUBLESHOOTING": "discovery-troubleshooting.html",
"UNSTABLE_CLUSTER_TROUBLESHOOTING": "troubleshooting-unstable-cluster.html",
"LAGGING_NODE_TROUBLESHOOTING": "troubleshooting-unstable-cluster.html#troubleshooting-unstable-cluster-lagging",
"SHARD_LOCK_TROUBLESHOOTING": "troubleshooting-unstable-cluster.html#troubleshooting-unstable-cluster-shardlockobtainfailedexception",
"NETWORK_DISCONNECT_TROUBLESHOOTING": "troubleshooting-unstable-cluster.html#troubleshooting-unstable-cluster-network",
"CONCURRENT_REPOSITORY_WRITERS": "diagnosing-corrupted-repositories.html",
"ARCHIVE_INDICES": "archive-indices.html",
"HTTP_TRACER": "modules-network.html#http-rest-request-tracer",
"LOGGING": "logging.html",
"BOOTSTRAP_CHECK_HEAP_SIZE": "bootstrap-checks-heap-size.html",
"BOOTSTRAP_CHECK_FILE_DESCRIPTOR": "bootstrap-checks-file-descriptor.html",
"BOOTSTRAP_CHECK_MEMORY_LOCK": "bootstrap-checks-memory-lock.html",
"BOOTSTRAP_CHECK_MAX_NUMBER_THREADS": "max-number-threads-check.html",
"BOOTSTRAP_CHECK_MAX_FILE_SIZE": "bootstrap-checks-max-file-size.html",
"BOOTSTRAP_CHECK_MAX_SIZE_VIRTUAL_MEMORY": "max-size-virtual-memory-check.html",
"BOOTSTRAP_CHECK_MAXIMUM_MAP_COUNT": "bootstrap-checks-max-map-count.html",
"BOOTSTRAP_CHECK_CLIENT_JVM": "bootstrap-checks-client-jvm.html",
"BOOTSTRAP_CHECK_USE_SERIAL_COLLECTOR": "bootstrap-checks-serial-collector.html",
"BOOTSTRAP_CHECK_SYSTEM_CALL_FILTER": "bootstrap-checks-syscall-filter.html",
"BOOTSTRAP_CHECK_ONERROR_AND_ONOUTOFMEMORYERROR": "bootstrap-checks-onerror.html",
"BOOTSTRAP_CHECK_EARLY_ACCESS": "bootstrap-checks-early-access.html",
"BOOTSTRAP_CHECK_ALL_PERMISSION": "bootstrap-checks-all-permission.html",
"BOOTSTRAP_CHECK_DISCOVERY_CONFIGURATION": "bootstrap-checks-discovery-configuration.html",
"BOOTSTRAP_CHECKS": "bootstrap-checks.html",
"BOOTSTRAP_CHECK_ENCRYPT_SENSITIVE_DATA": "bootstrap-checks-xpack.html#bootstrap-checks-xpack-encrypt-sensitive-data",
"BOOTSTRAP_CHECK_PKI_REALM": "bootstrap-checks-xpack.html#bootstrap-checks-xpack-pki-realm",
"BOOTSTRAP_CHECK_ROLE_MAPPINGS": "bootstrap-checks-xpack.html#bootstrap-checks-xpack-role-mappings",
"BOOTSTRAP_CHECK_TLS": "bootstrap-checks-xpack.html#bootstrap-checks-tls",
"BOOTSTRAP_CHECK_TOKEN_SSL": "bootstrap-checks-xpack.html#bootstrap-checks-xpack-token-ssl",
"BOOTSTRAP_CHECK_SECURITY_MINIMAL_SETUP": "security-minimal-setup.html",
"CONTACT_SUPPORT": "troubleshooting.html#troubleshooting-contact-support",
"UNASSIGNED_SHARDS": "red-yellow-cluster-status.html",
"EXECUTABLE_JNA_TMPDIR": "executable-jna-tmpdir.html",
"NETWORK_THREADING_MODEL": "modules-network.html#modules-network-threading-model",
"ALLOCATION_EXPLAIN_API": "cluster-allocation-explain.html",
"NETWORK_BINDING_AND_PUBLISHING": "modules-network.html#modules-network-binding-publishing",
"SNAPSHOT_REPOSITORY_ANALYSIS": "repo-analysis-api.html",
"S3_COMPATIBLE_REPOSITORIES": "repository-s3.html#repository-s3-compatible-services",
"LUCENE_MAX_DOCS_LIMIT": "size-your-shards.html#troubleshooting-max-docs-limit",
"MAX_SHARDS_PER_NODE": "size-your-shards.html#troubleshooting-max-shards-open",
"FLOOD_STAGE_WATERMARK": "fix-watermark-errors.html",
"X_OPAQUE_ID": "api-conventions.html#x-opaque-id",
"FORMING_SINGLE_NODE_CLUSTERS": "modules-discovery-bootstrap-cluster.html#modules-discovery-bootstrap-cluster-joining"
}
[
"Content moved to reference-docs-links.txt",
"This is a temporary placeholder to satisfy sub check_elasticsearch_links in the docs build",
"Remove with @UpdateForV10 (if not before)"
]
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
INITIAL_MASTER_NODES important-settings.html#initial_master_nodes
DISCOVERY_TROUBLESHOOTING discovery-troubleshooting.html
UNSTABLE_CLUSTER_TROUBLESHOOTING troubleshooting-unstable-cluster.html
LAGGING_NODE_TROUBLESHOOTING troubleshooting-unstable-cluster.html#troubleshooting-unstable-cluster-lagging
SHARD_LOCK_TROUBLESHOOTING troubleshooting-unstable-cluster.html#troubleshooting-unstable-cluster-shardlockobtainfailedexception
NETWORK_DISCONNECT_TROUBLESHOOTING troubleshooting-unstable-cluster.html#troubleshooting-unstable-cluster-network
CONCURRENT_REPOSITORY_WRITERS diagnosing-corrupted-repositories.html
ARCHIVE_INDICES archive-indices.html
HTTP_TRACER modules-network.html#http-rest-request-tracer
LOGGING logging.html
BOOTSTRAP_CHECK_HEAP_SIZE bootstrap-checks-heap-size.html
BOOTSTRAP_CHECK_FILE_DESCRIPTOR bootstrap-checks-file-descriptor.html
BOOTSTRAP_CHECK_MEMORY_LOCK bootstrap-checks-memory-lock.html
BOOTSTRAP_CHECK_MAX_NUMBER_THREADS max-number-threads-check.html
BOOTSTRAP_CHECK_MAX_FILE_SIZE bootstrap-checks-max-file-size.html
BOOTSTRAP_CHECK_MAX_SIZE_VIRTUAL_MEMORY max-size-virtual-memory-check.html
BOOTSTRAP_CHECK_MAXIMUM_MAP_COUNT bootstrap-checks-max-map-count.html
BOOTSTRAP_CHECK_CLIENT_JVM bootstrap-checks-client-jvm.html
BOOTSTRAP_CHECK_USE_SERIAL_COLLECTOR bootstrap-checks-serial-collector.html
BOOTSTRAP_CHECK_SYSTEM_CALL_FILTER bootstrap-checks-syscall-filter.html
BOOTSTRAP_CHECK_ONERROR_AND_ONOUTOFMEMORYERROR bootstrap-checks-onerror.html
BOOTSTRAP_CHECK_EARLY_ACCESS bootstrap-checks-early-access.html
BOOTSTRAP_CHECK_ALL_PERMISSION bootstrap-checks-all-permission.html
BOOTSTRAP_CHECK_DISCOVERY_CONFIGURATION bootstrap-checks-discovery-configuration.html
BOOTSTRAP_CHECKS bootstrap-checks.html
BOOTSTRAP_CHECK_ENCRYPT_SENSITIVE_DATA bootstrap-checks-xpack.html#bootstrap-checks-xpack-encrypt-sensitive-data
BOOTSTRAP_CHECK_PKI_REALM bootstrap-checks-xpack.html#bootstrap-checks-xpack-pki-realm
BOOTSTRAP_CHECK_ROLE_MAPPINGS bootstrap-checks-xpack.html#bootstrap-checks-xpack-role-mappings
BOOTSTRAP_CHECK_TLS bootstrap-checks-xpack.html#bootstrap-checks-tls
BOOTSTRAP_CHECK_TOKEN_SSL bootstrap-checks-xpack.html#bootstrap-checks-xpack-token-ssl
BOOTSTRAP_CHECK_SECURITY_MINIMAL_SETUP security-minimal-setup.html
CONTACT_SUPPORT troubleshooting.html#troubleshooting-contact-support
UNASSIGNED_SHARDS red-yellow-cluster-status.html
EXECUTABLE_JNA_TMPDIR executable-jna-tmpdir.html
NETWORK_THREADING_MODEL modules-network.html#modules-network-threading-model
ALLOCATION_EXPLAIN_API cluster-allocation-explain.html
NETWORK_BINDING_AND_PUBLISHING modules-network.html#modules-network-binding-publishing
SNAPSHOT_REPOSITORY_ANALYSIS repo-analysis-api.html
S3_COMPATIBLE_REPOSITORIES repository-s3.html#repository-s3-compatible-services
LUCENE_MAX_DOCS_LIMIT size-your-shards.html#troubleshooting-max-docs-limit
MAX_SHARDS_PER_NODE size-your-shards.html#troubleshooting-max-shards-open
FLOOD_STAGE_WATERMARK fix-watermark-errors.html
X_OPAQUE_ID api-conventions.html#x-opaque-id
FORMING_SINGLE_NODE_CLUSTERS modules-discovery-bootstrap-cluster.html#modules-discovery-bootstrap-cluster-joining
Loading

0 comments on commit eb9b897

Please sign in to comment.