
Commit

Merge branch 'master' into local-checkpoint-throwback
* master:
  [Analysis] Support normalizer in request param (elastic#24767)
  Remove deprecated IdsQueryBuilder constructor (elastic#25529)
  Adds check for negative search request size (elastic#25397)
  test: also inspect the upgrade api response to check whether the upgrade really ran
  [DOCS] restructure java clients docs pages (elastic#25517)
jasontedor committed Jul 4, 2017
2 parents 385c948 + 6894ef6 commit ab8eeb3
Showing 38 changed files with 467 additions and 120 deletions.
@@ -18,6 +18,7 @@
*/
package org.elasticsearch.action.admin.indices.analyze;

import org.elasticsearch.Version;
import org.elasticsearch.action.ActionRequestValidationException;
import org.elasticsearch.action.support.single.shard.SingleShardRequest;
import org.elasticsearch.common.Strings;
@@ -59,6 +60,8 @@ public class AnalyzeRequest extends SingleShardRequest<AnalyzeRequest> {

private String[] attributes = Strings.EMPTY_ARRAY;

private String normalizer;

public static class NameOrDefinition implements Writeable {
// exactly one of these two members is not null
public final String name;
@@ -202,12 +205,27 @@ public String[] attributes() {
return this.attributes;
}

public String normalizer() {
return this.normalizer;
}

public AnalyzeRequest normalizer(String normalizer) {
this.normalizer = normalizer;
return this;
}

@Override
public ActionRequestValidationException validate() {
ActionRequestValidationException validationException = null;
if (text == null || text.length == 0) {
validationException = addValidationError("text is missing", validationException);
}
if ((index == null || index.length() == 0) && normalizer != null) {
validationException = addValidationError("index is required if normalizer is specified", validationException);
}
if (normalizer != null && (tokenizer != null || analyzer != null)) {
validationException = addValidationError("tokenizer/analyze should be null if normalizer is specified", validationException);
}
return validationException;
}

@@ -222,6 +240,9 @@ public void readFrom(StreamInput in) throws IOException {
field = in.readOptionalString();
explain = in.readBoolean();
attributes = in.readStringArray();
if (in.getVersion().onOrAfter(Version.V_6_0_0_alpha3)) {
normalizer = in.readOptionalString();
}
}

@Override
@@ -235,5 +256,8 @@ public void writeTo(StreamOutput out) throws IOException {
out.writeOptionalString(field);
out.writeBoolean(explain);
out.writeStringArray(attributes);
if (out.getVersion().onOrAfter(Version.V_6_0_0_alpha3)) {
out.writeOptionalString(normalizer);
}
}
}
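Taken together, the new validate() rules mean a normalizer can only be used on an index-scoped request and cannot be combined with an analyzer or tokenizer. A minimal sketch of the resulting behavior (index and normalizer names are illustrative, not part of this commit):

AnalyzeRequest noIndex = new AnalyzeRequest().normalizer("my_normalizer").text("Some Text");
// fails validation: "index is required if normalizer is specified"
assert noIndex.validate() != null;

AnalyzeRequest ok = new AnalyzeRequest("index").normalizer("my_normalizer").text("Some Text");
// an index-scoped request that names only a normalizer passes validation
assert ok.validate() == null;

AnalyzeRequest mixed = new AnalyzeRequest("index").analyzer("standard").normalizer("my_normalizer").text("Some Text");
// a normalizer combined with an analyzer (or tokenizer) is rejected
assert mixed.validate() != null;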
@@ -125,4 +125,13 @@ public AnalyzeRequestBuilder setText(String... texts) {
request.text(texts);
return this;
}

/**
* Sets the normalizer to use by name, instead of specifying an analyzer or tokenizer
*/
public AnalyzeRequestBuilder setNormalizer(String normalizer) {
request.normalizer(normalizer);
return this;
}

}
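Through the builder the same option is available on the Java client. A rough usage sketch, assuming a Client named client and a normalizer my_normalizer defined on index (both hypothetical names, not taken from this commit):

AnalyzeResponse response = client.admin().indices()
        .prepareAnalyze("index", "ABc")
        .setNormalizer("my_normalizer")
        .get();
// a normalizer emits a single token, so the response carries one entry ("abc")
String term = response.getTokens().get(0).getTerm();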
@@ -51,6 +51,7 @@
import org.elasticsearch.index.analysis.CustomAnalyzer;
import org.elasticsearch.index.analysis.CustomAnalyzerProvider;
import org.elasticsearch.index.analysis.IndexAnalyzers;
import org.elasticsearch.index.analysis.MultiTermAwareComponent;
import org.elasticsearch.index.analysis.NamedAnalyzer;
import org.elasticsearch.index.analysis.TokenFilterFactory;
import org.elasticsearch.index.analysis.TokenizerFactory;
@@ -60,6 +61,7 @@
import org.elasticsearch.index.shard.ShardId;
import org.elasticsearch.indices.IndicesService;
import org.elasticsearch.indices.analysis.AnalysisModule;
import org.elasticsearch.indices.analysis.PreBuiltTokenizers;
import org.elasticsearch.threadpool.ThreadPool;
import org.elasticsearch.transport.TransportService;

@@ -178,21 +180,46 @@ public static AnalyzeResponse analyze(AnalyzeRequest request, String field, Anal
throw new IllegalArgumentException("failed to find analyzer [" + request.analyzer() + "]");
}
}

} else if (request.tokenizer() != null) {
final IndexSettings indexSettings = indexAnalyzers == null ? null : indexAnalyzers.getIndexSettings();
Tuple<String, TokenizerFactory> tokenizerFactory = parseTokenizerFactory(request, indexAnalyzers,
analysisRegistry, environment);

List<CharFilterFactory> charFilterFactoryList = parseCharFilterFactories(request, indexSettings, analysisRegistry, environment);
List<CharFilterFactory> charFilterFactoryList =
parseCharFilterFactories(request, indexSettings, analysisRegistry, environment, false);

List<TokenFilterFactory> tokenFilterFactoryList = parseTokenFilterFactories(request, indexSettings, analysisRegistry,
environment, tokenizerFactory, charFilterFactoryList);
environment, tokenizerFactory, charFilterFactoryList, false);

analyzer = new CustomAnalyzer(tokenizerFactory.v1(), tokenizerFactory.v2(),
charFilterFactoryList.toArray(new CharFilterFactory[charFilterFactoryList.size()]),
tokenFilterFactoryList.toArray(new TokenFilterFactory[tokenFilterFactoryList.size()]));
closeAnalyzer = true;
} else if (request.normalizer() != null) {
// Get normalizer from indexAnalyzers
analyzer = indexAnalyzers.getNormalizer(request.normalizer());
if (analyzer == null) {
throw new IllegalArgumentException("failed to find normalizer under [" + request.normalizer() + "]");
}
} else if (((request.tokenFilters() != null && request.tokenFilters().size() > 0)
|| (request.charFilters() != null && request.charFilters().size() > 0))) {
final IndexSettings indexSettings = indexAnalyzers == null ? null : indexAnalyzers.getIndexSettings();
// Ad-hoc ("custom") normalizer: no normalizer, tokenizer, or analyzer was named, but filter and/or char_filter were supplied.
// Build the char filter and token filter chains from the request.
List<CharFilterFactory> charFilterFactoryList =
parseCharFilterFactories(request, indexSettings, analysisRegistry, environment, true);

final String keywordTokenizerName = "keyword";
TokenizerFactory keywordTokenizerFactory = getTokenizerFactory(analysisRegistry, environment, keywordTokenizerName);

List<TokenFilterFactory> tokenFilterFactoryList =
parseTokenFilterFactories(request, indexSettings, analysisRegistry, environment,
    new Tuple<>(keywordTokenizerName, keywordTokenizerFactory), charFilterFactoryList, true);

analyzer = new CustomAnalyzer("keyword_for_normalizer",
keywordTokenizerFactory,
charFilterFactoryList.toArray(new CharFilterFactory[charFilterFactoryList.size()]),
tokenFilterFactoryList.toArray(new TokenFilterFactory[tokenFilterFactoryList.size()]));
closeAnalyzer = true;
} else if (analyzer == null) {
if (indexAnalyzers == null) {
analyzer = analysisRegistry.getAnalyzer("standard");
@@ -465,7 +492,7 @@ private static Map<String, Object> extractExtendedAttributes(TokenStream stream,
}

private static List<CharFilterFactory> parseCharFilterFactories(AnalyzeRequest request, IndexSettings indexSettings, AnalysisRegistry analysisRegistry,
Environment environment) throws IOException {
Environment environment, boolean normalizer) throws IOException {
List<CharFilterFactory> charFilterFactoryList = new ArrayList<>();
if (request.charFilters() != null && request.charFilters().size() > 0) {
List<AnalyzeRequest.NameOrDefinition> charFilters = request.charFilters();
@@ -506,6 +533,13 @@ private static List<CharFilterFactory> parseCharFilterFactories(AnalyzeRequest r
if (charFilterFactory == null) {
throw new IllegalArgumentException("failed to find char filter under [" + charFilter.name + "]");
}
if (normalizer) {
if (charFilterFactory instanceof MultiTermAwareComponent == false) {
throw new IllegalArgumentException("Custom normalizer may not use char filter ["
+ charFilterFactory.name() + "]");
}
charFilterFactory = (CharFilterFactory) ((MultiTermAwareComponent) charFilterFactory).getMultiTermComponent();
}
charFilterFactoryList.add(charFilterFactory);
}
}
@@ -514,7 +548,7 @@ private static List<CharFilterFactory> parseCharFilterFactories(AnalyzeRequest r

private static List<TokenFilterFactory> parseTokenFilterFactories(AnalyzeRequest request, IndexSettings indexSettings, AnalysisRegistry analysisRegistry,
Environment environment, Tuple<String, TokenizerFactory> tokenizerFactory,
List<CharFilterFactory> charFilterFactoryList) throws IOException {
List<CharFilterFactory> charFilterFactoryList, boolean normalizer) throws IOException {
List<TokenFilterFactory> tokenFilterFactoryList = new ArrayList<>();
if (request.tokenFilters() != null && request.tokenFilters().size() > 0) {
List<AnalyzeRequest.NameOrDefinition> tokenFilters = request.tokenFilters();
@@ -561,6 +595,13 @@ private static List<TokenFilterFactory> parseTokenFilterFactories(AnalyzeRequest
if (tokenFilterFactory == null) {
throw new IllegalArgumentException("failed to find or create token filter under [" + tokenFilter.name + "]");
}
if (normalizer) {
if (tokenFilterFactory instanceof MultiTermAwareComponent == false) {
throw new IllegalArgumentException("Custom normalizer may not use filter ["
+ tokenFilterFactory.name() + "]");
}
tokenFilterFactory = (TokenFilterFactory) ((MultiTermAwareComponent) tokenFilterFactory).getMultiTermComponent();
}
tokenFilterFactoryList.add(tokenFilterFactory);
}
}
@@ -590,12 +631,8 @@ private static Tuple<String, TokenizerFactory> parseTokenizerFactory(AnalyzeRequ
} else {
AnalysisModule.AnalysisProvider<TokenizerFactory> tokenizerFactoryFactory;
if (indexAnalzyers == null) {
tokenizerFactoryFactory = analysisRegistry.getTokenizerProvider(tokenizer.name);
if (tokenizerFactoryFactory == null) {
throw new IllegalArgumentException("failed to find global tokenizer under [" + tokenizer.name + "]");
}
tokenizerFactory = getTokenizerFactory(analysisRegistry, environment, tokenizer.name);
name = tokenizer.name;
tokenizerFactory = tokenizerFactoryFactory.get(environment, tokenizer.name);
} else {
tokenizerFactoryFactory = analysisRegistry.getTokenizerProvider(tokenizer.name, indexAnalzyers.getIndexSettings());
if (tokenizerFactoryFactory == null) {
@@ -610,6 +647,17 @@ private static Tuple<String, TokenizerFactory> parseTokenizerFactory(AnalyzeRequ
return new Tuple<>(name, tokenizerFactory);
}

private static TokenizerFactory getTokenizerFactory(AnalysisRegistry analysisRegistry, Environment environment, String name) throws IOException {
AnalysisModule.AnalysisProvider<TokenizerFactory> tokenizerFactoryFactory;
TokenizerFactory tokenizerFactory;
tokenizerFactoryFactory = analysisRegistry.getTokenizerProvider(name);
if (tokenizerFactoryFactory == null) {
throw new IllegalArgumentException("failed to find global tokenizer under [" + name + "]");
}
tokenizerFactory = tokenizerFactoryFactory.get(environment, name);
return tokenizerFactory;
}

private static IndexSettings getNaIndexSettings(Settings settings) {
IndexMetaData metaData = IndexMetaData.builder(IndexMetaData.INDEX_UUID_NA_VALUE).settings(settings).build();
return new IndexSettings(metaData, Settings.EMPTY);
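The second new branch in analyze() builds an ad-hoc normalizer when the request names no analyzer, tokenizer, or normalizer but does supply char filters and/or token filters: the filters are paired with a keyword tokenizer, and each component must be a MultiTermAwareComponent. A sketch of a request that exercises that path (index and filter names are illustrative):

AnalyzeRequest adHoc = new AnalyzeRequest("index")
        .addTokenFilter("lowercase")   // lowercase is multi-term aware, so it is accepted
        .text("ABc");
// a filter that is not multi-term aware would instead be rejected with
// "Custom normalizer may not use filter [...]"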
@@ -65,15 +65,6 @@ public IdsQueryBuilder() {
// nothing to do
}

/**
* Creates a new IdsQueryBuilder by providing the types of the documents to look for
* @deprecated Replaced by {@link #types(String...)}
*/
@Deprecated
public IdsQueryBuilder(String... types) {
types(types);
}

/**
* Read from a stream.
*/
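For callers of the removed constructor, the replacement is the existing types(String...) setter. A minimal before/after sketch (the type and ids are illustrative):

// before (constructor removed in this merge):
// IdsQueryBuilder query = new IdsQueryBuilder("my_type").addIds("1", "2");

// after:
IdsQueryBuilder query = new IdsQueryBuilder().types("my_type").addIds("1", "2");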
@@ -171,6 +171,9 @@ public int getSize() {
* documents.
*/
public Self setSize(int size) {
if (size < 0) {
throw new IllegalArgumentException("[size] parameter cannot be negative, found [" + size + "]");
}
this.size = size;
return self();
}
@@ -367,10 +370,13 @@ protected Self doForSlice(Self request, TaskId slicingTask) {
.setShouldStoreResult(false)
// Split requests per second between all slices
.setRequestsPerSecond(requestsPerSecond / slices)
// Size is split between workers. This means the size might round down!
.setSize(size == SIZE_ALL_MATCHES ? SIZE_ALL_MATCHES : size / slices)
// Sub requests don't have workers
.setSlices(1);
if (size != -1) {
// Size is split between workers. This means the size might round
// down!
request.setSize(size == SIZE_ALL_MATCHES ? SIZE_ALL_MATCHES : size / slices);
}
// Set the parent task so this task is cancelled if we cancel the parent
request.setParentTask(slicingTask);
// TODO It'd be nice not to refresh on every slice. Instead we should refresh after the sub requests finish.
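The new guard is also why doForSlice now skips setSize for an unlimited parent request: a parent size is divided across the slices and rounds down, while SIZE_ALL_MATCHES (-1) would no longer pass the setter. A small worked example (numbers illustrative):

// parent: size = 10, slices = 3
//   each slice gets setSize(10 / 3) == setSize(3), so the slices cover at most 9 documents
// parent: size = SIZE_ALL_MATCHES (-1)
//   setSize is not called at all, since setSize(-1) would now throw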
@@ -46,6 +46,7 @@ public static class Fields {
public static final ParseField CHAR_FILTERS = new ParseField("char_filter");
public static final ParseField EXPLAIN = new ParseField("explain");
public static final ParseField ATTRIBUTES = new ParseField("attributes");
public static final ParseField NORMALIZER = new ParseField("normalizer");
}

public RestAnalyzeAction(Settings settings, RestController controller) {
Expand Down Expand Up @@ -147,6 +148,12 @@ static void buildFromContent(XContentParser parser, AnalyzeRequest analyzeReques
attributes.add(parser.text());
}
analyzeRequest.attributes(attributes.toArray(new String[attributes.size()]));
} else if (Fields.NORMALIZER.match(currentFieldName)) {
if (token == XContentParser.Token.VALUE_STRING) {
analyzeRequest.normalizer(parser.text());
} else {
throw new IllegalArgumentException(currentFieldName + " should be a normalizer name");
}
} else {
throw new IllegalArgumentException("Unknown parameter ["
+ currentFieldName + "] in request body or parameter is of the wrong type[" + token + "] ");
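At the REST layer this surfaces as a normalizer field in the _analyze request body, which buildFromContent maps onto AnalyzeRequest.normalizer(String) and rejects when it is not a string. A request shaped roughly like the following (shown as a comment; the endpoint itself is outside this diff) would take the new code path:

// GET /{index}/_analyze
// {
//   "normalizer": "my_normalizer",
//   "text": "ABc"
// }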
@@ -345,6 +345,9 @@ public int from() {
* The number of search hits to return. Defaults to <tt>10</tt>.
*/
public SearchSourceBuilder size(int size) {
if (size < 0) {
throw new IllegalArgumentException("[size] parameter cannot be negative, found [" + size + "]");
}
this.size = size;
return this;
}
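The equivalent guard on the search body, sketched (the value is illustrative):

SearchSourceBuilder source = new SearchSourceBuilder();
source.size(10);   // accepted
source.size(-5);   // throws IllegalArgumentException: [size] parameter cannot be negative, found [-5]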
@@ -72,7 +72,9 @@ public void setUp() throws Exception {
.put(IndexMetaData.SETTING_VERSION_CREATED, Version.CURRENT)
.put(IndexMetaData.SETTING_INDEX_UUID, UUIDs.randomBase64UUID())
.put("index.analysis.analyzer.custom_analyzer.tokenizer", "standard")
.put("index.analysis.analyzer.custom_analyzer.filter", "mock").build();
.put("index.analysis.analyzer.custom_analyzer.filter", "mock")
.put("index.analysis.normalizer.my_normalizer.type", "custom")
.putArray("index.analysis.normalizer.my_normalizer.filter", "lowercase").build();
IndexSettings idxSettings = IndexSettingsModule.newIndexSettings("index", indexSettings);
environment = new Environment(settings);
AnalysisPlugin plugin = new AnalysisPlugin() {
@@ -304,6 +306,14 @@ public void testUnknown() throws IOException {
} else {
assertEquals(e.getMessage(), "failed to find global char filter under [foobar]");
}

e = expectThrows(IllegalArgumentException.class,
() -> TransportAnalyzeAction.analyze(
new AnalyzeRequest()
.normalizer("foobar")
.text("the qu1ck brown fox"),
AllFieldMapper.NAME, null, indexAnalyzers, registry, environment));
assertEquals(e.getMessage(), "failed to find normalizer under [foobar]");
}

public void testNonPreBuildTokenFilter() throws IOException {
@@ -317,6 +327,16 @@ public void testNonPreBuildTokenFilter() throws IOException {
int default_bucket_size = 512;
int default_hash_set_size = 1;
assertEquals(default_hash_count * default_bucket_size * default_hash_set_size, tokens.size());
}

public void testNormalizerWithIndex() throws IOException {
AnalyzeRequest request = new AnalyzeRequest("index");
request.normalizer("my_normalizer");
request.text("ABc");
AnalyzeResponse analyze = TransportAnalyzeAction.analyze(request, AllFieldMapper.NAME, null, indexAnalyzers, registry, environment);
List<AnalyzeResponse.AnalyzeToken> tokens = analyze.getTokens();

assertEquals(1, tokens.size());
assertEquals("abc", tokens.get(0).getTerm());
}
}