Skip to content

Commit

Permalink
Prevent Phrase Suggester from failing on missing fields.
Browse files Browse the repository at this point in the history
Unless the field is not mapped phrase suggester should return
empty results or skip candidate generation if a field in not in
the index rather than failing hard with an illegal argument exception.
Some shards might not have a value in a certain field.

Closes elastic#3469
  • Loading branch information
s1monw committed Aug 16, 2013
1 parent ac06722 commit af05fbd
Show file tree
Hide file tree
Showing 9 changed files with 246 additions and 112 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -18,18 +18,8 @@
*/
package org.elasticsearch.search.suggest.phrase;

import java.io.IOException;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Set;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.MultiFields;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.index.*;
import org.apache.lucene.search.spell.DirectSpellChecker;
import org.apache.lucene.search.spell.SuggestMode;
import org.apache.lucene.search.spell.SuggestWord;
Expand All @@ -38,6 +28,12 @@
import org.elasticsearch.ElasticSearchIllegalArgumentException;
import org.elasticsearch.search.suggest.SuggestUtils;

import java.io.IOException;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Set;

//TODO public for tests
public final class DirectCandidateGenerator extends CandidateGenerator {

Expand All @@ -58,20 +54,19 @@ public final class DirectCandidateGenerator extends CandidateGenerator {
private final int numCandidates;

public DirectCandidateGenerator(DirectSpellChecker spellchecker, String field, SuggestMode suggestMode, IndexReader reader, double nonErrorLikelihood, int numCandidates) throws IOException {
this(spellchecker, field, suggestMode, reader, nonErrorLikelihood, numCandidates, null, null);
this(spellchecker, field, suggestMode, reader, nonErrorLikelihood, numCandidates, null, null, MultiFields.getTerms(reader, field));
}


public DirectCandidateGenerator(DirectSpellChecker spellchecker, String field, SuggestMode suggestMode, IndexReader reader, double nonErrorLikelihood, int numCandidates, Analyzer preFilter, Analyzer postFilter) throws IOException {
public DirectCandidateGenerator(DirectSpellChecker spellchecker, String field, SuggestMode suggestMode, IndexReader reader, double nonErrorLikelihood, int numCandidates, Analyzer preFilter, Analyzer postFilter, Terms terms) throws IOException {
if (terms == null) {
throw new ElasticSearchIllegalArgumentException("generator field [" + field + "] doesn't exist");
}
this.spellchecker = spellchecker;
this.field = field;
this.numCandidates = numCandidates;
this.suggestMode = suggestMode;
this.reader = reader;
Terms terms = MultiFields.getTerms(reader, field);
if (terms == null) {
throw new ElasticSearchIllegalArgumentException("generator field [" + field + "] doesn't exist");
}
final long dictSize = terms.getSumTotalTermFreq();
this.useTotalTermFrequency = dictSize != -1;
this.dictSize = dictSize == -1 ? reader.maxDoc() : dictSize;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,27 +18,28 @@
*/
package org.elasticsearch.search.suggest.phrase;

import java.io.IOException;

import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Terms;
import org.apache.lucene.util.BytesRef;
import org.elasticsearch.search.suggest.SuggestUtils;
import org.elasticsearch.search.suggest.phrase.DirectCandidateGenerator.Candidate;

import java.io.IOException;
//TODO public for tests
public final class LaplaceScorer extends WordScorer {

public static final WordScorerFactory FACTORY = new WordScorer.WordScorerFactory() {
@Override
public WordScorer newScorer(IndexReader reader, String field, double realWordLikelyhood, BytesRef separator) throws IOException {
return new LaplaceScorer(reader, field, realWordLikelyhood, separator, 0.5);
public WordScorer newScorer(IndexReader reader, Terms terms, String field, double realWordLikelyhood, BytesRef separator) throws IOException {
return new LaplaceScorer(reader, terms, field, realWordLikelyhood, separator, 0.5);
}
};

private double alpha;

public LaplaceScorer(IndexReader reader, String field,
public LaplaceScorer(IndexReader reader, Terms terms, String field,
double realWordLikelyhood, BytesRef separator, double alpha) throws IOException {
super(reader, field, realWordLikelyhood, separator);
super(reader, terms, field, realWordLikelyhood, separator);
this.alpha = alpha;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,23 +18,24 @@
*/
package org.elasticsearch.search.suggest.phrase;

import java.io.IOException;

import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Terms;
import org.apache.lucene.util.BytesRef;
import org.elasticsearch.search.suggest.SuggestUtils;
import org.elasticsearch.search.suggest.phrase.DirectCandidateGenerator.Candidate;

import java.io.IOException;

//TODO public for tests
public final class LinearInterpoatingScorer extends WordScorer {

private final double unigramLambda;
private final double bigramLambda;
private final double trigramLambda;

public LinearInterpoatingScorer(IndexReader reader, String field, double realWordLikelyhood, BytesRef separator, double trigramLambda, double bigramLambda, double unigramLambda)
public LinearInterpoatingScorer(IndexReader reader, Terms terms, String field, double realWordLikelyhood, BytesRef separator, double trigramLambda, double bigramLambda, double unigramLambda)
throws IOException {
super(reader, field, realWordLikelyhood, separator);
super(reader, terms, field, realWordLikelyhood, separator);
double sum = unigramLambda + bigramLambda + trigramLambda;
this.unigramLambda = unigramLambda / sum;
this.bigramLambda = bigramLambda / sum;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,10 +18,9 @@
*/
package org.elasticsearch.search.suggest.phrase;

import java.io.IOException;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Terms;
import org.apache.lucene.util.BytesRef;
import org.elasticsearch.ElasticSearchIllegalArgumentException;
import org.elasticsearch.common.xcontent.XContentParser;
Expand All @@ -33,6 +32,8 @@
import org.elasticsearch.search.suggest.SuggestionSearchContext;
import org.elasticsearch.search.suggest.phrase.PhraseSuggestionContext.DirectCandidateGenerator;

import java.io.IOException;

public final class PhraseSuggestParser implements SuggestContextParser {

private PhraseSuggester suggester;
Expand Down Expand Up @@ -135,6 +136,10 @@ public SuggestionSearchContext.SuggestionContext parse(XContentParser parser, Ma
throw new ElasticSearchIllegalArgumentException("The required field option is missing");
}

if (mapperService.smartNameFieldMapper(suggestion.getField()) == null) {
throw new ElasticSearchIllegalArgumentException("No mapping found for field [" + suggestion.getField() + "]");
}

if (suggestion.model() == null) {
suggestion.setModel(StupidBackoffScorer.FACTORY);
}
Expand Down Expand Up @@ -209,9 +214,9 @@ public void parseSmoothingModel(XContentParser parser, PhraseSuggestionContext s
}
suggestion.setModel(new WordScorer.WordScorerFactory() {
@Override
public WordScorer newScorer(IndexReader reader, String field, double realWordLikelyhood, BytesRef separator)
public WordScorer newScorer(IndexReader reader, Terms terms, String field, double realWordLikelyhood, BytesRef separator)
throws IOException {
return new LinearInterpoatingScorer(reader, field, realWordLikelyhood, separator, lambdas[0], lambdas[1],
return new LinearInterpoatingScorer(reader, terms, field, realWordLikelyhood, separator, lambdas[0], lambdas[1],
lambdas[2]);
}
});
Expand All @@ -230,9 +235,9 @@ public WordScorer newScorer(IndexReader reader, String field, double realWordLik
final double alpha = theAlpha;
suggestion.setModel(new WordScorer.WordScorerFactory() {
@Override
public WordScorer newScorer(IndexReader reader, String field, double realWordLikelyhood, BytesRef separator)
public WordScorer newScorer(IndexReader reader, Terms terms, String field, double realWordLikelyhood, BytesRef separator)
throws IOException {
return new LaplaceScorer(reader, field, realWordLikelyhood, separator, alpha);
return new LaplaceScorer(reader, terms, field, realWordLikelyhood, separator, alpha);
}
});

Expand All @@ -250,9 +255,9 @@ public WordScorer newScorer(IndexReader reader, String field, double realWordLik
final double discount = theDiscount;
suggestion.setModel(new WordScorer.WordScorerFactory() {
@Override
public WordScorer newScorer(IndexReader reader, String field, double realWordLikelyhood, BytesRef separator)
public WordScorer newScorer(IndexReader reader, Terms terms, String field, double realWordLikelyhood, BytesRef separator)
throws IOException {
return new StupidBackoffScorer(reader, field, realWordLikelyhood, separator, discount);
return new StupidBackoffScorer(reader, terms, field, realWordLikelyhood, separator, discount);
}
});

Expand Down Expand Up @@ -281,6 +286,9 @@ private void parseCandidateGenerator(XContentParser parser, MapperService mapper
if (!SuggestUtils.parseDirectSpellcheckerSettings(parser, fieldName, generator)) {
if ("field".equals(fieldName)) {
generator.setField(parser.text());
if (mapperService.smartNameFieldMapper(generator.field()) == null) {
throw new ElasticSearchIllegalArgumentException("No mapping found for field [" + generator.field() + "]");
}
} else if ("size".equals(fieldName)) {
generator.size(parser.intValue());
} else if ("pre_filter".equals(fieldName) || "preFilter".equals(fieldName)) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,8 @@

import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.MultiFields;
import org.apache.lucene.index.Terms;
import org.apache.lucene.search.spell.DirectSpellChecker;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.CharsRef;
Expand All @@ -30,9 +32,11 @@
import org.elasticsearch.search.suggest.Suggest.Suggestion;
import org.elasticsearch.search.suggest.Suggest.Suggestion.Entry;
import org.elasticsearch.search.suggest.Suggest.Suggestion.Entry.Option;
import org.elasticsearch.search.suggest.SuggestContextParser;
import org.elasticsearch.search.suggest.SuggestUtils;
import org.elasticsearch.search.suggest.Suggester;
import org.elasticsearch.search.suggest.*;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

import java.io.IOException;
import java.util.List;
Expand All @@ -52,41 +56,50 @@ public final class PhraseSuggester extends Suggester<PhraseSuggestionContext> {
public Suggestion<? extends Entry<? extends Option>> innerExecute(String name, PhraseSuggestionContext suggestion,
IndexReader indexReader, CharsRef spare) throws IOException {
double realWordErrorLikelihood = suggestion.realworldErrorLikelyhood();
UnicodeUtil.UTF8toUTF16(suggestion.getText(), spare);
Suggestion.Entry<Option> resultEntry = new Suggestion.Entry<Option>(new StringText(spare.toString()), 0, spare.length);
final Suggestion<Entry<Option>> response = new Suggestion<Entry<Option>>(name, suggestion.getSize());
response.addTerm(resultEntry);

List<PhraseSuggestionContext.DirectCandidateGenerator> generators = suggestion.generators();
CandidateGenerator[] gens = new CandidateGenerator[generators.size()];
for (int i = 0; i < gens.length; i++) {
final int numGenerators = generators.size();
final List<CandidateGenerator> gens = new ArrayList<CandidateGenerator>(generators.size());
for (int i = 0; i < numGenerators; i++) {
PhraseSuggestionContext.DirectCandidateGenerator generator = generators.get(i);
DirectSpellChecker directSpellChecker = SuggestUtils.getDirectSpellChecker(generator);
gens[i] = new DirectCandidateGenerator(directSpellChecker, generator.field(), generator.suggestMode(), indexReader, realWordErrorLikelihood, generator.size(), generator.preFilter(), generator.postFilter());
Terms terms = MultiFields.getTerms(indexReader, generator.field());
if (terms != null) {
gens.add(new DirectCandidateGenerator(directSpellChecker, generator.field(), generator.suggestMode(),
indexReader, realWordErrorLikelihood, generator.size(), generator.preFilter(), generator.postFilter(), terms));
}
}


final NoisyChannelSpellChecker checker = new NoisyChannelSpellChecker(realWordErrorLikelihood, suggestion.getRequireUnigram(), suggestion.getTokenLimit());
final BytesRef separator = suggestion.separator();
TokenStream stream = checker.tokenStream(suggestion.getAnalyzer(), suggestion.getText(), spare, suggestion.getField());
WordScorer wordScorer = suggestion.model().newScorer(indexReader, suggestion.getField(), realWordErrorLikelihood, separator);
Correction[] corrections = checker.getCorrections(stream, new MultiCandidateGeneratorWrapper(suggestion.getShardSize(), gens), suggestion.maxErrors(),
suggestion.getShardSize(), indexReader,wordScorer , separator, suggestion.confidence(), suggestion.gramSize());

UnicodeUtil.UTF8toUTF16(suggestion.getText(), spare);

Suggestion.Entry<Option> resultEntry = new Suggestion.Entry<Option>(new StringText(spare.toString()), 0, spare.length);
BytesRef byteSpare = new BytesRef();
for (Correction correction : corrections) {
UnicodeUtil.UTF8toUTF16(correction.join(SEPARATOR, byteSpare, null, null), spare);
Text phrase = new StringText(spare.toString());
Text highlighted = null;
if (suggestion.getPreTag() != null) {
UnicodeUtil.UTF8toUTF16(correction.join(SEPARATOR, byteSpare, suggestion.getPreTag(), suggestion.getPostTag()), spare);
highlighted = new StringText(spare.toString());
final String suggestField = suggestion.getField();
final Terms suggestTerms = MultiFields.getTerms(indexReader, suggestField);
if (gens.size() > 0 && suggestTerms != null) {
final NoisyChannelSpellChecker checker = new NoisyChannelSpellChecker(realWordErrorLikelihood, suggestion.getRequireUnigram(), suggestion.getTokenLimit());
final BytesRef separator = suggestion.separator();
TokenStream stream = checker.tokenStream(suggestion.getAnalyzer(), suggestion.getText(), spare, suggestion.getField());

WordScorer wordScorer = suggestion.model().newScorer(indexReader, suggestTerms, suggestField, realWordErrorLikelihood, separator);
Correction[] corrections = checker.getCorrections(stream, new MultiCandidateGeneratorWrapper(suggestion.getShardSize(),
gens.toArray(new CandidateGenerator[gens.size()])), suggestion.maxErrors(),
suggestion.getShardSize(), indexReader,wordScorer , separator, suggestion.confidence(), suggestion.gramSize());

BytesRef byteSpare = new BytesRef();
for (Correction correction : corrections) {
UnicodeUtil.UTF8toUTF16(correction.join(SEPARATOR, byteSpare, null, null), spare);
Text phrase = new StringText(spare.toString());
Text highlighted = null;
if (suggestion.getPreTag() != null) {
UnicodeUtil.UTF8toUTF16(correction.join(SEPARATOR, byteSpare, suggestion.getPreTag(), suggestion.getPostTag()), spare);
highlighted = new StringText(spare.toString());
}
resultEntry.addOption(new Suggestion.Entry.Option(phrase, highlighted, (float) (correction.score)));
}
resultEntry.addOption(new Suggestion.Entry.Option(phrase, highlighted, (float) (correction.score)));
}
final Suggestion<Entry<Option>> response = new Suggestion<Entry<Option>>(name, suggestion.getSize());
response.addTerm(resultEntry);
return response;
}

@Override
public String[] names() {
return new String[] {"phrase"};
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,26 +18,27 @@
*/
package org.elasticsearch.search.suggest.phrase;

import java.io.IOException;

import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Terms;
import org.apache.lucene.util.BytesRef;
import org.elasticsearch.search.suggest.SuggestUtils;
import org.elasticsearch.search.suggest.phrase.DirectCandidateGenerator.Candidate;

import java.io.IOException;

public class StupidBackoffScorer extends WordScorer {
public static final WordScorerFactory FACTORY = new WordScorer.WordScorerFactory() {
@Override
public WordScorer newScorer(IndexReader reader, String field, double realWordLikelyhood, BytesRef separator) throws IOException {
return new StupidBackoffScorer(reader, field, realWordLikelyhood, separator, 0.4f);
public WordScorer newScorer(IndexReader reader, Terms terms, String field, double realWordLikelyhood, BytesRef separator) throws IOException {
return new StupidBackoffScorer(reader, terms, field, realWordLikelyhood, separator, 0.4f);
}
};

private final double discount;

public StupidBackoffScorer(IndexReader reader, String field, double realWordLikelyhood, BytesRef separator, double discount)
public StupidBackoffScorer(IndexReader reader, Terms terms,String field, double realWordLikelyhood, BytesRef separator, double discount)
throws IOException {
super(reader, field, realWordLikelyhood, separator);
super(reader, terms, field, realWordLikelyhood, separator);
this.discount = discount;
}

Expand Down
Loading

0 comments on commit af05fbd

Please sign in to comment.