Commit
Merge branch 'master' into hlclient/add-delete-method
dadoonet committed Feb 24, 2017
2 parents 4ebc6dd + 211d50f · commit 3e4b917
Showing 22 changed files with 797 additions and 1,251 deletions.
AnalysisModule.java
@@ -140,6 +140,7 @@
import org.elasticsearch.index.analysis.UpperCaseTokenFilterFactory;
import org.elasticsearch.index.analysis.WhitespaceAnalyzerProvider;
import org.elasticsearch.index.analysis.WhitespaceTokenizerFactory;
import org.elasticsearch.index.analysis.WordDelimiterGraphTokenFilterFactory;
import org.elasticsearch.index.analysis.WordDelimiterTokenFilterFactory;
import org.elasticsearch.index.analysis.compound.DictionaryCompoundWordTokenFilterFactory;
import org.elasticsearch.index.analysis.compound.HyphenationCompoundWordTokenFilterFactory;
@@ -225,6 +226,7 @@ private NamedRegistry<AnalysisProvider<TokenFilterFactory>> setupTokenFilters(Li
tokenFilters.register("snowball", SnowballTokenFilterFactory::new);
tokenFilters.register("stemmer", StemmerTokenFilterFactory::new);
tokenFilters.register("word_delimiter", WordDelimiterTokenFilterFactory::new);
tokenFilters.register("word_delimiter_graph", WordDelimiterGraphTokenFilterFactory::new);
tokenFilters.register("delimited_payload_filter", DelimitedPayloadTokenFilterFactory::new);
tokenFilters.register("elision", ElisionTokenFilterFactory::new);
tokenFilters.register("flatten_graph", FlattenGraphTokenFilterFactory::new);
PreBuiltTokenFilters.java
@@ -51,6 +51,7 @@
import org.apache.lucene.analysis.miscellaneous.TruncateTokenFilter;
import org.apache.lucene.analysis.miscellaneous.UniqueTokenFilter;
import org.apache.lucene.analysis.miscellaneous.WordDelimiterFilter;
import org.apache.lucene.analysis.miscellaneous.WordDelimiterGraphFilter;
import org.apache.lucene.analysis.ngram.EdgeNGramTokenFilter;
import org.apache.lucene.analysis.ngram.NGramTokenFilter;
import org.apache.lucene.analysis.payloads.DelimitedPayloadTokenFilter;
@@ -87,6 +88,18 @@ public TokenStream create(TokenStream tokenStream, Version version) {
}
},

WORD_DELIMITER_GRAPH(CachingStrategy.ONE) {
@Override
public TokenStream create(TokenStream tokenStream, Version version) {
return new WordDelimiterGraphFilter(tokenStream,
WordDelimiterGraphFilter.GENERATE_WORD_PARTS |
WordDelimiterGraphFilter.GENERATE_NUMBER_PARTS |
WordDelimiterGraphFilter.SPLIT_ON_CASE_CHANGE |
WordDelimiterGraphFilter.SPLIT_ON_NUMERICS |
WordDelimiterGraphFilter.STEM_ENGLISH_POSSESSIVE, null);
}
},
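
To see what that flag combination produces, here is a self-contained Lucene sketch (the input text is illustrative; the flags mirror the pre-built filter above):

import java.io.StringReader;

import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.core.WhitespaceTokenizer;
import org.apache.lucene.analysis.miscellaneous.WordDelimiterGraphFilter;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;

public class WordDelimiterGraphDemo {
    public static void main(String[] args) throws Exception {
        WhitespaceTokenizer tokenizer = new WhitespaceTokenizer();
        tokenizer.setReader(new StringReader("PowerShot500 wi-fi's"));
        int flags = WordDelimiterGraphFilter.GENERATE_WORD_PARTS
                | WordDelimiterGraphFilter.GENERATE_NUMBER_PARTS
                | WordDelimiterGraphFilter.SPLIT_ON_CASE_CHANGE
                | WordDelimiterGraphFilter.SPLIT_ON_NUMERICS
                | WordDelimiterGraphFilter.STEM_ENGLISH_POSSESSIVE;
        try (TokenStream ts = new WordDelimiterGraphFilter(tokenizer, flags, null)) {
            CharTermAttribute term = ts.addAttribute(CharTermAttribute.class);
            ts.reset();
            while (ts.incrementToken()) {
                System.out.println(term); // Power, Shot, 500, wi, fi
            }
            ts.end();
        }
    }
}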

STOP(CachingStrategy.LUCENE) {
@Override
public TokenStream create(TokenStream tokenStream, Version version) {
AbstractHighlighterBuilder.java
@@ -21,6 +21,7 @@

import org.apache.lucene.search.highlight.SimpleFragmenter;
import org.apache.lucene.search.highlight.SimpleSpanFragmenter;
import org.elasticsearch.Version;
import org.elasticsearch.action.support.ToXContentToBytes;
import org.elasticsearch.common.ParseField;
import org.elasticsearch.common.ParsingException;
@@ -32,10 +33,12 @@
import org.elasticsearch.common.xcontent.XContentParser;
import org.elasticsearch.index.query.QueryBuilder;
import org.elasticsearch.index.query.QueryParseContext;
import org.elasticsearch.search.fetch.subphase.highlight.HighlightBuilder.BoundaryScannerType;
import org.elasticsearch.search.fetch.subphase.highlight.HighlightBuilder.Order;

import java.io.IOException;
import java.util.Arrays;
import java.util.Locale;
import java.util.Map;
import java.util.Objects;
import java.util.function.BiFunction;
@@ -57,8 +60,10 @@ public abstract class AbstractHighlighterBuilder<HB extends AbstractHighlighterB
public static final ParseField NUMBER_OF_FRAGMENTS_FIELD = new ParseField("number_of_fragments");
public static final ParseField ENCODER_FIELD = new ParseField("encoder");
public static final ParseField REQUIRE_FIELD_MATCH_FIELD = new ParseField("require_field_match");
public static final ParseField BOUNDARY_SCANNER_FIELD = new ParseField("boundary_scanner");
public static final ParseField BOUNDARY_MAX_SCAN_FIELD = new ParseField("boundary_max_scan");
public static final ParseField BOUNDARY_CHARS_FIELD = new ParseField("boundary_chars");
public static final ParseField BOUNDARY_SCANNER_LOCALE_FIELD = new ParseField("boundary_scanner_locale");
public static final ParseField TYPE_FIELD = new ParseField("type");
public static final ParseField FRAGMENTER_FIELD = new ParseField("fragmenter");
public static final ParseField NO_MATCH_SIZE_FIELD = new ParseField("no_match_size");
@@ -88,10 +93,14 @@ public abstract class AbstractHighlighterBuilder<HB extends AbstractHighlighterB

protected Boolean forceSource;

protected BoundaryScannerType boundaryScannerType;

protected Integer boundaryMaxScan;

protected char[] boundaryChars;

protected Locale boundaryScannerLocale;

protected Integer noMatchSize;

protected Integer phraseLimit;
@@ -119,10 +128,18 @@ protected AbstractHighlighterBuilder(StreamInput in) throws IOException {
order(in.readOptionalWriteable(Order::readFromStream));
highlightFilter(in.readOptionalBoolean());
forceSource(in.readOptionalBoolean());
if (in.getVersion().onOrAfter(Version.V_5_4_0_UNRELEASED)) {
boundaryScannerType(in.readOptionalWriteable(BoundaryScannerType::readFromStream));
}
boundaryMaxScan(in.readOptionalVInt());
if (in.readBoolean()) {
boundaryChars(in.readString().toCharArray());
}
if (in.getVersion().onOrAfter(Version.V_5_4_0_UNRELEASED)) {
if (in.readBoolean()) {
boundaryScannerLocale(in.readString());
}
}
noMatchSize(in.readOptionalVInt());
phraseLimit(in.readOptionalVInt());
if (in.readBoolean()) {
@@ -150,12 +167,22 @@ public final void writeTo(StreamOutput out) throws IOException {
out.writeOptionalWriteable(order);
out.writeOptionalBoolean(highlightFilter);
out.writeOptionalBoolean(forceSource);
if (out.getVersion().onOrAfter(Version.V_5_4_0_UNRELEASED)) {
out.writeOptionalWriteable(boundaryScannerType);
}
out.writeOptionalVInt(boundaryMaxScan);
boolean hasBoundaryChars = boundaryChars != null;
out.writeBoolean(hasBoundaryChars);
if (hasBoundaryChars) {
out.writeString(String.valueOf(boundaryChars));
}
if (out.getVersion().onOrAfter(Version.V_5_4_0_UNRELEASED)) {
boolean hasBoundaryScannerLocale = boundaryScannerLocale != null;
out.writeBoolean(hasBoundaryScannerLocale);
if (hasBoundaryScannerLocale) {
out.writeString(boundaryScannerLocale.toLanguageTag());
}
}
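
The locale travels as a BCP 47 language tag (Locale#toLanguageTag on the write side, Locale#forLanguageTag in the constructor above), which round-trips cleanly for well-formed tags. A JDK-only sketch of that assumption:

import java.util.Locale;

public class LocaleRoundTrip {
    public static void main(String[] args) {
        Locale original = Locale.forLanguageTag("de-CH");
        String onWire = original.toLanguageTag();        // "de-CH", as written above
        Locale restored = Locale.forLanguageTag(onWire);
        System.out.println(original.equals(restored));   // true
    }
}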
out.writeOptionalVInt(noMatchSize);
out.writeOptionalVInt(phraseLimit);
boolean hasOptions = options != null;
@@ -331,6 +358,33 @@ public Boolean highlightFilter() {
return this.highlightFilter;
}

/**
* When using the highlighterType <tt>fvh</tt> this setting
* controls which scanner to use for fragment boundaries, and defaults to <tt>chars</tt> (the simple scanner).
*/
@SuppressWarnings("unchecked")
public HB boundaryScannerType(String boundaryScannerType) {
this.boundaryScannerType = BoundaryScannerType.fromString(boundaryScannerType);
return (HB) this;
}

/**
* When using the highlighterType <tt>fvh</tt> this setting
* controls which scanner to use for fragment boundaries, and defaults to <tt>chars</tt> (the simple scanner).
*/
@SuppressWarnings("unchecked")
public HB boundaryScannerType(BoundaryScannerType boundaryScannerType) {
this.boundaryScannerType = boundaryScannerType;
return (HB) this;
}

/**
* @return the value set by {@link #boundaryScannerType(String)}
*/
public BoundaryScannerType boundaryScannerType() {
return this.boundaryScannerType;
}

/**
* When using the highlighterType <tt>fvh</tt> this setting
* controls how far to look for boundary characters, and defaults to 20.
@@ -366,6 +420,25 @@ public char[] boundaryChars() {
return this.boundaryChars;
}

/**
* When using the highlighterType <tt>fvh</tt> and a boundaryScannerType of <tt>word</tt> or <tt>sentence</tt>, this
* setting controls the locale used by the BreakIterator, and defaults to the root locale.
*/
@SuppressWarnings("unchecked")
public HB boundaryScannerLocale(String boundaryScannerLocale) {
if (boundaryScannerLocale != null) {
this.boundaryScannerLocale = Locale.forLanguageTag(boundaryScannerLocale);
}
return (HB) this;
}

/**
* @return the value set by {@link #boundaryScannerLocale(String)}
*/
public Locale boundaryScannerLocale() {
return this.boundaryScannerLocale;
}
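
Taken together, the new setters compose as below. A minimal sketch, assuming a hypothetical field name and query (only the boundary-scanner calls come from this change):

import org.elasticsearch.index.query.QueryBuilders;
import org.elasticsearch.search.builder.SearchSourceBuilder;
import org.elasticsearch.search.fetch.subphase.highlight.HighlightBuilder;

// Hypothetical usage: "content" is an illustrative field name.
HighlightBuilder highlight = new HighlightBuilder()
        .field("content")
        .highlighterType("fvh")
        .boundaryScannerType("sentence")     // chars | word | sentence
        .boundaryScannerLocale("de-DE");     // parsed with Locale.forLanguageTag
SearchSourceBuilder source = new SearchSourceBuilder()
        .query(QueryBuilders.matchQuery("content", "elasticsearch"))
        .highlighter(highlight);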

/**
* Allows setting custom options for custom highlighters.
*/
@@ -491,12 +564,18 @@ void commonOptionsToXContent(XContentBuilder builder) throws IOException {
if (highlightFilter != null) {
builder.field(HIGHLIGHT_FILTER_FIELD.getPreferredName(), highlightFilter);
}
if (boundaryScannerType != null) {
builder.field(BOUNDARY_SCANNER_FIELD.getPreferredName(), boundaryScannerType.name());
}
if (boundaryMaxScan != null) {
builder.field(BOUNDARY_MAX_SCAN_FIELD.getPreferredName(), boundaryMaxScan);
}
if (boundaryChars != null) {
builder.field(BOUNDARY_CHARS_FIELD.getPreferredName(), new String(boundaryChars));
}
if (boundaryScannerLocale != null) {
builder.field(BOUNDARY_SCANNER_LOCALE_FIELD.getPreferredName(), boundaryScannerLocale.toLanguageTag());
}
if (options != null && options.size() > 0) {
builder.field(OPTIONS_FIELD.getPreferredName(), options);
}
@@ -523,8 +602,10 @@ static <HB extends AbstractHighlighterBuilder<HB>> BiFunction<QueryParseContext,
parser.declareInt(HB::fragmentSize, FRAGMENT_SIZE_FIELD);
parser.declareInt(HB::numOfFragments, NUMBER_OF_FRAGMENTS_FIELD);
parser.declareBoolean(HB::requireFieldMatch, REQUIRE_FIELD_MATCH_FIELD);
parser.declareString(HB::boundaryScannerType, BOUNDARY_SCANNER_FIELD);
parser.declareInt(HB::boundaryMaxScan, BOUNDARY_MAX_SCAN_FIELD);
parser.declareString((HB hb, String bc) -> hb.boundaryChars(bc.toCharArray()), BOUNDARY_CHARS_FIELD);
parser.declareString(HB::boundaryScannerLocale, BOUNDARY_SCANNER_LOCALE_FIELD);
parser.declareString(HB::highlighterType, TYPE_FIELD);
parser.declareString(HB::fragmenter, FRAGMENTER_FIELD);
parser.declareInt(HB::noMatchSize, NO_MATCH_SIZE_FIELD);
@@ -562,8 +643,8 @@ static <HB extends AbstractHighlighterBuilder<HB>> BiFunction<QueryParseContext,
public final int hashCode() {
return Objects.hash(getClass(), Arrays.hashCode(preTags), Arrays.hashCode(postTags), fragmentSize,
numOfFragments, highlighterType, fragmenter, highlightQuery, order, highlightFilter,
-                forceSource, boundaryMaxScan, Arrays.hashCode(boundaryChars), noMatchSize,
-                phraseLimit, options, requireFieldMatch, doHashCode());
+                forceSource, boundaryScannerType, boundaryMaxScan, Arrays.hashCode(boundaryChars), boundaryScannerLocale,
+                noMatchSize, phraseLimit, options, requireFieldMatch, doHashCode());
}

/**
@@ -591,8 +672,10 @@ public final boolean equals(Object obj) {
Objects.equals(order, other.order) &&
Objects.equals(highlightFilter, other.highlightFilter) &&
Objects.equals(forceSource, other.forceSource) &&
Objects.equals(boundaryScannerType, other.boundaryScannerType) &&
Objects.equals(boundaryMaxScan, other.boundaryMaxScan) &&
Arrays.equals(boundaryChars, other.boundaryChars) &&
Objects.equals(boundaryScannerLocale, other.boundaryScannerLocale) &&
Objects.equals(noMatchSize, other.noMatchSize) &&
Objects.equals(phraseLimit, other.phraseLimit) &&
Objects.equals(options, other.options) &&
FastVectorHighlighter.java
@@ -21,6 +21,7 @@
import org.apache.lucene.search.highlight.Encoder;
import org.apache.lucene.search.vectorhighlight.BaseFragmentsBuilder;
import org.apache.lucene.search.vectorhighlight.BoundaryScanner;
import org.apache.lucene.search.vectorhighlight.BreakIteratorBoundaryScanner;
import org.apache.lucene.search.vectorhighlight.CustomFieldQuery;
import org.apache.lucene.search.vectorhighlight.FieldFragList;
import org.apache.lucene.search.vectorhighlight.FieldPhraseList.WeightedPhraseInfo;
@@ -38,15 +39,23 @@
import org.elasticsearch.index.mapper.FieldMapper;
import org.elasticsearch.search.fetch.FetchPhaseExecutionException;
import org.elasticsearch.search.fetch.FetchSubPhase;
import org.elasticsearch.search.fetch.subphase.highlight.SearchContextHighlight.Field;
import org.elasticsearch.search.fetch.subphase.highlight.SearchContextHighlight.FieldOptions;
import org.elasticsearch.search.internal.SearchContext;

import java.text.BreakIterator;
import java.util.Collections;
import java.util.HashMap;
import java.util.Locale;
import java.util.Map;

public class FastVectorHighlighter implements Highlighter {

-    private static final SimpleBoundaryScanner DEFAULT_BOUNDARY_SCANNER = new SimpleBoundaryScanner();
+    private static final BoundaryScanner DEFAULT_SIMPLE_BOUNDARY_SCANNER = new SimpleBoundaryScanner();
+    private static final BoundaryScanner DEFAULT_SENTENCE_BOUNDARY_SCANNER = new BreakIteratorBoundaryScanner(
+            BreakIterator.getSentenceInstance(Locale.ROOT));
+    private static final BoundaryScanner DEFAULT_WORD_BOUNDARY_SCANNER = new BreakIteratorBoundaryScanner(
+            BreakIterator.getWordInstance(Locale.ROOT));

public static final Setting<Boolean> SETTING_TV_HIGHLIGHT_MULTI_VALUE = Setting.boolSetting("search.highlight.term_vector_multi_value",
true, Setting.Property.NodeScope);
@@ -105,12 +114,7 @@ public HighlightField highlight(HighlighterContext highlighterContext) {
FragListBuilder fragListBuilder;
BaseFragmentsBuilder fragmentsBuilder;

-        BoundaryScanner boundaryScanner = DEFAULT_BOUNDARY_SCANNER;
-        if (field.fieldOptions().boundaryMaxScan() != SimpleBoundaryScanner.DEFAULT_MAX_SCAN
-                || field.fieldOptions().boundaryChars() != SimpleBoundaryScanner.DEFAULT_BOUNDARY_CHARS) {
-            boundaryScanner = new SimpleBoundaryScanner(field.fieldOptions().boundaryMaxScan(),
-                    field.fieldOptions().boundaryChars());
-        }
+        final BoundaryScanner boundaryScanner = getBoundaryScanner(field);
boolean forceSource = context.highlight().forceSource(field);
if (field.fieldOptions().numberOfFragments() == 0) {
fragListBuilder = new SingleFragListBuilder();
@@ -206,6 +210,29 @@ public boolean canHighlight(FieldMapper fieldMapper) {
&& fieldMapper.fieldType().storeTermVectorPositions();
}

private static BoundaryScanner getBoundaryScanner(Field field) {
final FieldOptions fieldOptions = field.fieldOptions();
final Locale boundaryScannerLocale = fieldOptions.boundaryScannerLocale();
switch(fieldOptions.boundaryScannerType()) {
case SENTENCE:
if (boundaryScannerLocale != null) {
return new BreakIteratorBoundaryScanner(BreakIterator.getSentenceInstance(boundaryScannerLocale));
}
return DEFAULT_SENTENCE_BOUNDARY_SCANNER;
case WORD:
if (boundaryScannerLocale != null) {
return new BreakIteratorBoundaryScanner(BreakIterator.getWordInstance(boundaryScannerLocale));
}
return DEFAULT_WORD_BOUNDARY_SCANNER;
default:
if (fieldOptions.boundaryMaxScan() != SimpleBoundaryScanner.DEFAULT_MAX_SCAN
|| fieldOptions.boundaryChars() != SimpleBoundaryScanner.DEFAULT_BOUNDARY_CHARS) {
return new SimpleBoundaryScanner(fieldOptions.boundaryMaxScan(), fieldOptions.boundaryChars());
}
return DEFAULT_SIMPLE_BOUNDARY_SCANNER;
}
}
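
The locale matters here because BreakIterator's sentence and word rules are locale-sensitive. A JDK-only sketch of the iterator the scanner above wraps (the sample text is illustrative):

import java.text.BreakIterator;
import java.util.Locale;

public class SentenceBoundaries {
    public static void main(String[] args) {
        BreakIterator bi = BreakIterator.getSentenceInstance(Locale.forLanguageTag("en-US"));
        String text = "Highlighting is nice. Fragments should not cut sentences.";
        bi.setText(text);
        int start = bi.first();
        for (int end = bi.next(); end != BreakIterator.DONE; start = end, end = bi.next()) {
            System.out.println("[" + text.substring(start, end).trim() + "]");
        }
    }
}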

private class MapperHighlightEntry {
public FragListBuilder fragListBuilder;
public FragmentsBuilder fragmentsBuilder;
HighlightBuilder.java
@@ -95,9 +95,9 @@ public class HighlightBuilder extends AbstractHighlighterBuilder<HighlightBuilde
.preTags(DEFAULT_PRE_TAGS).postTags(DEFAULT_POST_TAGS).scoreOrdered(DEFAULT_SCORE_ORDERED)
.highlightFilter(DEFAULT_HIGHLIGHT_FILTER).requireFieldMatch(DEFAULT_REQUIRE_FIELD_MATCH)
.forceSource(DEFAULT_FORCE_SOURCE).fragmentCharSize(DEFAULT_FRAGMENT_CHAR_SIZE)
-            .numberOfFragments(DEFAULT_NUMBER_OF_FRAGMENTS).encoder(DEFAULT_ENCODER)
+            .numberOfFragments(DEFAULT_NUMBER_OF_FRAGMENTS).encoder(DEFAULT_ENCODER).boundaryScannerType(BoundaryScannerType.CHARS)
            .boundaryMaxScan(SimpleBoundaryScanner.DEFAULT_MAX_SCAN).boundaryChars(SimpleBoundaryScanner.DEFAULT_BOUNDARY_CHARS)
-            .noMatchSize(DEFAULT_NO_MATCH_SIZE).phraseLimit(DEFAULT_PHRASE_LIMIT).build();
+            .boundaryScannerLocale(Locale.ROOT).noMatchSize(DEFAULT_NO_MATCH_SIZE).phraseLimit(DEFAULT_PHRASE_LIMIT).build();

private final List<Field> fields = new ArrayList<>();

@@ -327,12 +327,18 @@ private static void transferOptions(AbstractHighlighterBuilder highlighterBuilde
if (highlighterBuilder.requireFieldMatch != null) {
targetOptionsBuilder.requireFieldMatch(highlighterBuilder.requireFieldMatch);
}
if (highlighterBuilder.boundaryScannerType != null) {
targetOptionsBuilder.boundaryScannerType(highlighterBuilder.boundaryScannerType);
}
if (highlighterBuilder.boundaryMaxScan != null) {
targetOptionsBuilder.boundaryMaxScan(highlighterBuilder.boundaryMaxScan);
}
if (highlighterBuilder.boundaryChars != null) {
targetOptionsBuilder.boundaryChars(convertCharArray(highlighterBuilder.boundaryChars));
}
if (highlighterBuilder.boundaryScannerLocale != null) {
targetOptionsBuilder.boundaryScannerLocale(highlighterBuilder.boundaryScannerLocale);
}
if (highlighterBuilder.highlighterType != null) {
targetOptionsBuilder.highlighterType(highlighterBuilder.highlighterType);
}
@@ -522,4 +528,30 @@ public String toString() {
return name().toLowerCase(Locale.ROOT);
}
}

public enum BoundaryScannerType implements Writeable {
CHARS, WORD, SENTENCE;

public static BoundaryScannerType readFromStream(StreamInput in) throws IOException {
int ordinal = in.readVInt();
if (ordinal < 0 || ordinal >= values().length) {
throw new IOException("Unknown BoundaryScannerType ordinal [" + ordinal + "]");
}
return values()[ordinal];
}

@Override
public void writeTo(StreamOutput out) throws IOException {
out.writeVInt(this.ordinal());
}

public static BoundaryScannerType fromString(String boundaryScannerType) {
return valueOf(boundaryScannerType.toUpperCase(Locale.ROOT));
}

@Override
public String toString() {
return name().toLowerCase(Locale.ROOT);
}
}
}
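
The readFromStream/writeTo pair above is a plain ordinal round trip. A test-style sketch using Elasticsearch's in-memory stream classes (not part of this commit):

import org.elasticsearch.common.io.stream.BytesStreamOutput;
import org.elasticsearch.common.io.stream.StreamInput;
import org.elasticsearch.search.fetch.subphase.highlight.HighlightBuilder.BoundaryScannerType;

public class BoundaryScannerTypeRoundTrip {
    public static void main(String[] args) throws Exception {
        BytesStreamOutput out = new BytesStreamOutput();
        BoundaryScannerType.SENTENCE.writeTo(out);                   // writes the ordinal as a vInt
        StreamInput in = out.bytes().streamInput();
        System.out.println(BoundaryScannerType.readFromStream(in));  // sentence
    }
}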