Skip to content

Commit

Permalink
Implement k-NN approximate search and mappings bindings (#215)
Browse files Browse the repository at this point in the history
* Implement KNN bindings

Signed-off-by: Thomas Farr <tsfarr@amazon.com>

* Minimal docs

Signed-off-by: Thomas Farr <tsfarr@amazon.com>

* Add to changelog

Signed-off-by: Thomas Farr <tsfarr@amazon.com>

---------

Signed-off-by: Thomas Farr <tsfarr@amazon.com>
(cherry picked from commit 3833005)
  • Loading branch information
Xtansia committed Jun 26, 2023
1 parent 75d5e7a commit bb7c981
Show file tree
Hide file tree
Showing 22 changed files with 618 additions and 7 deletions.
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,10 @@
Inspired from [Keep a Changelog](https://keepachangelog.com/en/1.0.0/)

## [Unreleased]

### Added
- Added support for approximate k-NN search queries and k-NN vector index properties ([#215](https://github.com/opensearch-project/opensearch-net/pull/215))

### Dependencies
- Bumps `System.Reflection.Emit` from 4.3.0 to 4.7.0
- Bumps `Argu` from 5.5.0 to 6.1.1
Expand Down
18 changes: 18 additions & 0 deletions abstractions/src/OpenSearch.OpenSearch.Managed/OpenSearchNode.cs
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,8 @@
using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.IO;
using System.Runtime.InteropServices;
using System.Threading;
using OpenSearch.OpenSearch.Managed.Configuration;
using OpenSearch.OpenSearch.Managed.ConsoleWriters;
Expand Down Expand Up @@ -93,9 +95,25 @@ private static Dictionary<string, string> EnvVars(NodeConfiguration config)
if (!string.IsNullOrWhiteSpace(config.FileSystem.OpenSearchHome))
environmentVariables.Add("OPENSEARCH_HOME", config.FileSystem.OpenSearchHome);

var knnLibDir = Path.Combine(config.FileSystem.OpenSearchHome, "plugins", "opensearch-knn", config.Version.Major >= 2 ? "lib" : "knnlib");
if (RuntimeInformation.IsOSPlatform(OSPlatform.OSX))
AppendPathEnvVar("JAVA_LIBRARY_PATH", knnLibDir);
else if (RuntimeInformation.IsOSPlatform(OSPlatform.Linux))
AppendPathEnvVar("LD_LIBRARY_PATH", knnLibDir);

return environmentVariables;
}

/// <summary>
/// Appends <paramref name="value"/> to the <see cref="Path.PathSeparator"/>-delimited
/// environment variable <paramref name="name"/> of the current process.
/// </summary>
/// <param name="name">Name of the environment variable to extend.</param>
/// <param name="value">Entry to add. Ignored when null, empty or whitespace.</param>
/// <remarks>
/// When the variable is unset or blank it is set to <paramref name="value"/> alone.
/// An entry that is already listed (ordinal comparison) is not added a second time,
/// so repeated calls with the same value do not keep growing the variable.
/// </remarks>
private static void AppendPathEnvVar(string name, string value)
{
    // Nothing to add; also avoids writing a dangling separator.
    if (string.IsNullOrWhiteSpace(value))
        return;

    var previous = Environment.GetEnvironmentVariable(name);
    if (string.IsNullOrWhiteSpace(previous))
    {
        Environment.SetEnvironmentVariable(name, value);
        return;
    }

    // The variable is process-wide, so an earlier call may already have added this entry.
    foreach (var entry in previous.Split(Path.PathSeparator))
    {
        if (string.Equals(entry, value, StringComparison.Ordinal))
            return;
    }

    Environment.SetEnvironmentVariable(name, $"{previous}{Path.PathSeparator}{value}");
}

private bool AssumedStartedStateChecker(string section, string message)
{
if (AssumeStartedOnNotEnoughMasterPing
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -149,6 +149,10 @@ public IProperty Generic(Func<GenericPropertyDescriptor<T>, IGenericProperty> se
public IProperty SearchAsYouType(Func<SearchAsYouTypePropertyDescriptor<T>, ISearchAsYouTypeProperty> selector)
{
    // Without a selector there is nothing to build; the descriptor is only created when needed.
    if (selector == null)
        return null;

    return selector(new SearchAsYouTypePropertyDescriptor<T>());
}

/// <inheritdoc />
public IProperty KnnVector(Func<KnnVectorPropertyDescriptor<T>, IKnnVectorProperty> selector)
{
    // Without a selector there is nothing to build; the descriptor is only created when needed.
    if (selector == null)
        return null;

    return selector(new KnnVectorPropertyDescriptor<T>());
}

#pragma warning disable CS3001 // Argument type is not CLS-compliant
/// <summary>
/// Builds a number property descriptor named after <paramref name="field"/> with type
/// <see cref="NumberType.Integer"/> and hands it to <paramref name="selector"/> via <c>InvokeOrDefault</c>.
/// </summary>
public IProperty Scalar(Expression<Func<T, int>> field, Func<NumberPropertyDescriptor<T>, INumberProperty> selector = null)
{
    var integerDescriptor = new NumberPropertyDescriptor<T>()
        .Name(field)
        .Type(NumberType.Integer);

    return selector.InvokeOrDefault(integerDescriptor);
}
Expand Down
5 changes: 4 additions & 1 deletion src/OpenSearch.Client/Mapping/Types/FieldType.cs
Original file line number Diff line number Diff line change
Expand Up @@ -159,6 +159,9 @@ public enum FieldType
RankFeature,

[EnumMember(Value = "rank_features")]
RankFeatures
RankFeatures,

[EnumMember(Value = "knn_vector")]
KnnVector
}
}
6 changes: 6 additions & 0 deletions src/OpenSearch.Client/Mapping/Types/Properties.cs
Original file line number Diff line number Diff line change
Expand Up @@ -157,6 +157,9 @@ TReturnType Nested<TChild>(Func<NestedPropertyDescriptor<T, TChild>, INestedProp

/// <inheritdoc cref="ISearchAsYouTypeProperty"/>
/// <param name="selector">Callback that takes a <see cref="SearchAsYouTypePropertyDescriptor{T}"/> and returns the <see cref="ISearchAsYouTypeProperty"/> to use.</param>
TReturnType SearchAsYouType(Func<SearchAsYouTypePropertyDescriptor<T>, ISearchAsYouTypeProperty> selector);

/// <inheritdoc cref="IKnnVectorProperty" />
/// <param name="selector">Callback that takes a <see cref="KnnVectorPropertyDescriptor{T}"/> and returns the <see cref="IKnnVectorProperty"/> to use.</param>
TReturnType KnnVector(Func<KnnVectorPropertyDescriptor<T>, IKnnVectorProperty> selector);
}

public partial class PropertiesDescriptor<T> where T : class
Expand Down Expand Up @@ -252,6 +255,9 @@ public PropertiesDescriptor<T> Object<TChild>(Func<ObjectTypeDescriptor<T, TChil
/// <inheritdoc cref="IRankFeaturesProperty"/>
public PropertiesDescriptor<T> RankFeatures(Func<RankFeaturesPropertyDescriptor<T>, IRankFeaturesProperty> selector)
{
    return SetProperty(selector);
}

/// <inheritdoc cref="IKnnVectorProperty" />
public PropertiesDescriptor<T> KnnVector(Func<KnnVectorPropertyDescriptor<T>, IKnnVectorProperty> selector)
{
    return SetProperty(selector);
}

/// <summary>
/// Map a custom property.
/// </summary>
Expand Down
1 change: 1 addition & 0 deletions src/OpenSearch.Client/Mapping/Types/PropertyFormatter.cs
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,7 @@ public IProperty Deserialize(ref JsonReader reader, IJsonFormatterResolver forma
case FieldType.Alias: return Deserialize<FieldAliasProperty>(ref segmentReader, formatterResolver);
case FieldType.RankFeature: return Deserialize<RankFeatureProperty>(ref segmentReader, formatterResolver);
case FieldType.RankFeatures: return Deserialize<RankFeaturesProperty>(ref segmentReader, formatterResolver);
case FieldType.KnnVector: return Deserialize<KnnVectorProperty>(ref segmentReader, formatterResolver);
case FieldType.None:
// no "type" field in the property mapping, or FieldType enum could not be parsed from typeString
return Deserialize<ObjectProperty>(ref segmentReader, formatterResolver);
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,163 @@
/* SPDX-License-Identifier: Apache-2.0
*
* The OpenSearch Contributors require contributions made to
* this file be licensed under the Apache-2.0 license or a
* compatible open source license.
*/

using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.Runtime.Serialization;
using OpenSearch.Net.Utf8Json;

namespace OpenSearch.Client;

/// <summary>
/// Mapping property for a field of type <see cref="FieldType.KnnVector"/> (<c>knn_vector</c>).
/// The <c>ReadAs</c> attribute names <see cref="KnnVectorProperty"/> as the concrete type for this interface.
/// </summary>
[ReadAs(typeof(KnnVectorProperty))]
[InterfaceDataContract]
public interface IKnnVectorProperty : IDocValuesProperty
{
/// <summary>
/// The dimension of the vector. Bound to the <c>dimension</c> data member.
/// </summary>
[DataMember(Name = "dimension")]
int? Dimension { get; set; }

/// <summary>
/// The model to use when the underlying Approximate k-NN algorithm requires a training step.
/// Bound to the <c>model_id</c> data member.
/// </summary>
[DataMember(Name = "model_id")]
string ModelId { get; set; }

/// <summary>
/// The method to use when the underlying Approximate k-NN algorithm does not require training.
/// Bound to the <c>method</c> data member.
/// </summary>
[DataMember(Name = "method")]
IKnnMethod Method { get; set; }
}

/// <summary>
/// Describes the nearest neighbor method of a <see cref="IKnnVectorProperty"/>
/// (see <see cref="IKnnVectorProperty.Method"/>).
/// The <c>ReadAs</c> attribute names <see cref="KnnMethod"/> as the concrete type for this interface.
/// </summary>
[ReadAs(typeof(KnnMethod))]
[InterfaceDataContract]
public interface IKnnMethod
{
/// <summary>
/// The identifier for the nearest neighbor method. Bound to the <c>name</c> data member.
/// </summary>
[DataMember(Name = "name")]
string Name { get; set; }

/// <summary>
/// The approximate k-NN library to use for indexing and search. Bound to the <c>engine</c> data member.
/// </summary>
[DataMember(Name = "engine")]
string Engine { get; set; }

/// <summary>
/// The vector space used to calculate the distance between vectors. Bound to the <c>space_type</c> data member.
/// </summary>
[DataMember(Name = "space_type")]
string SpaceType { get; set; }

/// <summary>
/// The parameters used for the nearest neighbor method. Bound to the <c>parameters</c> data member.
/// </summary>
// NOTE(review): declared as IDictionary<string, object> rather than IKnnMethodParameters, although
// KnnMethodDescriptor assigns an IKnnMethodParameters here. Confirm whether the JsonFormatter declared
// on IKnnMethodParameters is meant to apply to this member.
[DataMember(Name = "parameters")]
IDictionary<string, object> Parameters { get; set; }
}

/// <summary>
/// Object-initializer friendly implementation of <see cref="IKnnMethod"/>;
/// every member is a plain read/write auto-property.
/// </summary>
public class KnnMethod : IKnnMethod
{
    /// <summary>The identifier for the nearest neighbor method.</summary>
    public string Name { get; set; }

    /// <summary>The approximate k-NN library to use for indexing and search.</summary>
    public string Engine { get; set; }

    /// <summary>The vector space used to calculate the distance between vectors.</summary>
    public string SpaceType { get; set; }

    /// <summary>The parameters used for the nearest neighbor method.</summary>
    public IDictionary<string, object> Parameters { get; set; }
}

/// <summary>
/// String-keyed dictionary of parameter values for a k-NN method.
/// The <c>JsonFormatter</c> attribute selects <c>VerbatimDictionaryKeysFormatter</c> with
/// <see cref="KnnMethodParameters"/> as the concrete dictionary type.
/// </summary>
[InterfaceDataContract]
[JsonFormatter(typeof(VerbatimDictionaryKeysFormatter<KnnMethodParameters, IKnnMethodParameters, string, object>))]
public interface IKnnMethodParameters : IIsADictionary<string, object> { }

/// <summary>
/// Concrete <see cref="IKnnMethodParameters"/> dictionary built on <c>IsADictionaryBase</c>.
/// </summary>
public class KnnMethodParameters : IsADictionaryBase<string, object>, IKnnMethodParameters
{
    /// <summary>Creates an instance using the base class's default constructor.</summary>
    public KnnMethodParameters()
    {
    }

    /// <summary>Creates an instance, passing <paramref name="container"/> to the base constructor.</summary>
    public KnnMethodParameters(IDictionary<string, object> container)
        : base(container)
    {
    }

    /// <summary>Creates an instance, passing <paramref name="container"/> to the base constructor.</summary>
    public KnnMethodParameters(Dictionary<string, object> container)
        : base(container)
    {
    }

    /// <summary>Adds <paramref name="value"/> under <paramref name="name"/> to the backing dictionary.</summary>
    public void Add(string name, object value)
    {
        BackingDictionary.Add(name, value);
    }
}

/// <summary>
/// Object-initializer friendly implementation of <see cref="IKnnVectorProperty"/>,
/// constructed with <see cref="FieldType.KnnVector"/>.
/// </summary>
[DebuggerDisplay("{DebugDisplay}")]
public class KnnVectorProperty : DocValuesPropertyBase, IKnnVectorProperty
{
    /// <summary>Creates the property, passing <see cref="FieldType.KnnVector"/> to the base constructor.</summary>
    public KnnVectorProperty()
        : base(FieldType.KnnVector)
    {
    }

    /// <summary>The dimension of the vector.</summary>
    public int? Dimension { get; set; }

    /// <summary>The model to use when the underlying Approximate k-NN algorithm requires a training step.</summary>
    public string ModelId { get; set; }

    /// <summary>The method to use when the underlying Approximate k-NN algorithm does not require training.</summary>
    public IKnnMethod Method { get; set; }
}

/// <summary>
/// Fluent descriptor for <see cref="IKnnVectorProperty"/>. Each method stores its argument
/// on the explicit interface member through <c>Assign</c> and returns the descriptor.
/// </summary>
[DebuggerDisplay("{DebugDisplay}")]
public class KnnVectorPropertyDescriptor<T>
    : DocValuesPropertyDescriptorBase<KnnVectorPropertyDescriptor<T>, IKnnVectorProperty, T>, IKnnVectorProperty
    where T : class
{
    /// <summary>Creates the descriptor, passing <see cref="FieldType.KnnVector"/> to the base constructor.</summary>
    public KnnVectorPropertyDescriptor()
        : base(FieldType.KnnVector)
    {
    }

    int? IKnnVectorProperty.Dimension { get; set; }

    string IKnnVectorProperty.ModelId { get; set; }

    IKnnMethod IKnnVectorProperty.Method { get; set; }

    /// <inheritdoc cref="IKnnVectorProperty.Dimension" />
    public KnnVectorPropertyDescriptor<T> Dimension(int? dimension)
    {
        return Assign(dimension, (property, size) => property.Dimension = size);
    }

    /// <inheritdoc cref="IKnnVectorProperty.ModelId" />
    public KnnVectorPropertyDescriptor<T> ModelId(string modelId)
    {
        return Assign(modelId, (property, id) => property.ModelId = id);
    }

    /// <inheritdoc cref="IKnnVectorProperty.Method" />
    public KnnVectorPropertyDescriptor<T> Method(Func<KnnMethodDescriptor, IKnnMethod> selector)
    {
        // A null selector stores null; otherwise the selector runs against a fresh KnnMethodDescriptor.
        return Assign(selector, (property, configure) =>
            property.Method = configure == null ? null : configure(new KnnMethodDescriptor()));
    }
}

/// <summary>
/// Fluent descriptor for <see cref="IKnnMethod"/>. Each method stores its argument
/// on the explicit interface member through <c>Assign</c> and returns the descriptor.
/// </summary>
public class KnnMethodDescriptor
    : DescriptorBase<KnnMethodDescriptor, IKnnMethod>, IKnnMethod
{
    string IKnnMethod.Name { get; set; }

    string IKnnMethod.Engine { get; set; }

    string IKnnMethod.SpaceType { get; set; }

    IDictionary<string, object> IKnnMethod.Parameters { get; set; }

    /// <inheritdoc cref="IKnnMethod.Name" />
    public KnnMethodDescriptor Name(string name)
    {
        return Assign(name, (method, methodName) => method.Name = methodName);
    }

    /// <inheritdoc cref="IKnnMethod.Engine" />
    public KnnMethodDescriptor Engine(string engine)
    {
        return Assign(engine, (method, engineName) => method.Engine = engineName);
    }

    /// <inheritdoc cref="IKnnMethod.SpaceType" />
    public KnnMethodDescriptor SpaceType(string spaceType)
    {
        return Assign(spaceType, (method, space) => method.SpaceType = space);
    }

    /// <inheritdoc cref="IKnnMethod.Parameters" />
    public KnnMethodDescriptor Parameters(Func<KnnMethodParametersDescriptor, IPromise<IKnnMethodParameters>> selector)
    {
        // A null selector, or a selector returning null, stores null parameters.
        return Assign(selector, (method, configure) =>
        {
            var promise = configure?.Invoke(new KnnMethodParametersDescriptor());
            method.Parameters = promise?.Value;
        });
    }
}

/// <summary>
/// Fluent descriptor that collects k-NN method parameters into a new <see cref="KnnMethodParameters"/>.
/// </summary>
public class KnnMethodParametersDescriptor
    : IsADictionaryDescriptorBase<KnnMethodParametersDescriptor, IKnnMethodParameters, string, object>
{
    /// <summary>Creates the descriptor, passing a new <see cref="KnnMethodParameters"/> to the base constructor.</summary>
    public KnnMethodParametersDescriptor()
        : base(new KnnMethodParameters())
    {
    }

    /// <summary>Passes <paramref name="name"/> and <paramref name="value"/> to <c>Assign</c> and returns its result.</summary>
    public KnnMethodParametersDescriptor Parameter(string name, object value)
    {
        return Assign(name, value);
    }
}
2 changes: 2 additions & 0 deletions src/OpenSearch.Client/Mapping/Visitor/IPropertyVisitor.cs
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,8 @@ public interface IPropertyVisitor

/// <summary>Visits a property mapped as <see cref="IFieldAliasProperty"/>.</summary>
void Visit(IFieldAliasProperty type, PropertyInfo propertyInfo, OpenSearchPropertyAttributeBase attribute);

/// <summary>Visits a property mapped as <see cref="IKnnVectorProperty"/>.</summary>
void Visit(IKnnVectorProperty type, PropertyInfo propertyInfo, OpenSearchPropertyAttributeBase attribute);

/// <summary>Returns an <see cref="IProperty"/> for the given CLR property and attribute.</summary>
// NOTE(review): NoopPropertyVisitor returns null from this overload; presumably null means
// "no custom property, use the default mapping" — confirm against the caller.
IProperty Visit(PropertyInfo propertyInfo, OpenSearchPropertyAttributeBase attribute);

// NOTE(review): presumably returns true when the given CLR property should be left out of the
// mapping — confirm against the implementations and the caller.
bool SkipProperty(PropertyInfo propertyInfo, OpenSearchPropertyAttributeBase attribute);
Expand Down
5 changes: 5 additions & 0 deletions src/OpenSearch.Client/Mapping/Visitor/NoopPropertyVisitor.cs
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,8 @@ public virtual void Visit(ISearchAsYouTypeProperty type, PropertyInfo propertyIn

/// <summary>No-op visit of an <see cref="IFieldAliasProperty"/>; virtual so a subclass can override it.</summary>
public virtual void Visit(IFieldAliasProperty type, PropertyInfo propertyInfo, OpenSearchPropertyAttributeBase attribute) { }

/// <summary>No-op visit of an <see cref="IKnnVectorProperty"/>; virtual so a subclass can override it.</summary>
public virtual void Visit(IKnnVectorProperty type, PropertyInfo propertyInfo, OpenSearchPropertyAttributeBase attribute) { }

/// <summary>Always returns <c>null</c>; virtual so a subclass can supply an <see cref="IProperty"/> instead.</summary>
public virtual IProperty Visit(PropertyInfo propertyInfo, OpenSearchPropertyAttributeBase attribute) => null;

public void Visit(IProperty type, PropertyInfo propertyInfo, OpenSearchPropertyAttributeBase attribute)
Expand Down Expand Up @@ -176,6 +178,9 @@ public void Visit(IProperty type, PropertyInfo propertyInfo, OpenSearchPropertyA
case IFieldAliasProperty fieldAlias:
Visit(fieldAlias, propertyInfo, attribute);
break;
case IKnnVectorProperty knnVector:
Visit(knnVector, propertyInfo, attribute);
break;
}
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -200,6 +200,9 @@ public interface IQueryContainer
[DataMember(Name = "distance_feature")]
IDistanceFeatureQuery DistanceFeature { get; set; }

/// <summary>The k-NN query held by this container. Bound to the <c>knn</c> data member.</summary>
[DataMember(Name = "knn")]
IKnnQuery Knn { get; set; }

/// <summary>Accepts an <see cref="IQueryVisitor"/>.</summary>
// NOTE(review): presumably dispatches the visitor over the contained query — confirm against the implementation.
void Accept(IQueryVisitor visitor);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@ public partial class QueryContainer : IQueryContainer, IDescriptor
private IHasParentQuery _hasParent;
private IIdsQuery _ids;
private IIntervalsQuery _intervals;
private IKnnQuery _knn;
private IMatchQuery _match;
private IMatchAllQuery _matchAllQuery;
private IMatchBoolPrefixQuery _matchBoolPrefixQuery;
Expand Down Expand Up @@ -193,6 +194,12 @@ IIntervalsQuery IQueryContainer.Intervals
set => _intervals = Set(value);
}

// Explicit IQueryContainer member: reads the backing field, and writes it through Set(value).
IKnnQuery IQueryContainer.Knn
{
    get { return _knn; }
    set { _knn = Set(value); }
}

IMatchQuery IQueryContainer.Match
{
get => _match;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -237,6 +237,9 @@ public QueryContainer HasChild<TChild>(Func<HasChildQueryDescriptor<TChild>, IHa
public QueryContainer HasParent<TParent>(Func<HasParentQueryDescriptor<TParent>, IHasParentQuery> selector) where TParent : class
{
    // The callback stores the built query on the container's HasParent member.
    return WrapInContainer(selector, (hasParentQuery, queryContainer) => queryContainer.HasParent = hasParentQuery);
}

/// <summary>
/// Builds a k-NN query from <paramref name="selector"/> and wraps it in a <see cref="QueryContainer"/>.
/// </summary>
public QueryContainer Knn(Func<KnnQueryDescriptor<T>, IKnnQuery> selector)
{
    // The callback stores the built query on the container's Knn member.
    return WrapInContainer(selector, (knnQuery, queryContainer) => queryContainer.Knn = knnQuery);
}

/// <summary>
/// A query that generates the union of documents produced by its subqueries, and that scores each document
/// with the maximum score for that document as produced by any subquery, plus a tie breaking increment for
Expand Down
3 changes: 3 additions & 0 deletions src/OpenSearch.Client/QueryDsl/Query.cs
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,9 @@ public static QueryContainer Ids(Func<IdsQueryDescriptor, IIdsQuery> selector) =
public static QueryContainer Intervals(Func<IntervalsQueryDescriptor<T>, IIntervalsQuery> selector)
{
    // Delegates to a fresh container descriptor.
    var containerDescriptor = new QueryContainerDescriptor<T>();
    return containerDescriptor.Intervals(selector);
}

/// <summary>
/// Static shortcut for <c>new QueryContainerDescriptor&lt;T&gt;().Knn(selector)</c>.
/// </summary>
public static QueryContainer Knn(Func<KnnQueryDescriptor<T>, IKnnQuery> selector)
{
    var containerDescriptor = new QueryContainerDescriptor<T>();
    return containerDescriptor.Knn(selector);
}

/// <summary>
/// Static shortcut for <c>new QueryContainerDescriptor&lt;T&gt;().Match(selector)</c>.
/// </summary>
public static QueryContainer Match(Func<MatchQueryDescriptor<T>, IMatchQuery> selector)
{
    var containerDescriptor = new QueryContainerDescriptor<T>();
    return containerDescriptor.Match(selector);
}

Expand Down
Loading

0 comments on commit bb7c981

Please sign in to comment.