Skip to content

Commit

Permalink
feat: Replaced AddImagesAsync.
Browse files: browse the repository at this point in the history
  • Loading branch information
HavenDV committed Apr 9, 2024
1 parent 839dcd6 commit ddd87b7
Show file tree
Hide file tree
Showing 2 changed files with 17 additions and 41 deletions.
17 changes: 16 additions & 1 deletion src/Databases/Abstractions/src/VectorStoreExtensions.cs
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@ public static async Task<IReadOnlyCollection<string>> AddDocumentsAsync(
this IVectorDatabase vectorDatabase,
IEmbeddingModel embeddingModel,
IReadOnlyCollection<Document> documents,
EmbeddingSettings? embeddingSettings = default,
CancellationToken cancellationToken = default)
{
vectorDatabase = vectorDatabase ?? throw new ArgumentNullException(nameof(vectorDatabase));
Expand All @@ -66,6 +67,7 @@ public static async Task<IReadOnlyCollection<string>> AddDocumentsAsync(
embeddingModel: embeddingModel,
texts: documents.Select(x => x.PageContent).ToArray(),
metadatas: documents.Select(x => x.Metadata).ToArray(),
embeddingSettings: embeddingSettings,
cancellationToken).ConfigureAwait(false);
}

Expand All @@ -74,13 +76,26 @@ public static async Task<IReadOnlyCollection<string>> AddTextsAsync(
IEmbeddingModel embeddingModel,
IReadOnlyCollection<string> texts,
IReadOnlyCollection<IReadOnlyDictionary<string, object>>? metadatas = null,
EmbeddingSettings? embeddingSettings = default,
CancellationToken cancellationToken = default)
{
vectorDatabase = vectorDatabase ?? throw new ArgumentNullException(nameof(vectorDatabase));
embeddingModel = embeddingModel ?? throw new ArgumentNullException(nameof(embeddingModel));

var embeddingRequest = new EmbeddingRequest
{
Strings = texts.ToArray(),
Images = metadatas?
.Select((metadata, i) => metadata.TryGetValue(texts.ElementAt(i), out object? result)
? result as BinaryData
: null)
.Where(x => x != null)
.Select(x => Data.FromBytes(x!.ToArray()))
.ToArray() ?? [],
};

float[][] embeddings = await embeddingModel
.CreateEmbeddingsAsync(texts.ToArray(), null, cancellationToken)
.CreateEmbeddingsAsync(embeddingRequest, embeddingSettings, cancellationToken)
.ConfigureAwait(false);

return await vectorDatabase.AddAsync(
Expand Down
41 changes: 1 addition & 40 deletions src/Databases/OpenSearch/src/OpenSearchVectorStore.cs
Original file line number Diff line number Diff line change
Expand Up @@ -65,53 +65,14 @@ public async Task<IReadOnlyCollection<string>> AddAsync(
);
}

var bulkResponse = await _client!.BulkAsync(bulkDescriptor, cancellationToken)
var bulkResponse = await _client.BulkAsync(bulkDescriptor, cancellationToken)
.ConfigureAwait(false);

return items
.Select(i => i.Id)
.ToArray();
}

/// <summary>
/// Embeds each document together with the image stored in its metadata and indexes the
/// resulting vectors into <c>_indexName</c> with a single bulk request.
/// </summary>
/// <param name="documents">
/// Documents to index. For every document, <c>Metadata[PageContent]</c> must hold the image
/// as a <see cref="BinaryData"/> value; <c>PageContent</c> is also stored as the record text.
/// </param>
/// <param name="cancellationToken">Token forwarded to the embedding calls and the bulk request.</param>
/// <returns>The ids assigned to the indexed records, in the order of <paramref name="documents"/>.</returns>
/// <exception cref="ArgumentNullException"><paramref name="documents"/> is <c>null</c>.</exception>
/// <exception cref="ArgumentException">
/// A document has no <see cref="BinaryData"/> image stored under its <c>PageContent</c> key.
/// </exception>
public async Task<IEnumerable<string>> AddImagesAsync(IEnumerable<Document> documents, CancellationToken cancellationToken = default)
{
    documents = documents ?? throw new ArgumentNullException(nameof(documents));

    // Materialize once so the sequence is not enumerated multiple times.
    var enumerable = documents as Document[] ?? documents.ToArray();
    if (enumerable.Length == 0)
    {
        // Nothing to embed or index, so skip the bulk round trip entirely.
        return Array.Empty<string>();
    }

    var bulkDescriptor = new BulkDescriptor();
    var ids = new List<string>(enumerable.Length);

    // NOTE(review): ids restart at "1" on every call, so a later call presumably overwrites
    // records written by an earlier one in the same index - confirm this is intended.
    var i = 1;

    foreach (var document in enumerable)
    {
        // Fail with a descriptive error instead of a NullReferenceException/InvalidCastException
        // when the image is missing or stored as a different type.
        if (!document.Metadata.TryGetValue(document.PageContent, out object? value) ||
            value is not BinaryData image)
        {
            throw new ArgumentException(
                $"Document '{document.PageContent}' has no BinaryData image in its metadata under the page-content key.",
                nameof(documents));
        }

        var embeddingRequest = new EmbeddingRequest
        {
            Strings = new List<string> { document.PageContent },
            Images = new List<Data> { Data.FromBytes(image.ToArray()) }
        };
        var embed = await EmbeddingModel.CreateEmbeddingsAsync(embeddingRequest, cancellationToken: cancellationToken)
            .ConfigureAwait(false);

        var vectorRecord = new VectorRecord
        {
            Id = i++.ToString(CultureInfo.InvariantCulture),
            Text = document.PageContent,
            // All vectors returned for this request are flattened into a single vector.
            Vector = embed.Values.SelectMany(x => x).ToArray()
        };
        ids.Add(vectorRecord.Id);

        bulkDescriptor.Index<VectorRecord>(desc => desc
            .Document(vectorRecord)
            .Index(_indexName)
        );
    }

    await _client!.BulkAsync(bulkDescriptor, cancellationToken)
        .ConfigureAwait(false);

    // Return the ids that were indexed, matching AddAsync, rather than an always-empty list.
    return ids;
}

public Task<bool> DeleteAsync(
IEnumerable<string> ids,
CancellationToken cancellationToken = default)
Expand Down

0 comments on commit ddd87b7

Please sign in to comment.