Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: Refactor providers #131

Merged
merged 25 commits into from
Feb 19, 2024
Merged
Show file tree
Hide file tree
Changes from 18 commits
Commits
Show all changes
25 commits
Select commit Hold shift + click to select a range
5f740cd
refactor: Moved provider abstractions to separate package.
HavenDV Jan 29, 2024
2dde1d8
feat: Converted OpenAI provider.
HavenDV Feb 3, 2024
be3aa74
feat: Converted Ollama.
HavenDV Feb 3, 2024
f0e51cd
feat: Implemented LLamaSharp.
HavenDV Feb 3, 2024
6c9f0c0
feat: Implemented LeonardoAi.
HavenDV Feb 3, 2024
058a510
feat: Implemented HuggingFace.
HavenDV Feb 3, 2024
43ed538
feat: Implemented Google.
HavenDV Feb 3, 2024
2395efc
feat: Implemented Settings.Calculate for OpenAI.
HavenDV Feb 4, 2024
238bda0
feat: Implemented Automatic1111.
HavenDV Feb 4, 2024
d09a4a0
feat: Implemented Anyscale.
HavenDV Feb 4, 2024
4d02b0b
feat: Implemented Azure.
HavenDV Feb 4, 2024
8e61922
feat: Implemented Anthropic.
HavenDV Feb 4, 2024
1ac7462
Merge branch 'main' into 119-refactor-providers
HavenDV Feb 4, 2024
d2aab28
fix: Fixed WithDebugExtensions.
HavenDV Feb 4, 2024
4f4c63e
fix: Small fixes.
HavenDV Feb 4, 2024
25cd1e6
test: Fixed some tests.
HavenDV Feb 4, 2024
aa7d2fe
feat: Adapted other code to new changes.
HavenDV Feb 5, 2024
16bd59b
feat: Implemented Bedrock.
HavenDV Feb 5, 2024
ddf70de
fix: Small fixes.
HavenDV Feb 12, 2024
547483a
Merge branch 'main' into 119-refactor-providers
HavenDV Feb 17, 2024
7241910
fix: Fixed conflicts.
HavenDV Feb 17, 2024
ebdc8e2
feat: Added new Amazon Bedrock implementations.
HavenDV Feb 19, 2024
0f4b7ee
feat: Adapted Amazon.SageMaker.
HavenDV Feb 19, 2024
faee80f
fix: Models constructors now require provider.
HavenDV Feb 19, 2024
bbbd027
fix: Removed unused FileMemory example.
HavenDV Feb 19, 2024
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 9 additions & 2 deletions LangChain.sln
Original file line number Diff line number Diff line change
Expand Up @@ -172,11 +172,13 @@ Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "LangChain.Providers.Automat
EndProject
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "LangChain.Providers.Automatic1111.IntegrationTests", "src\tests\LangChain.Providers.Automatic1111.IntegrationTests\LangChain.Providers.Automatic1111.IntegrationTests.csproj", "{A6CF79BC-8365-46E8-9230-1A4AD615D40B}"
EndProject
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "LangChain.Providers.Abstractions", "src\libs\Providers\LangChain.Providers.Abstractions\LangChain.Providers.Abstractions.csproj", "{628DDC9D-28A1-4C2F-BA13-171BD8CF711C}"
EndProject
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "LangChain.Samples.FileMemory", "examples\LangChain.Samples.FileMemory\LangChain.Samples.FileMemory.csproj", "{BA701280-0BEB-4DA4-92B3-9C777082C2AF}"
EndProject
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "LangChain.Providers.Bedrock.IntegrationTests", "src\tests\LangChain.Providers.Bedrock.IntegrationTests\LangChain.Providers.Bedrock.IntegrationTests.csproj", "{73C76E80-95C5-4C96-A319-4F32043C903E}"
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "LangChain.Providers.Amazon.Bedrock.IntegrationTests", "src\tests\LangChain.Providers.Amazon.Bedrock.IntegrationTests\LangChain.Providers.Amazon.Bedrock.IntegrationTests.csproj", "{73C76E80-95C5-4C96-A319-4F32043C903E}"
EndProject
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "LangChain.Providers.Bedrock", "src\libs\Providers\LangChain.Providers.Bedrock\LangChain.Providers.Bedrock.csproj", "{67985CCB-F606-41F8-9D36-513459F58882}"
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "LangChain.Providers.Amazon.Bedrock", "src\libs\Providers\LangChain.Providers.Amazon.Bedrock\LangChain.Providers.Amazon.Bedrock.csproj", "{67985CCB-F606-41F8-9D36-513459F58882}"
EndProject
Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution
Expand Down Expand Up @@ -408,6 +410,10 @@ Global
{A6CF79BC-8365-46E8-9230-1A4AD615D40B}.Debug|Any CPU.Build.0 = Debug|Any CPU
{A6CF79BC-8365-46E8-9230-1A4AD615D40B}.Release|Any CPU.ActiveCfg = Release|Any CPU
{A6CF79BC-8365-46E8-9230-1A4AD615D40B}.Release|Any CPU.Build.0 = Release|Any CPU
{628DDC9D-28A1-4C2F-BA13-171BD8CF711C}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
{628DDC9D-28A1-4C2F-BA13-171BD8CF711C}.Debug|Any CPU.Build.0 = Debug|Any CPU
{628DDC9D-28A1-4C2F-BA13-171BD8CF711C}.Release|Any CPU.ActiveCfg = Release|Any CPU
{628DDC9D-28A1-4C2F-BA13-171BD8CF711C}.Release|Any CPU.Build.0 = Release|Any CPU
{BA701280-0BEB-4DA4-92B3-9C777082C2AF}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
{BA701280-0BEB-4DA4-92B3-9C777082C2AF}.Debug|Any CPU.Build.0 = Debug|Any CPU
{BA701280-0BEB-4DA4-92B3-9C777082C2AF}.Release|Any CPU.ActiveCfg = Release|Any CPU
Expand Down Expand Up @@ -488,6 +494,7 @@ Global
{4913844F-74EC-4E74-AE8A-EA825569E6BA} = {E55391DE-F8F3-4CC2-A0E3-2406C76E9C68}
{BF4C7B87-0997-4208-84EF-D368DF7B9861} = {E55391DE-F8F3-4CC2-A0E3-2406C76E9C68}
{A6CF79BC-8365-46E8-9230-1A4AD615D40B} = {FDEE2E22-C239-4921-83B2-9797F765FD6A}
{628DDC9D-28A1-4C2F-BA13-171BD8CF711C} = {E55391DE-F8F3-4CC2-A0E3-2406C76E9C68}
{BA701280-0BEB-4DA4-92B3-9C777082C2AF} = {F17A86AE-A174-4B6C-BAA7-9D9A9704BE85}
{73C76E80-95C5-4C96-A319-4F32043C903E} = {FDEE2E22-C239-4921-83B2-9797F765FD6A}
{67985CCB-F606-41F8-9D36-513459F58882} = {E55391DE-F8F3-4CC2-A0E3-2406C76E9C68}
Expand Down
3 changes: 1 addition & 2 deletions examples/LangChain.Samples.HuggingFace/Program.cs
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
using LangChain.Providers;
using LangChain.Providers.HuggingFace;
using LangChain.Providers.HuggingFace.Predefined;

using var client = new HttpClient();
var gpt2Model = new Gpt2Model(apiKey: string.Empty, client);
Expand Down
3 changes: 1 addition & 2 deletions examples/LangChain.Samples.OpenAI/Program.cs
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
using LangChain.Providers;
using LangChain.Providers.OpenAI;
using LangChain.Providers.OpenAI.Predefined;

var apiKey =
Environment.GetEnvironmentVariable("OPENAI_API_KEY") ??
Expand Down
1 change: 1 addition & 0 deletions src/Directory.Packages.props
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@
<PackageVersion Include="StackExchange.Redis" Version="2.7.4" />
<PackageVersion Include="System.Net.Http" Version="4.3.4" />
<PackageVersion Include="System.Text.Json" Version="8.0.0" />
<PackageVersion Include="System.ValueTuple" Version="4.5.0" />
<PackageVersion Include="Tiktoken" Version="1.1.3" />
<PackageVersion Include="tryAGI.OpenAI" Version="2.0.0-alpha.9" />
<PackageVersion Include="Microsoft.Extensions.DependencyInjection.Abstractions" Version="8.0.0" />
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
using System.Text.Json;
using LangChain.Abstractions.Embeddings.Base;
using LangChain.Common.Converters;
using LangChain.Docstore;
using LangChain.Providers;
using LangChain.VectorStores;
using Microsoft.SemanticKernel.AI.Embeddings;
using Microsoft.SemanticKernel.Connectors.Memory.Chroma;
Expand Down Expand Up @@ -31,7 +31,7 @@ public class ChromaVectorStore : VectorStore
public ChromaVectorStore(
HttpClient httpClient,
string endpoint,
IEmbeddings embeddings,
IEmbeddingModel embeddings,
string collectionName = LangchainDefaultCollectionName)
: base(embeddings)
{
Expand Down Expand Up @@ -166,8 +166,8 @@ public override async Task<IEnumerable<Document>> SimilaritySearchByVectorAsync(
int k = 4,
CancellationToken cancellationToken = default)
{
var embeddings = await Embeddings
.EmbedQueryAsync(query, cancellationToken)
var embeddings = await EmbeddingModel
.CreateEmbeddingsAsync(query, null, cancellationToken)
.ConfigureAwait(false);

var documentsWithScores = await SimilaritySearchByVectorWithAsync(embeddings, k, cancellationToken).ConfigureAwait(false);
Expand All @@ -192,8 +192,8 @@ public override async Task<IEnumerable<Document>> MaxMarginalRelevanceSearch(
float lambdaMult = 0.5f,
CancellationToken cancellationToken = default)
{
var embeddings = await Embeddings
.EmbedQueryAsync(query, cancellationToken)
float[] embeddings = await EmbeddingModel
.CreateEmbeddingsAsync(query, null, cancellationToken)
.ConfigureAwait(false);

var documents = await MaxMarginalRelevanceSearchByVector(
Expand Down Expand Up @@ -251,8 +251,8 @@ private async Task<IEnumerable<string>> AddCoreAsync(
string[] ids,
CancellationToken cancellationToken)
{
var embeddings = await Embeddings
.EmbedDocumentsAsync(texts, cancellationToken)
float[][] embeddings = await EmbeddingModel
.CreateEmbeddingsAsync(texts, null, cancellationToken)
.ConfigureAwait(false);

var records = new MemoryRecord[texts.Length];
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
using LangChain.Abstractions.Embeddings.Base;
using LangChain.Docstore;
using LangChain.Docstore;
using LangChain.Indexes;
using LangChain.Providers;
using LangChain.TextSplitters;
using LangChain.VectorStores;

Expand All @@ -15,7 +15,7 @@ namespace LangChain.Databases.InMemory;
/// <param name="embeddings"></param>
/// <param name="distanceMetrics"></param>
public class InMemoryVectorStore(
IEmbeddings embeddings,
IEmbeddingModel embeddings,
EDistanceMetrics distanceMetrics = EDistanceMetrics.Euclidean)
: VectorStore(embeddings)
{
Expand All @@ -33,7 +33,7 @@ public class InMemoryVectorStore(
/// <param name="documents"></param>
/// <returns></returns>
public static async Task<VectorStoreIndexWrapper> CreateIndexFromDocuments(
IEmbeddings embeddings,
IEmbeddingModel embeddings,
IReadOnlyCollection<Document> documents)
{
var vectorStore = new InMemoryVectorStore(embeddings);
Expand All @@ -51,9 +51,9 @@ public override async Task<IEnumerable<string>> AddDocumentsAsync(
{
var docs = documents.ToArray();

var embeddings = await Embeddings.EmbedDocumentsAsync(docs
float[][] embeddings = await EmbeddingModel.CreateEmbeddingsAsync(docs
.Select(x => x.PageContent)
.ToArray(), cancellationToken).ConfigureAwait(false);
.ToArray(), cancellationToken: cancellationToken).ConfigureAwait(false);
var ids = new List<string>();
for (var i = 0; i < docs.Length; i++)
{
Expand Down Expand Up @@ -98,9 +98,9 @@ public override async Task<IEnumerable<Document>> SimilaritySearchAsync(
int k = 4,
CancellationToken cancellationToken = default)
{
var embedding = await Embeddings.EmbedQueryAsync(
float[] embedding = await EmbeddingModel.CreateEmbeddingsAsync(
query,
cancellationToken).ConfigureAwait(false);
cancellationToken: cancellationToken).ConfigureAwait(false);

return await SimilaritySearchByVectorAsync(
embedding,
Expand Down Expand Up @@ -129,15 +129,14 @@ public override Task<IEnumerable<Document>> SimilaritySearchByVectorAsync(
int k = 4,
CancellationToken cancellationToken = default)
{
var embedding = await Embeddings.EmbedQueryAsync(
float[] embedding = await EmbeddingModel.CreateEmbeddingsAsync(
query,
cancellationToken).ConfigureAwait(false);
var arr = embedding.ToArray();
cancellationToken: cancellationToken).ConfigureAwait(false);
var distances = _storage.Select(s =>
new
{
doc = s.doc,
distance = _distanceFunction(s.vec, arr)
distance = _distanceFunction(s.vec, embedding)
}).Take(k);

return distances.Select(d => new ValueTuple<Document, float>(d.doc, d.distance));
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
using LangChain.Abstractions.Embeddings.Base;
using LangChain.Databases.Postgres;
using LangChain.Docstore;
using LangChain.Providers;
using LangChain.VectorStores;

namespace LangChain.Databases;
Expand All @@ -25,12 +25,12 @@ public class PostgresVectorStore : VectorStore
public PostgresVectorStore(
string connectionString,
int vectorSize,
IEmbeddings embeddings,
IEmbeddingModel embeddingModel,
string schema = DefaultSchema,
string collectionName = DefaultCollectionName,
DistanceStrategy distanceStrategy = DistanceStrategy.Cosine,
Func<float, float>? overrideRelevanceScoreFn = null)
: base(embeddings, overrideRelevanceScoreFn)
: base(embeddingModel, overrideRelevanceScoreFn)
{
_distanceStrategy = distanceStrategy;
_collectionName = collectionName;
Expand All @@ -44,8 +44,10 @@ public override async Task<IEnumerable<string>> AddDocumentsAsync(
CancellationToken cancellationToken = default)
{
var documentsArray = documents.ToArray();
var embeddings = await Embeddings
.EmbedDocumentsAsync(documentsArray.Select(d => d.PageContent).ToArray(), cancellationToken)
float[][] embeddings = await EmbeddingModel
.CreateEmbeddingsAsync(documentsArray
.Select(d => d.PageContent)
.ToArray(), null, cancellationToken)
.ConfigureAwait(false);

var ids = new string[documentsArray.Length];
Expand Down Expand Up @@ -75,8 +77,8 @@ public override async Task<IEnumerable<string>> AddTextsAsync(
var textsArray = texts.ToArray();
var metadatasArray = metadatas?.ToArray() ?? new Dictionary<string, object>?[textsArray.Length];

var embeddings = await Embeddings
.EmbedDocumentsAsync(textsArray, cancellationToken)
float[][] embeddings = await EmbeddingModel
.CreateEmbeddingsAsync(textsArray, null, cancellationToken)
.ConfigureAwait(false);

var ids = new string[textsArray.Length];
Expand Down Expand Up @@ -137,9 +139,11 @@ public override async Task<IEnumerable<Document>> SimilaritySearchByVectorAsync(
string query, int k = 4,
CancellationToken cancellationToken = default)
{
var embedding = await Embeddings.EmbedQueryAsync(query, cancellationToken).ConfigureAwait(false);
float[] embedding = await EmbeddingModel.CreateEmbeddingsAsync(
query, null, cancellationToken).ConfigureAwait(false);

return await SimilaritySearchByVectorWithScoreAsync(embedding, k, cancellationToken)
return await SimilaritySearchByVectorWithScoreAsync(
embedding, k, cancellationToken)
.ConfigureAwait(false);
}

Expand Down Expand Up @@ -183,7 +187,7 @@ public override async Task<IEnumerable<Document>> MaxMarginalRelevanceSearch(
int fetchK = 20, float lambdaMult = 0.5f,
CancellationToken cancellationToken = default)
{
var embedding = await Embeddings.EmbedQueryAsync(query, cancellationToken).ConfigureAwait(false);
float[] embedding = await EmbeddingModel.CreateEmbeddingsAsync(query, null, cancellationToken).ConfigureAwait(false);

return await MaxMarginalRelevanceSearchByVector(embedding, k, fetchK, lambdaMult, cancellationToken)
.ConfigureAwait(false);
Expand Down
52 changes: 36 additions & 16 deletions src/libs/Databases/LangChain.Databases.Sqlite/SQLiteVectorStore.cs
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
using System.Text.Json;
using LangChain.Abstractions.Embeddings.Base;
using LangChain.Base;
using LangChain.Docstore;
using LangChain.Indexes;
using LangChain.Providers;
using LangChain.TextSplitters;
using LangChain.VectorStores;
using Microsoft.Data.Sqlite;
Expand All @@ -28,7 +28,7 @@ public sealed class SQLiteVectorStore : VectorStore, IDisposable
/// <param name="textSplitter"></param>
/// <returns></returns>
public static async Task<VectorStoreIndexWrapper> CreateIndexFromDocuments(
IEmbeddings embeddings,
IEmbeddingModel embeddings,
IReadOnlyCollection<Document> documents,
string filename = "vectorstore.db",
string tableName = "vectors",
Expand All @@ -41,18 +41,20 @@ public static async Task<VectorStoreIndexWrapper> CreateIndexFromDocuments(
return index;
}

public static SQLIteVectorStoreOptions DefaultOptions = new SQLIteVectorStoreOptions();
public static SQLIteVectorStoreOptions DefaultOptions { get; } = new();

/// <summary>
/// If the database does not exist, it loads documents from the documentsSource, creates an index from these documents and returns the created index.
/// If database exists, it loads the index from the database.
/// documentsSource is used only if the database does not exist. If the database exists, documentsSource is ignored.
/// </summary>
/// <param name="embeddings">An object implementing the <see cref="IEmbeddings"/> interface. This object is used to generate embeddings for the documents.</param>
/// <param name="embeddings">An object implementing the <see cref="IEmbeddingModel"/> interface. This object is used to generate embeddings for the documents.</param>
/// <param name="documentsSource">An optional object implementing the <see cref="ISource"/> interface. This object is used to load documents if the vector store database file does not exist.</param>
/// <param name="options">An optional <see cref="SQLIteVectorStoreOptions"/> object. This object provides configuration options for the SQLite vector store.</param>
public static async Task<VectorStoreIndexWrapper> GetIndex(
IEmbeddings embeddings, ISource? documentsSource=null, SQLIteVectorStoreOptions? options=null)
IEmbeddingModel embeddings,
ISource? documentsSource = null,
SQLIteVectorStoreOptions? options = null)
{
options ??= DefaultOptions;

Expand All @@ -61,10 +63,11 @@ public static async Task<VectorStoreIndexWrapper> GetIndex(

if (!System.IO.File.Exists("vectors.db"))
{
var documents = await documentsSource.LoadAsync();
return await SQLiteVectorStore.CreateIndexFromDocuments(embeddings, documents, options.Filename, options.TableName, textSplitter: textSplitter);
}
var documents = await documentsSource.LoadAsync().ConfigureAwait(false);

return await CreateIndexFromDocuments(
embeddings, documents, options.Filename, options.TableName, textSplitter: textSplitter).ConfigureAwait(false);
}

var vectorStore = new SQLiteVectorStore(options.Filename, options.TableName, embeddings);
var index = new VectorStoreIndexWrapper(vectorStore);
Expand All @@ -81,7 +84,7 @@ public static async Task<VectorStoreIndexWrapper> GetIndex(
public SQLiteVectorStore(
string filename,
string tableName,
IEmbeddings embeddings,
IEmbeddingModel embeddings,
EDistanceMetrics distanceMetrics = EDistanceMetrics.Euclidean)
: base(embeddings)
{
Expand Down Expand Up @@ -201,7 +204,11 @@ public override async Task<IEnumerable<string>> AddDocumentsAsync(

var docs = documents.ToArray();

var embeddings = await Embeddings.EmbedDocumentsAsync(docs.Select(x => x.PageContent).ToArray(), cancellationToken).ConfigureAwait(false);
float[][] embeddings = await EmbeddingModel.CreateEmbeddingsAsync(
docs
.Select(x => x.PageContent)
.ToArray(),
cancellationToken: cancellationToken).ConfigureAwait(false);
List<string> ids = new List<string>();
for (int i = 0; i < docs.Length; i++)
{
Expand Down Expand Up @@ -258,10 +265,19 @@ public override async Task<bool> DeleteAsync(IEnumerable<string> ids, Cancellati
/// <param name="k"></param>
/// <param name="cancellationToken"></param>
/// <returns></returns>
public override async Task<IEnumerable<Document>> SimilaritySearchAsync(string query, int k = 4, CancellationToken cancellationToken = default)
public override async Task<IEnumerable<Document>> SimilaritySearchAsync(
string query,
int k = 4,
CancellationToken cancellationToken = default)
{
var embedding = await Embeddings.EmbedQueryAsync(query, cancellationToken).ConfigureAwait(false);
return await SimilaritySearchByVectorAsync(embedding, k, cancellationToken).ConfigureAwait(false);
float[] embedding = await EmbeddingModel.CreateEmbeddingsAsync(
query,
cancellationToken: cancellationToken).ConfigureAwait(false);

return await SimilaritySearchByVectorAsync(
embedding,
k,
cancellationToken).ConfigureAwait(false);
}

/// <summary>
Expand Down Expand Up @@ -289,9 +305,13 @@ public override async Task<IEnumerable<Document>> SimilaritySearchByVectorAsync(
public override async Task<IEnumerable<(Document, float)>> SimilaritySearchWithScoreAsync(string query,
int k = 4, CancellationToken cancellationToken = default)
{
var embedding = await Embeddings.EmbedQueryAsync(query, cancellationToken).ConfigureAwait(false);
var arr = embedding.ToArray();
var documents = await SearchByVector(arr, k).ConfigureAwait(false);
float[] embedding = await EmbeddingModel.CreateEmbeddingsAsync(
query,
cancellationToken: cancellationToken).ConfigureAwait(false);
var documents = await SearchByVector(
embedding,
k).ConfigureAwait(false);

return documents;
}

Expand Down
Loading