Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: Added Databases abstractions. #174

Merged
merged 15 commits into from
Apr 15, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 30 additions & 0 deletions LangChain.sln
Original file line number Diff line number Diff line change
Expand Up @@ -320,6 +320,19 @@ Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Cli", "Cli", "{177349C8-4FD
EndProject
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "LangChain.Cli", "src\Cli\src\LangChain.Cli.csproj", "{DE30162A-5D05-4E9B-80D7-43A8927D450E}"
EndProject
Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Abstractions", "Abstractions", "{3E6FEA5C-3F34-4BFF-BB68-2AFFF3B86436}"
ProjectSection(SolutionItems) = preProject
src\Databases\Abstractions\README.md = src\Databases\Abstractions\README.md
EndProjectSection
EndProject
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "LangChain.Databases.Abstractions", "src\Databases\Abstractions\src\LangChain.Databases.Abstractions.csproj", "{FFB4B70D-85F9-4C9D-BBAE-4863F83AF950}"
EndProject
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "LangChain.Databases.Abstractions.Tests", "src\Databases\Abstractions\test\LangChain.Databases.Abstractions.Tests.csproj", "{A2CF5ADB-7FF7-499A-9FE8-858793A5E67D}"
EndProject
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "LangChain.Databases.IntegrationTests", "src\Databases\IntegrationTests\LangChain.Databases.IntegrationTests.csproj", "{6B66A8D4-4536-4B3C-AD2E-F6F48D2C7C13}"
EndProject
Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "IntegrationTests", "IntegrationTests", "{DDE920D6-6506-430F-938A-A29C9177F850}"
EndProject
Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution
Debug|Any CPU = Debug|Any CPU
Expand Down Expand Up @@ -658,6 +671,18 @@ Global
{DE30162A-5D05-4E9B-80D7-43A8927D450E}.Debug|Any CPU.Build.0 = Debug|Any CPU
{DE30162A-5D05-4E9B-80D7-43A8927D450E}.Release|Any CPU.ActiveCfg = Release|Any CPU
{DE30162A-5D05-4E9B-80D7-43A8927D450E}.Release|Any CPU.Build.0 = Release|Any CPU
{FFB4B70D-85F9-4C9D-BBAE-4863F83AF950}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
{FFB4B70D-85F9-4C9D-BBAE-4863F83AF950}.Debug|Any CPU.Build.0 = Debug|Any CPU
{FFB4B70D-85F9-4C9D-BBAE-4863F83AF950}.Release|Any CPU.ActiveCfg = Release|Any CPU
{FFB4B70D-85F9-4C9D-BBAE-4863F83AF950}.Release|Any CPU.Build.0 = Release|Any CPU
{A2CF5ADB-7FF7-499A-9FE8-858793A5E67D}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
{A2CF5ADB-7FF7-499A-9FE8-858793A5E67D}.Debug|Any CPU.Build.0 = Debug|Any CPU
{A2CF5ADB-7FF7-499A-9FE8-858793A5E67D}.Release|Any CPU.ActiveCfg = Release|Any CPU
{A2CF5ADB-7FF7-499A-9FE8-858793A5E67D}.Release|Any CPU.Build.0 = Release|Any CPU
{6B66A8D4-4536-4B3C-AD2E-F6F48D2C7C13}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
{6B66A8D4-4536-4B3C-AD2E-F6F48D2C7C13}.Debug|Any CPU.Build.0 = Debug|Any CPU
{6B66A8D4-4536-4B3C-AD2E-F6F48D2C7C13}.Release|Any CPU.ActiveCfg = Release|Any CPU
{6B66A8D4-4536-4B3C-AD2E-F6F48D2C7C13}.Release|Any CPU.Build.0 = Release|Any CPU
EndGlobalSection
GlobalSection(SolutionProperties) = preSolution
HideSolutionNode = FALSE
Expand Down Expand Up @@ -786,6 +811,11 @@ Global
{08BA819F-538B-44A8-9463-3F90381F7F0D} = {E6A4D04E-CC83-4F24-89B4-04F3EF61715F}
{08270801-B335-4DDE-8329-9D9198C3D3A1} = {E6A4D04E-CC83-4F24-89B4-04F3EF61715F}
{DE30162A-5D05-4E9B-80D7-43A8927D450E} = {177349C8-4FD3-466E-A7F4-67983A5B2C1F}
{3E6FEA5C-3F34-4BFF-BB68-2AFFF3B86436} = {A098FF69-D8B5-4B2B-83D5-F777D3817F15}
{FFB4B70D-85F9-4C9D-BBAE-4863F83AF950} = {3E6FEA5C-3F34-4BFF-BB68-2AFFF3B86436}
{A2CF5ADB-7FF7-499A-9FE8-858793A5E67D} = {3E6FEA5C-3F34-4BFF-BB68-2AFFF3B86436}
{DDE920D6-6506-430F-938A-A29C9177F850} = {A098FF69-D8B5-4B2B-83D5-F777D3817F15}
{6B66A8D4-4536-4B3C-AD2E-F6F48D2C7C13} = {DDE920D6-6506-430F-938A-A29C9177F850}
EndGlobalSection
GlobalSection(ExtensibilityGlobals) = postSolution
SolutionGuid = {5C00D0F1-6138-4ED9-846B-97E43D6DFF1C}
Expand Down
1 change: 1 addition & 0 deletions LangChain.sln.DotSettings
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
<wpf:ResourceDictionary xml:space="preserve" xmlns:x="http://schemas.microsoft.com/winfx/2006/xaml" xmlns:s="clr-namespace:System;assembly=mscorlib" xmlns:ss="urn:shemas-jetbrains-com:settings-storage-xaml" xmlns:wpf="http://schemas.microsoft.com/winfx/2006/xaml/presentation">
<s:Boolean x:Key="/Default/UserDictionary/Words/=Anyscale/@EntryIndexedValue">True</s:Boolean>
<s:Boolean x:Key="/Default/UserDictionary/Words/=hnsw/@EntryIndexedValue">True</s:Boolean>
<s:Boolean x:Key="/Default/UserDictionary/Words/=Initializable/@EntryIndexedValue">True</s:Boolean>
<s:Boolean x:Key="/Default/UserDictionary/Words/=llava/@EntryIndexedValue">True</s:Boolean>
<s:Boolean x:Key="/Default/UserDictionary/Words/=Modelfile/@EntryIndexedValue">True</s:Boolean>
Expand Down
15 changes: 9 additions & 6 deletions src/Core/src/Chains/Chain.cs
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
using LangChain.Chains.StackableChains.ImageGeneration;
using LangChain.Chains.StackableChains.ImageToTextGeneration;
using LangChain.Chains.StackableChains.ReAct;
using LangChain.Indexes;
using LangChain.Databases;
using LangChain.Memory;
using LangChain.Providers;

Expand Down Expand Up @@ -95,30 +95,33 @@ public static LLMChain LargeLanguageModel(

/// <inheritdoc cref="RetrieveSimilarDocuments"/>
public static RetrieveDocumentsChain RetrieveDocuments(
VectorStoreIndexWrapper index,
IVectorDatabase vectorDatabase,
IEmbeddingModel embeddingModel,
int amount = 4,
string inputKey = "text",
string outputKey = "docs")
{
return new RetrieveDocumentsChain(index, inputKey, outputKey, amount);
return new RetrieveDocumentsChain(vectorDatabase, embeddingModel, inputKey, outputKey, amount);
}


/// <summary>
/// Takes most similar documents.
/// </summary>
/// <param name="index"></param>
/// <param name="vectorDatabase"></param>
/// <param name="embeddingModel"></param>
/// <param name="amount"></param>
/// <param name="inputKey"></param>
/// <param name="outputKey"></param>
/// <returns></returns>
public static RetrieveDocumentsChain RetrieveSimilarDocuments(
VectorStoreIndexWrapper index,
IVectorDatabase vectorDatabase,
IEmbeddingModel embeddingModel,
int amount = 4,
string inputKey = "text",
string outputKey = "docs")
{
return new RetrieveDocumentsChain(index, inputKey, outputKey, amount);
return new RetrieveDocumentsChain(vectorDatabase, embeddingModel, inputKey, outputKey, amount);
}

/// <inheritdoc cref="CombineDocuments"/>
Expand Down
19 changes: 13 additions & 6 deletions src/Core/src/Chains/StackableChains/RetreiveDocumentsChain.cs
Original file line number Diff line number Diff line change
@@ -1,19 +1,26 @@
using LangChain.Abstractions.Schema;
using LangChain.Indexes;
using LangChain.VectorStores;
using LangChain.Databases;
using LangChain.Providers;

namespace LangChain.Chains.HelperChains;

/// <inheritdoc/>
public class RetrieveDocumentsChain : BaseStackableChain
{
private readonly VectorStoreIndexWrapper _index;
private readonly IVectorDatabase _vectorDatabase;
private readonly IEmbeddingModel _embeddingModel;
private readonly int _amount;

/// <inheritdoc/>
public RetrieveDocumentsChain(VectorStoreIndexWrapper index, string inputKey = "query", string outputKey = "documents", int amount = 4)
public RetrieveDocumentsChain(
IVectorDatabase vectorDatabase,
IEmbeddingModel embeddingModel,
string inputKey = "query",
string outputKey = "documents",
int amount = 4)
{
_index = index;
_vectorDatabase = vectorDatabase;
_embeddingModel = embeddingModel;
_amount = amount;
InputKeys = new[] { inputKey };
OutputKeys = new[] { outputKey };
Expand All @@ -24,7 +31,7 @@ protected override async Task<IChainValues> InternalCall(IChainValues values)
{
values = values ?? throw new ArgumentNullException(nameof(values));

var retriever = _index.Store.AsRetriever();
var retriever = _vectorDatabase.AsRetriever(_embeddingModel);
retriever.K = _amount;

var query = values.Value[InputKeys[0]].ToString() ?? string.Empty;
Expand Down
57 changes: 27 additions & 30 deletions src/Core/src/Indexes/VectorStoreIndexCreator.cs
Original file line number Diff line number Diff line change
@@ -1,54 +1,51 @@
using LangChain.Base;
using LangChain.Extensions;
using LangChain.Sources;
using LangChain.Sources;
using LangChain.Splitters.Text;
using LangChain.VectorStores;
using LangChain.Databases;
using LangChain.Extensions;
using LangChain.Providers;

namespace LangChain.Indexes;

/// <summary>
/// Logic for creating a vectorstore index.
/// Logic for creating VectorDatabase tables.
/// </summary>
/// // embeddings are not needed here because VectorStore already has them
public class VectorStoreIndexCreator(
VectorStore vectorStore,
ITextSplitter textSplitter)
public static class VectorStoreIndexCreator
{
/// <summary>
///
/// </summary>
public VectorStore VectorStore { get; } = vectorStore;

/// <summary>
///
/// </summary>
public ITextSplitter TextSplitter { get; } = textSplitter;

/// <summary>
/// Create a vectorstore index from loaders.
/// Create a VectorDatabase table from loaders.
/// </summary>
public async Task<VectorStoreIndexWrapper> FromLoaders(List<FileSource> loaders, CancellationToken cancellationToken = default)
public static async Task<IReadOnlyCollection<string>> LoadAndSplitDocuments(
this IVectorDatabase vectorDatabase,
IEmbeddingModel embeddingModel,
IReadOnlyCollection<ISource> sources,
ITextSplitter? textSplitter = null,
CancellationToken cancellationToken = default)
{
loaders = loaders ?? throw new ArgumentNullException(nameof(loaders));
sources = sources ?? throw new ArgumentNullException(nameof(sources));

List<Document> documents = new();
foreach (var loader in loaders)
var documents = new List<Document>();
foreach (var source in sources)
{
documents.AddRange(await loader.LoadAsync(cancellationToken).ConfigureAwait(false));
documents.AddRange(await source.LoadAsync(cancellationToken).ConfigureAwait(false));
}

return await FromDocumentsAsync(documents).ConfigureAwait(false);
return await vectorDatabase.AddSplitDocumentsAsync(embeddingModel, documents, textSplitter).ConfigureAwait(false);
}

/// <summary>
/// Create a vectorstore index from documents.
/// Create a VectorDatabase table from documents.
/// </summary>
public async Task<VectorStoreIndexWrapper> FromDocumentsAsync(IReadOnlyCollection<Document> documents)
public static async Task<IReadOnlyCollection<string>> AddSplitDocumentsAsync(
this IVectorDatabase vectorDatabase,
IEmbeddingModel embeddingModel,
IReadOnlyCollection<Document> documents,
ITextSplitter? textSplitter = null)
{
var subDocs = TextSplitter.SplitDocuments(documents);
textSplitter ??= new CharacterTextSplitter();

await VectorStore.AddDocumentsAsync(subDocs).ConfigureAwait(false);
var splitDocuments = textSplitter.SplitDocuments(documents);

return new VectorStoreIndexWrapper(VectorStore);
return await vectorDatabase.AddDocumentsAsync(embeddingModel, splitDocuments).ConfigureAwait(false);
}
}
20 changes: 9 additions & 11 deletions src/Core/src/Indexes/VectorStoreIndexWrapper.cs
Original file line number Diff line number Diff line change
@@ -1,30 +1,28 @@
using LangChain.Chains.CombineDocuments;
using LangChain.Chains.RetrievalQA;
using LangChain.VectorStores;
using LangChain.Databases;
using LangChain.Providers;

namespace LangChain.Indexes;

/// <summary>
///
/// </summary>
/// <param name="vectorStore"></param>
public class VectorStoreIndexWrapper(
VectorStore vectorStore)
public static class VectorStoreIndexWrapper
{
/// <summary>
///
/// </summary>
public VectorStore Store { get; } = vectorStore;

/// <summary>
///
/// </summary>
/// <param name="vectorDatabase"></param>
/// <param name="embeddingModel"></param>
/// <param name="question"></param>
/// <param name="llm"></param>
/// <param name="inputKey"></param>
/// <param name="outputKey"></param>
/// <returns></returns>
public Task<string?> QueryAsync(
public static Task<string?> QueryAsync(
this IVectorDatabase vectorDatabase,
IEmbeddingModel embeddingModel,
string question,
BaseCombineDocumentsChain llm,
string inputKey = "question",
Expand All @@ -33,7 +31,7 @@ public class VectorStoreIndexWrapper(
var chain = new RetrievalQaChain(
new RetrievalQaChainInput(
llm,
Store.AsRetriever())
vectorDatabase.AsRetriever(embeddingModel))
{
InputKey = inputKey,
OutputKey = outputKey,
Expand Down
1 change: 1 addition & 0 deletions src/Core/src/LangChain.Core.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@

<ItemGroup>
<ProjectReference Include="..\..\Providers\Abstractions\src\LangChain.Providers.Abstractions.csproj" />
<ProjectReference Include="..\..\Databases\Abstractions\src\LangChain.Databases.Abstractions.csproj" />
<ProjectReference Include="..\..\Sources\Abstractions\src\LangChain.Sources.Abstractions.csproj" />
<ProjectReference Include="..\..\Splitters\Abstractions\src\LangChain.Splitters.Abstractions.csproj" />
<ProjectReference Include="..\..\Utilities\Pollyfils\src\LangChain.Polyfills.csproj" />
Expand Down
72 changes: 72 additions & 0 deletions src/Core/src/Retrievers/VectorStoreRetriever.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
using LangChain.Callback;
using LangChain.Databases;
using LangChain.Providers;
using LangChain.Sources;

namespace LangChain.Retrievers;

/// <summary>
/// Retriever that answers queries by searching an <see cref="IVectorDatabase"/>
/// with a given <see cref="IEmbeddingModel"/>.
/// https://api.python.langchain.com/en/latest/_modules/langchain/schema/vectorstore.html
/// </summary>
public class VectorStoreRetriever : BaseRetriever
{
    /// <summary>
    /// The vector database that is searched and that receives added documents.
    /// </summary>
    public IVectorDatabase VectorDatabase { get; init; }

    private VectorSearchType SearchType { get; init; }

    /// <summary>
    /// Number of results requested from the vector database per query
    /// (passed as <c>NumberOfResults</c> in the search settings).
    /// </summary>
    public int K { get; set; } = 4;

    private float? ScoreThreshold { get; init; }

    private IEmbeddingModel EmbeddingModel { get; init; }

    /// <summary>
    /// Creates a retriever over the given vector database.
    /// </summary>
    /// <param name="vectorDatabase">Database to search.</param>
    /// <param name="embeddingModel">Embedding model passed to the database on every search/add call.</param>
    /// <param name="searchType">Search type forwarded in the search settings.</param>
    /// <param name="scoreThreshold">
    /// Score threshold forwarded in the search settings. Must be provided when
    /// <paramref name="searchType"/> is <see cref="VectorSearchType.SimilarityScoreThreshold"/>.
    /// </param>
    /// <exception cref="ArgumentNullException">
    /// <paramref name="vectorDatabase"/> or <paramref name="embeddingModel"/> is null.
    /// </exception>
    /// <exception cref="ArgumentException">
    /// <paramref name="searchType"/> is <see cref="VectorSearchType.SimilarityScoreThreshold"/>
    /// and <paramref name="scoreThreshold"/> is null.
    /// </exception>
    public VectorStoreRetriever(
        IVectorDatabase vectorDatabase,
        IEmbeddingModel embeddingModel,
        VectorSearchType searchType = VectorSearchType.Similarity,
        float? scoreThreshold = null)
    {
        // Validate the incoming parameter, not the ScoreThreshold property:
        // the property is still unassigned (null) at this point, so checking it
        // would reject every SimilarityScoreThreshold request.
        if (searchType == VectorSearchType.SimilarityScoreThreshold && scoreThreshold is null)
        {
            throw new ArgumentException($"ScoreThreshold required for {searchType}", nameof(scoreThreshold));
        }

        VectorDatabase = vectorDatabase ?? throw new ArgumentNullException(nameof(vectorDatabase));
        EmbeddingModel = embeddingModel ?? throw new ArgumentNullException(nameof(embeddingModel));
        SearchType = searchType;
        ScoreThreshold = scoreThreshold;
    }

    /// <inheritdoc/>
    protected override async Task<IEnumerable<Document>> GetRelevantDocumentsCoreAsync(
        string query,
        CallbackManagerForRetrieverRun? runManager = null)
    {
        var searchSettings = new VectorSearchSettings
        {
            Type = SearchType,
            NumberOfResults = K,
            ScoreThreshold = ScoreThreshold,
        };

        var response = await VectorDatabase
            .SearchAsync(EmbeddingModel, query, searchSettings: searchSettings)
            .ConfigureAwait(false);

        return response.ToDocuments();
    }

    /// <summary>
    /// Adds documents to the underlying vector database using this retriever's embedding model.
    /// </summary>
    /// <param name="documents">Documents to add.</param>
    /// <returns>The collection of strings returned by the database's <c>AddDocumentsAsync</c> call.</returns>
    public Task<IReadOnlyCollection<string>> AddDocumentsAsync(IReadOnlyCollection<Document> documents)
    {
        return VectorDatabase.AddDocumentsAsync(EmbeddingModel, documents);
    }
}
Loading
Loading