From 0b5c7699ce4dccd32a861ce869f98dc7aba29528 Mon Sep 17 00:00:00 2001 From: westey <164392973+westey-m@users.noreply.github.com> Date: Mon, 23 Sep 2024 16:08:55 +0100 Subject: [PATCH] .Net: Share generic data model validations and use everywhere. (#8923) ### Motivation and Context If someone uses the generic data model, they need to also supply a record definition, otherwise we don't know how to map the data from storage to the generic data model. Also, if someone uses the generic data model and they use a key type other than the ones we support out of the box, they need to provide a custom mapper to do that key mapping. ### Description - This PR adds checks for this with helpful messaging to a shared class. - Adds the validation calls to all connectors that support the generic data model. ### Contribution Checklist - [x] The code builds clean without any errors or warnings - [x] The PR follows the [SK Contribution Guidelines](https://github.com/microsoft/semantic-kernel/blob/main/CONTRIBUTING.md) and the [pre-submission formatting script](https://github.com/microsoft/semantic-kernel/blob/main/CONTRIBUTING.md#development-scripts) raises no violations - [x] All unit tests pass, and I have added new tests where possible - [x] I didn't break anyone :smile: --- ...zureAISearchVectorStoreRecordCollection.cs | 11 +--- .../QdrantVectorStoreRecordCollection.cs | 2 + ...RedisHashSetVectorStoreRecordCollection.cs | 2 + .../RedisJsonVectorStoreRecordCollection.cs | 2 + .../Data/VectorStoreRecordPropertyReader.cs | 56 +++++++++++++++++++ .../VectorStoreRecordPropertyReaderTests.cs | 35 ++++++++++++ 6 files changed, 99 insertions(+), 9 deletions(-) diff --git a/dotnet/src/Connectors/Connectors.Memory.AzureAISearch/AzureAISearchVectorStoreRecordCollection.cs b/dotnet/src/Connectors/Connectors.Memory.AzureAISearch/AzureAISearchVectorStoreRecordCollection.cs index 69e0a70d453e..86c08da3ca50 100644 --- a/dotnet/src/Connectors/Connectors.Memory.AzureAISearch/AzureAISearchVectorStoreRecordCollection.cs +++ b/dotnet/src/Connectors/Connectors.Memory.AzureAISearch/AzureAISearchVectorStoreRecordCollection.cs @@ -105,15 +105,8 @@ public AzureAISearchVectorStoreRecordCollection(SearchIndexClient searchIndexCli // Verify. Verify.NotNull(searchIndexClient); Verify.NotNullOrWhiteSpace(collectionName); - Verify.True( - !(typeof(TRecord).IsGenericType && - typeof(TRecord).GetGenericTypeDefinition() == typeof(VectorStoreGenericDataModel<>) && - typeof(TRecord).GetGenericArguments()[0] != typeof(string) && - options?.JsonObjectCustomMapper is null), - "A data model of VectorStoreGenericDataModel with a different key type than string, is not supported by the default mappers. Please provide your own mapper to map to your chosen key type."); - Verify.True( - !(typeof(TRecord) == typeof(VectorStoreGenericDataModel) && options?.VectorStoreRecordDefinition is null), - $"A {nameof(VectorStoreRecordDefinition)} must be provided when using {nameof(VectorStoreGenericDataModel)}."); + VectorStoreRecordPropertyReader.VerifyGenericDataModelKeyType(typeof(TRecord), options?.JsonObjectCustomMapper is not null, s_supportedKeyTypes); + VectorStoreRecordPropertyReader.VerifyGenericDataModelDefinitionSupplied(typeof(TRecord), options?.VectorStoreRecordDefinition is not null); // Assign. this._searchIndexClient = searchIndexClient; diff --git a/dotnet/src/Connectors/Connectors.Memory.Qdrant/QdrantVectorStoreRecordCollection.cs b/dotnet/src/Connectors/Connectors.Memory.Qdrant/QdrantVectorStoreRecordCollection.cs index 931e853b790e..8f6e85722bf6 100644 --- a/dotnet/src/Connectors/Connectors.Memory.Qdrant/QdrantVectorStoreRecordCollection.cs +++ b/dotnet/src/Connectors/Connectors.Memory.Qdrant/QdrantVectorStoreRecordCollection.cs @@ -82,6 +82,8 @@ internal QdrantVectorStoreRecordCollection(MockableQdrantClient qdrantClient, st // Verify. Verify.NotNull(qdrantClient); Verify.NotNullOrWhiteSpace(collectionName); + VectorStoreRecordPropertyReader.VerifyGenericDataModelKeyType(typeof(TRecord), options?.PointStructCustomMapper is not null, s_supportedKeyTypes); + VectorStoreRecordPropertyReader.VerifyGenericDataModelDefinitionSupplied(typeof(TRecord), options?.VectorStoreRecordDefinition is not null); // Assign. this._qdrantClient = qdrantClient; diff --git a/dotnet/src/Connectors/Connectors.Memory.Redis/RedisHashSetVectorStoreRecordCollection.cs b/dotnet/src/Connectors/Connectors.Memory.Redis/RedisHashSetVectorStoreRecordCollection.cs index 23ba6e477f22..c865ba99a812 100644 --- a/dotnet/src/Connectors/Connectors.Memory.Redis/RedisHashSetVectorStoreRecordCollection.cs +++ b/dotnet/src/Connectors/Connectors.Memory.Redis/RedisHashSetVectorStoreRecordCollection.cs @@ -94,6 +94,8 @@ public RedisHashSetVectorStoreRecordCollection(IDatabase database, string collec // Verify. Verify.NotNull(database); Verify.NotNullOrWhiteSpace(collectionName); + VectorStoreRecordPropertyReader.VerifyGenericDataModelKeyType(typeof(TRecord), options?.HashEntriesCustomMapper is not null, s_supportedKeyTypes); + VectorStoreRecordPropertyReader.VerifyGenericDataModelDefinitionSupplied(typeof(TRecord), options?.VectorStoreRecordDefinition is not null); // Assign. this._database = database; diff --git a/dotnet/src/Connectors/Connectors.Memory.Redis/RedisJsonVectorStoreRecordCollection.cs b/dotnet/src/Connectors/Connectors.Memory.Redis/RedisJsonVectorStoreRecordCollection.cs index 19fda49e3f8c..d31d22970e09 100644 --- a/dotnet/src/Connectors/Connectors.Memory.Redis/RedisJsonVectorStoreRecordCollection.cs +++ b/dotnet/src/Connectors/Connectors.Memory.Redis/RedisJsonVectorStoreRecordCollection.cs @@ -80,6 +80,8 @@ public RedisJsonVectorStoreRecordCollection(IDatabase database, string collectio // Verify. Verify.NotNull(database); Verify.NotNullOrWhiteSpace(collectionName); + VectorStoreRecordPropertyReader.VerifyGenericDataModelKeyType(typeof(TRecord), options?.JsonNodeCustomMapper is not null, s_supportedKeyTypes); + VectorStoreRecordPropertyReader.VerifyGenericDataModelDefinitionSupplied(typeof(TRecord), options?.VectorStoreRecordDefinition is not null); // Assign. this._database = database; diff --git a/dotnet/src/InternalUtilities/src/Data/VectorStoreRecordPropertyReader.cs b/dotnet/src/InternalUtilities/src/Data/VectorStoreRecordPropertyReader.cs index d4f06071f66b..f59f760100e4 100644 --- a/dotnet/src/InternalUtilities/src/Data/VectorStoreRecordPropertyReader.cs +++ b/dotnet/src/InternalUtilities/src/Data/VectorStoreRecordPropertyReader.cs @@ -397,6 +397,62 @@ var interfaceType when interfaceType.GetInterfaces().FirstOrDefault(i => i.IsGen } } + /// + /// Checks that if the provided is a that the key type is supported by the default mappers. + /// If not supported, a custom mapper must be supplied, otherwise an exception is thrown. + /// + /// The type of the record data model used by the connector. + /// A value indicating whether a custom mapper was supplied to the connector + /// The list of key types supported by the default mappers. + /// Thrown if the key type of the is not supported by the default mappers and a custom mapper was not supplied. + public static void VerifyGenericDataModelKeyType(Type recordType, bool customMapperSupplied, IEnumerable allowedKeyTypes) + { + // If we are not dealing with a generic data model, no need to check anything else. + if (!recordType.IsGenericType || recordType.GetGenericTypeDefinition() != typeof(VectorStoreGenericDataModel<>)) + { + return; + } + + // If the key type is supported, we are good. + var keyType = recordType.GetGenericArguments()[0]; + if (allowedKeyTypes.Contains(keyType)) + { + return; + } + + // If the key type is not supported out of the box, but a custom mapper was supplied, we are good. + if (customMapperSupplied) + { + return; + } + + throw new ArgumentException($"The key type '{keyType.FullName}' of data model '{nameof(VectorStoreGenericDataModel)}' is not supported by the default mappers. " + + $"Only the following key types are supported: {string.Join(", ", allowedKeyTypes)}. Please provide your own mapper to map to your chosen key type."); + } + + /// + /// Checks that if the provided is a that a is also provided. + /// + /// The type of the record data model used by the connector. + /// A value indicating whether a record definition was supplied to the connector. + /// Thrown if a is not provided when using . + public static void VerifyGenericDataModelDefinitionSupplied(Type recordType, bool recordDefinitionSupplied) + { + // If we are not dealing with a generic data model, no need to check anything else. + if (!recordType.IsGenericType || recordType.GetGenericTypeDefinition() != typeof(VectorStoreGenericDataModel<>)) + { + return; + } + + // If we are dealing with a generic data model, and a record definition was supplied, we are good. + if (recordDefinitionSupplied) + { + return; + } + + throw new ArgumentException($"A {nameof(VectorStoreRecordDefinition)} must be provided when using '{nameof(VectorStoreGenericDataModel)}'."); + } + /// /// Get the JSON property name of a property by using the if available, otherwise /// using the if available, otherwise falling back to the property name. diff --git a/dotnet/src/SemanticKernel.UnitTests/Data/VectorStoreRecordPropertyReaderTests.cs b/dotnet/src/SemanticKernel.UnitTests/Data/VectorStoreRecordPropertyReaderTests.cs index cfddd8437425..2b5d5d701115 100644 --- a/dotnet/src/SemanticKernel.UnitTests/Data/VectorStoreRecordPropertyReaderTests.cs +++ b/dotnet/src/SemanticKernel.UnitTests/Data/VectorStoreRecordPropertyReaderTests.cs @@ -254,6 +254,41 @@ public void VerifyPropertyTypesFailsForDisallowedTypes() Assert.Equal("Data properties must be one of the supported types: System.Int32, System.Single. Type of the property 'Data' is System.String.", ex2.Message); } + [Theory] + [InlineData(typeof(SinglePropsModel), false, new Type[] { typeof(string) }, false)] + [InlineData(typeof(VectorStoreGenericDataModel), false, new Type[] { typeof(string), typeof(ulong) }, false)] + [InlineData(typeof(VectorStoreGenericDataModel), true, new Type[] { typeof(string), typeof(ulong) }, false)] + [InlineData(typeof(VectorStoreGenericDataModel), false, new Type[] { typeof(string), typeof(ulong) }, true)] + public void VerifyGenericDataModelKeyTypeThrowsOnlyForUnsupportedKeyTypeWithoutCustomMapper(Type recordType, bool customMapperSupplied, IEnumerable allowedKeyTypes, bool shouldThrow) + { + if (shouldThrow) + { + var ex = Assert.Throws(() => VectorStoreRecordPropertyReader.VerifyGenericDataModelKeyType(recordType, customMapperSupplied, allowedKeyTypes)); + Assert.Equal("The key type 'System.Int32' of data model 'VectorStoreGenericDataModel' is not supported by the default mappers. Only the following key types are supported: System.String, System.UInt64. Please provide your own mapper to map to your chosen key type.", ex.Message); + } + else + { + VectorStoreRecordPropertyReader.VerifyGenericDataModelKeyType(recordType, customMapperSupplied, allowedKeyTypes); + } + } + + [Theory] + [InlineData(typeof(SinglePropsModel), false, false)] + [InlineData(typeof(VectorStoreGenericDataModel), true, false)] + [InlineData(typeof(VectorStoreGenericDataModel), false, true)] + public void VerifyGenericDataModelDefinitionSuppliedThrowsOnlyForMissingDefinition(Type recordType, bool definitionSupplied, bool shouldThrow) + { + if (shouldThrow) + { + var ex = Assert.Throws(() => VectorStoreRecordPropertyReader.VerifyGenericDataModelDefinitionSupplied(recordType, definitionSupplied)); + Assert.Equal("A VectorStoreRecordDefinition must be provided when using 'VectorStoreGenericDataModel'.", ex.Message); + } + else + { + VectorStoreRecordPropertyReader.VerifyGenericDataModelDefinitionSupplied(recordType, definitionSupplied); + } + } + [Fact] public void VerifyStoragePropertyNameMapChecksStorageNameAndFallsBackToPropertyName() {