From db980de4926c5561c0ab60aa64fbe7cde98e2896 Mon Sep 17 00:00:00 2001 From: sunilmujumdar <55596447+sunilmujumdar@users.noreply.github.com> Date: Thu, 9 Nov 2023 19:47:38 -0800 Subject: [PATCH] Fix(Indexing): Improve error reporting for missing data during vector searches (#60) Description: If the vector parameter in a similar_to function call is a variable that evaluates to NULL then we report it as a parsing (syntax) error. The fix, instead of reporting a parsing error, returns an empty result. Closes: HYP-302 --------- Co-authored-by: Bill Province --- ...ing-directive-with-similar-queries.graphql | 21 + ...ing-directive-with-similar-queries.graphql | 584 ++++++++++++++++++ 2 files changed, 605 insertions(+) create mode 100644 graphql/schema/testdata/schemagen/input/embedding-directive-with-similar-queries.graphql create mode 100644 graphql/schema/testdata/schemagen/output/embedding-directive-with-similar-queries.graphql diff --git a/graphql/schema/testdata/schemagen/input/embedding-directive-with-similar-queries.graphql b/graphql/schema/testdata/schemagen/input/embedding-directive-with-similar-queries.graphql new file mode 100644 index 00000000000..8a267fe6dd7 --- /dev/null +++ b/graphql/schema/testdata/schemagen/input/embedding-directive-with-similar-queries.graphql @@ -0,0 +1,21 @@ +# simple user product GraphQL API - v0.4 + +type Product { + id: String! @id + description: String + title: String + imageUrl: String + product_vector: [Float] @hm_embedding +} + +type Purchase @lambdaOnMutate(add: true){ + user: User @hasInverse(field: "purchase_history") + product: Product + date: DateTime @search(by: [day]) +} + +type User { + email: String! @id + purchase_history: [Purchase] + user_vector: [Float] @hm_embedding +} diff --git a/graphql/schema/testdata/schemagen/output/embedding-directive-with-similar-queries.graphql b/graphql/schema/testdata/schemagen/output/embedding-directive-with-similar-queries.graphql new file mode 100644 index 00000000000..9b4faff34a0 --- /dev/null +++ b/graphql/schema/testdata/schemagen/output/embedding-directive-with-similar-queries.graphql @@ -0,0 +1,584 @@ +####################### +# Input Schema +####################### + +type Product { + id: String! @id + description: String + title: String + imageUrl: String + product_vector: [Float] @hm_embedding +} + +type Purchase @lambdaOnMutate(add: true) { + user(filter: UserFilter): User @hasInverse(field: "purchase_history") + product(filter: ProductFilter): Product + date: DateTime @search(by: [day]) +} + +type User { + email: String! @id + purchase_history(filter: PurchaseFilter, order: PurchaseOrder, first: Int, offset: Int): [Purchase] @hasInverse(field: user) + user_vector: [Float] @hm_embedding + purchase_historyAggregate(filter: PurchaseFilter): PurchaseAggregateResult +} + +####################### +# Extended Definitions +####################### + +""" +The Int64 scalar type represents a signed 64‐bit numeric non‐fractional value. +Int64 can represent values in range [-(2^63),(2^63 - 1)]. +""" +scalar Int64 + +""" +The DateTime scalar type represents date and time as a string in RFC3339 format. +For example: "1985-04-12T23:20:50.52Z" represents 20 mins 50.52 secs after the 23rd hour of Apr 12th 1985 in UTC. +""" +scalar DateTime + +input IntRange{ + min: Int! + max: Int! +} + +input FloatRange{ + min: Float! + max: Float! +} + +input Int64Range{ + min: Int64! + max: Int64! +} + +input DateTimeRange{ + min: DateTime! + max: DateTime! +} + +input StringRange{ + min: String! + max: String! +} + +enum DgraphIndex { + int + int64 + float + bool + hash + exact + term + fulltext + trigram + regexp + year + month + day + hour + geo +} + +input AuthRule { + and: [AuthRule] + or: [AuthRule] + not: AuthRule + rule: String +} + +enum HTTPMethod { + GET + POST + PUT + PATCH + DELETE +} + +enum Mode { + BATCH + SINGLE +} + +input CustomHTTP { + url: String! + method: HTTPMethod! + body: String + graphql: String + mode: Mode + forwardHeaders: [String!] + secretHeaders: [String!] + introspectionHeaders: [String!] + skipIntrospection: Boolean +} + +type Point { + longitude: Float! + latitude: Float! +} + +input PointRef { + longitude: Float! + latitude: Float! +} + +input NearFilter { + distance: Float! + coordinate: PointRef! +} + +input PointGeoFilter { + near: NearFilter + within: WithinFilter +} + +type PointList { + points: [Point!]! +} + +input PointListRef { + points: [PointRef!]! +} + +type Polygon { + coordinates: [PointList!]! +} + +input PolygonRef { + coordinates: [PointListRef!]! +} + +type MultiPolygon { + polygons: [Polygon!]! +} + +input MultiPolygonRef { + polygons: [PolygonRef!]! +} + +input WithinFilter { + polygon: PolygonRef! +} + +input ContainsFilter { + point: PointRef + polygon: PolygonRef +} + +input IntersectsFilter { + polygon: PolygonRef + multiPolygon: MultiPolygonRef +} + +input PolygonGeoFilter { + near: NearFilter + within: WithinFilter + contains: ContainsFilter + intersects: IntersectsFilter +} + +input GenerateQueryParams { + get: Boolean + query: Boolean + password: Boolean + aggregate: Boolean +} + +input GenerateMutationParams { + add: Boolean + update: Boolean + delete: Boolean +} + +directive @hasInverse(field: String!) on FIELD_DEFINITION +directive @search(by: [DgraphIndex!]) on FIELD_DEFINITION +directive @hm_embedding on FIELD_DEFINITION +directive @dgraph(type: String, pred: String) on OBJECT | INTERFACE | FIELD_DEFINITION +directive @id(interface: Boolean) on FIELD_DEFINITION +directive @withSubscription on OBJECT | INTERFACE | FIELD_DEFINITION +directive @secret(field: String!, pred: String) on OBJECT | INTERFACE +directive @auth( + password: AuthRule + query: AuthRule, + add: AuthRule, + update: AuthRule, + delete: AuthRule) on OBJECT | INTERFACE +directive @custom(http: CustomHTTP, dql: String) on FIELD_DEFINITION +directive @remote on OBJECT | INTERFACE | UNION | INPUT_OBJECT | ENUM +directive @remoteResponse(name: String) on FIELD_DEFINITION +directive @cascade(fields: [String]) on FIELD +directive @lambda on FIELD_DEFINITION +directive @lambdaOnMutate(add: Boolean, update: Boolean, delete: Boolean) on OBJECT | INTERFACE +directive @cacheControl(maxAge: Int!) on QUERY +directive @generate( + query: GenerateQueryParams, + mutation: GenerateMutationParams, + subscription: Boolean) on OBJECT | INTERFACE + +input IntFilter { + eq: Int + in: [Int] + le: Int + lt: Int + ge: Int + gt: Int + between: IntRange +} + +input Int64Filter { + eq: Int64 + in: [Int64] + le: Int64 + lt: Int64 + ge: Int64 + gt: Int64 + between: Int64Range +} + +input FloatFilter { + eq: Float + in: [Float] + le: Float + lt: Float + ge: Float + gt: Float + between: FloatRange +} + +input DateTimeFilter { + eq: DateTime + in: [DateTime] + le: DateTime + lt: DateTime + ge: DateTime + gt: DateTime + between: DateTimeRange +} + +input StringTermFilter { + allofterms: String + anyofterms: String +} + +input StringRegExpFilter { + regexp: String +} + +input StringFullTextFilter { + alloftext: String + anyoftext: String +} + +input StringExactFilter { + eq: String + in: [String] + le: String + lt: String + ge: String + gt: String + between: StringRange +} + +input StringHashFilter { + eq: String + in: [String] +} + +####################### +# Generated Types +####################### + +type AddProductPayload { + product(filter: ProductFilter, order: ProductOrder, first: Int, offset: Int): [Product] + numUids: Int +} + +type AddPurchasePayload { + purchase(filter: PurchaseFilter, order: PurchaseOrder, first: Int, offset: Int): [Purchase] + numUids: Int +} + +type AddUserPayload { + user(filter: UserFilter, order: UserOrder, first: Int, offset: Int): [User] + numUids: Int +} + +type DeleteProductPayload { + product(filter: ProductFilter, order: ProductOrder, first: Int, offset: Int): [Product] + msg: String + numUids: Int +} + +type DeletePurchasePayload { + purchase(filter: PurchaseFilter, order: PurchaseOrder, first: Int, offset: Int): [Purchase] + msg: String + numUids: Int +} + +type DeleteUserPayload { + user(filter: UserFilter, order: UserOrder, first: Int, offset: Int): [User] + msg: String + numUids: Int +} + +type ProductAggregateResult { + count: Int + idMin: String + idMax: String + descriptionMin: String + descriptionMax: String + titleMin: String + titleMax: String + imageUrlMin: String + imageUrlMax: String +} + +type ProductWithDistance @dgraph(type: "Product") { + id: String! @id + description: String + title: String + imageUrl: String + product_vector: [Float] @hm_embedding + hm_distance: Float +} + +type PurchaseAggregateResult { + count: Int + dateMin: DateTime + dateMax: DateTime +} + +type UpdateProductPayload { + product(filter: ProductFilter, order: ProductOrder, first: Int, offset: Int): [Product] + numUids: Int +} + +type UpdatePurchasePayload { + purchase(filter: PurchaseFilter, order: PurchaseOrder, first: Int, offset: Int): [Purchase] + numUids: Int +} + +type UpdateUserPayload { + user(filter: UserFilter, order: UserOrder, first: Int, offset: Int): [User] + numUids: Int +} + +type UserAggregateResult { + count: Int + emailMin: String + emailMax: String +} + +type UserWithDistance @dgraph(type: "User") { + email: String! @id + purchase_history(filter: PurchaseFilter, order: PurchaseOrder, first: Int, offset: Int): [Purchase] @hasInverse(field: user) + user_vector: [Float] @hm_embedding + purchase_historyAggregate(filter: PurchaseFilter): PurchaseAggregateResult +} + +####################### +# Generated Enums +####################### + +enum ProductEmbedding { + product_vector +} + +enum ProductHasFilter { + id + description + title + imageUrl + product_vector +} + +enum ProductOrderable { + id + description + title + imageUrl +} + +enum PurchaseHasFilter { + user + product + date +} + +enum PurchaseOrderable { + date +} + +enum UserEmbedding { + user_vector +} + +enum UserHasFilter { + email + purchase_history + user_vector +} + +enum UserOrderable { + email +} + +####################### +# Generated Inputs +####################### + +input AddProductInput { + id: String! + description: String + title: String + imageUrl: String + product_vector: [Float] +} + +input AddPurchaseInput { + user: UserRef + product: ProductRef + date: DateTime +} + +input AddUserInput { + email: String! + purchase_history: [PurchaseRef] + user_vector: [Float] +} + +input ProductFilter { + id: StringHashFilter + has: [ProductHasFilter] + and: [ProductFilter] + or: [ProductFilter] + not: ProductFilter +} + +input ProductOrder { + asc: ProductOrderable + desc: ProductOrderable + then: ProductOrder +} + +input ProductPatch { + description: String + title: String + imageUrl: String + product_vector: [Float] +} + +input ProductRef { + id: String + description: String + title: String + imageUrl: String + product_vector: [Float] +} + +input PurchaseFilter { + date: DateTimeFilter + has: [PurchaseHasFilter] + and: [PurchaseFilter] + or: [PurchaseFilter] + not: PurchaseFilter +} + +input PurchaseOrder { + asc: PurchaseOrderable + desc: PurchaseOrderable + then: PurchaseOrder +} + +input PurchasePatch { + user: UserRef + product: ProductRef + date: DateTime +} + +input PurchaseRef { + user: UserRef + product: ProductRef + date: DateTime +} + +input UpdateProductInput { + filter: ProductFilter! + set: ProductPatch + remove: ProductPatch +} + +input UpdatePurchaseInput { + filter: PurchaseFilter! + set: PurchasePatch + remove: PurchasePatch +} + +input UpdateUserInput { + filter: UserFilter! + set: UserPatch + remove: UserPatch +} + +input UserFilter { + email: StringHashFilter + has: [UserHasFilter] + and: [UserFilter] + or: [UserFilter] + not: UserFilter +} + +input UserOrder { + asc: UserOrderable + desc: UserOrderable + then: UserOrder +} + +input UserPatch { + purchase_history: [PurchaseRef] + user_vector: [Float] +} + +input UserRef { + email: String + purchase_history: [PurchaseRef] + user_vector: [Float] +} + +####################### +# Generated Query +####################### + +type Query { + getProduct(id: String!): Product + querySimilarProductById(id: String!, by: ProductEmbedding, topK: Int!): [ProductWithDistance] + querySimilarProductByEmbedding(by: ProductEmbedding, topK: Int!, vector: [Float!]): [ProductWithDistance] + queryProduct(filter: ProductFilter, order: ProductOrder, first: Int, offset: Int): [Product] + aggregateProduct(filter: ProductFilter): ProductAggregateResult + queryPurchase(filter: PurchaseFilter, order: PurchaseOrder, first: Int, offset: Int): [Purchase] + aggregatePurchase(filter: PurchaseFilter): PurchaseAggregateResult + getUser(email: String!): User + querySimilarUserById(email: String!, by: UserEmbedding, topK: Int!): [UserWithDistance] + querySimilarUserByEmbedding(by: UserEmbedding, topK: Int!, vector: [Float!]): [UserWithDistance] + queryUser(filter: UserFilter, order: UserOrder, first: Int, offset: Int): [User] + aggregateUser(filter: UserFilter): UserAggregateResult +} + +####################### +# Generated Mutations +####################### + +type Mutation { + addProduct(input: [AddProductInput!]!, upsert: Boolean): AddProductPayload + updateProduct(input: UpdateProductInput!): UpdateProductPayload + deleteProduct(filter: ProductFilter!): DeleteProductPayload + addPurchase(input: [AddPurchaseInput!]!): AddPurchasePayload + updatePurchase(input: UpdatePurchaseInput!): UpdatePurchasePayload + deletePurchase(filter: PurchaseFilter!): DeletePurchasePayload + addUser(input: [AddUserInput!]!, upsert: Boolean): AddUserPayload + updateUser(input: UpdateUserInput!): UpdateUserPayload + deleteUser(filter: UserFilter!): DeleteUserPayload +} +