Skip to content

Commit

Permalink
Support CosmosDB (mongo vcore vector DB) as data source (#356)
Browse the repository at this point in the history
Co-authored-by: Sarah Widder <sawidder@microsoft.com>
  • Loading branch information
sarah-widder and sarah-widder committed Nov 13, 2023
1 parent 954de60 commit 311deff
Showing 1 changed file with 77 additions and 24 deletions.
101 changes: 77 additions & 24 deletions app.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,23 +27,28 @@ def favicon():
def assets(path):
    """Return the file at ``path`` from the ``static/assets`` directory.

    Delegates to ``send_from_directory`` (presumably Flask's helper; the
    import is outside this view -- confirm).
    """
    asset_root = "static/assets"
    return send_from_directory(asset_root, path)

# On Your Data Settings
# Datasource-agnostic defaults. The datasource-specific settings below fall
# back to these when their own environment variable is unset.
# NOTE: values read from the environment are strings, while the literal
# fallbacks (5, 3) are ints -- consumers must coerce before numeric use.
DATASOURCE_TYPE = os.environ.get("DATASOURCE_TYPE", "AzureCognitiveSearch")
SEARCH_TOP_K = os.environ.get("SEARCH_TOP_K", 5)
SEARCH_STRICTNESS = os.environ.get("SEARCH_STRICTNESS", 3)
SEARCH_ENABLE_IN_DOMAIN = os.environ.get("SEARCH_ENABLE_IN_DOMAIN", "true")

# ACS Integration Settings
AZURE_SEARCH_SERVICE = os.environ.get("AZURE_SEARCH_SERVICE")
AZURE_SEARCH_INDEX = os.environ.get("AZURE_SEARCH_INDEX")
AZURE_SEARCH_KEY = os.environ.get("AZURE_SEARCH_KEY")
AZURE_SEARCH_USE_SEMANTIC_SEARCH = os.environ.get("AZURE_SEARCH_USE_SEMANTIC_SEARCH", "false")
AZURE_SEARCH_SEMANTIC_SEARCH_CONFIG = os.environ.get("AZURE_SEARCH_SEMANTIC_SEARCH_CONFIG", "default")
# Each of the next two settings is assigned exactly once and defaults to its
# datasource-agnostic counterpart defined above.
AZURE_SEARCH_TOP_K = os.environ.get("AZURE_SEARCH_TOP_K", SEARCH_TOP_K)
AZURE_SEARCH_ENABLE_IN_DOMAIN = os.environ.get("AZURE_SEARCH_ENABLE_IN_DOMAIN", SEARCH_ENABLE_IN_DOMAIN)
AZURE_SEARCH_CONTENT_COLUMNS = os.environ.get("AZURE_SEARCH_CONTENT_COLUMNS")
AZURE_SEARCH_FILENAME_COLUMN = os.environ.get("AZURE_SEARCH_FILENAME_COLUMN")
AZURE_SEARCH_TITLE_COLUMN = os.environ.get("AZURE_SEARCH_TITLE_COLUMN")
AZURE_SEARCH_URL_COLUMN = os.environ.get("AZURE_SEARCH_URL_COLUMN")
AZURE_SEARCH_VECTOR_COLUMNS = os.environ.get("AZURE_SEARCH_VECTOR_COLUMNS")
AZURE_SEARCH_QUERY_TYPE = os.environ.get("AZURE_SEARCH_QUERY_TYPE")
AZURE_SEARCH_PERMITTED_GROUPS_COLUMN = os.environ.get("AZURE_SEARCH_PERMITTED_GROUPS_COLUMN")
# Falls back to the datasource-agnostic strictness when unset.
AZURE_SEARCH_STRICTNESS = os.environ.get("AZURE_SEARCH_STRICTNESS", SEARCH_STRICTNESS)

# AOAI Integration Settings
AZURE_OPENAI_RESOURCE = os.environ.get("AZURE_OPENAI_RESOURCE")
Expand All @@ -62,16 +67,30 @@ def assets(path):
AZURE_OPENAI_EMBEDDING_KEY = os.environ.get("AZURE_OPENAI_EMBEDDING_KEY")
AZURE_OPENAI_EMBEDDING_NAME = os.environ.get("AZURE_OPENAI_EMBEDDING_NAME", "")

# CosmosDB Mongo vCore vector DB settings
# The connection string has to be provided as a secure string.
AZURE_COSMOSDB_MONGO_VCORE_CONNECTION_STRING = os.getenv("AZURE_COSMOSDB_MONGO_VCORE_CONNECTION_STRING")
AZURE_COSMOSDB_MONGO_VCORE_DATABASE = os.getenv("AZURE_COSMOSDB_MONGO_VCORE_DATABASE")
AZURE_COSMOSDB_MONGO_VCORE_CONTAINER = os.getenv("AZURE_COSMOSDB_MONGO_VCORE_CONTAINER")
AZURE_COSMOSDB_MONGO_VCORE_INDEX = os.getenv("AZURE_COSMOSDB_MONGO_VCORE_INDEX")
# Retrieval tuning: each value defaults to the corresponding AZURE_SEARCH_*
# setting when its own environment variable is unset.
AZURE_COSMOSDB_MONGO_VCORE_TOP_K = os.getenv("AZURE_COSMOSDB_MONGO_VCORE_TOP_K", AZURE_SEARCH_TOP_K)
AZURE_COSMOSDB_MONGO_VCORE_STRICTNESS = os.getenv("AZURE_COSMOSDB_MONGO_VCORE_STRICTNESS", AZURE_SEARCH_STRICTNESS)
AZURE_COSMOSDB_MONGO_VCORE_ENABLE_IN_DOMAIN = os.getenv("AZURE_COSMOSDB_MONGO_VCORE_ENABLE_IN_DOMAIN", AZURE_SEARCH_ENABLE_IN_DOMAIN)
# Field-mapping columns; content columns default to an empty string.
AZURE_COSMOSDB_MONGO_VCORE_CONTENT_COLUMNS = os.getenv("AZURE_COSMOSDB_MONGO_VCORE_CONTENT_COLUMNS", "")
AZURE_COSMOSDB_MONGO_VCORE_FILENAME_COLUMN = os.getenv("AZURE_COSMOSDB_MONGO_VCORE_FILENAME_COLUMN")
AZURE_COSMOSDB_MONGO_VCORE_TITLE_COLUMN = os.getenv("AZURE_COSMOSDB_MONGO_VCORE_TITLE_COLUMN")
AZURE_COSMOSDB_MONGO_VCORE_URL_COLUMN = os.getenv("AZURE_COSMOSDB_MONGO_VCORE_URL_COLUMN")
AZURE_COSMOSDB_MONGO_VCORE_VECTOR_COLUMNS = os.getenv("AZURE_COSMOSDB_MONGO_VCORE_VECTOR_COLUMNS")


# Streaming is enabled only when AZURE_OPENAI_STREAM is "true" (case-insensitive).
SHOULD_STREAM = AZURE_OPENAI_STREAM.lower() == "true"

# Chat History CosmosDB Integration Settings
AZURE_COSMOSDB_ACCOUNT = os.getenv("AZURE_COSMOSDB_ACCOUNT")
AZURE_COSMOSDB_ACCOUNT_KEY = os.getenv("AZURE_COSMOSDB_ACCOUNT_KEY")
AZURE_COSMOSDB_DATABASE = os.getenv("AZURE_COSMOSDB_DATABASE")
AZURE_COSMOSDB_CONVERSATIONS_CONTAINER = os.getenv("AZURE_COSMOSDB_CONVERSATIONS_CONTAINER")

# Initialize a CosmosDB client with AAD auth and containers for Chat History
# (the handle starts out as None until initialisation succeeds).
cosmos_conversation_client = None
if AZURE_COSMOSDB_DATABASE and AZURE_COSMOSDB_ACCOUNT and AZURE_COSMOSDB_CONVERSATIONS_CONTAINER:
try :
Expand Down Expand Up @@ -101,6 +120,10 @@ def is_chat_model():
def should_use_data():
    """Report whether a complete data-source configuration is present.

    Returns:
        True when all Azure Cognitive Search settings (service, index, key)
        are non-empty, or when all CosmosDB Mongo vCore settings (database,
        container, index, connection string) are non-empty; False otherwise.

    Note:
        DATASOURCE_TYPE is not consulted here; either complete configuration
        is enough.
    """
    acs_settings = (
        AZURE_SEARCH_SERVICE,
        AZURE_SEARCH_INDEX,
        AZURE_SEARCH_KEY,
    )
    vcore_settings = (
        AZURE_COSMOSDB_MONGO_VCORE_DATABASE,
        AZURE_COSMOSDB_MONGO_VCORE_CONTAINER,
        AZURE_COSMOSDB_MONGO_VCORE_INDEX,
        AZURE_COSMOSDB_MONGO_VCORE_CONNECTION_STRING,
    )
    return all(acs_settings) or all(vcore_settings)


Expand Down Expand Up @@ -147,28 +170,32 @@ def generateFilterString(userToken):
def prepare_body_headers_with_data(request):
request_messages = request.json["messages"]

# Set query type
query_type = "simple"
if AZURE_SEARCH_QUERY_TYPE:
query_type = AZURE_SEARCH_QUERY_TYPE
elif AZURE_SEARCH_USE_SEMANTIC_SEARCH.lower() == "true" and AZURE_SEARCH_SEMANTIC_SEARCH_CONFIG:
query_type = "semantic"

# Set filter
filter = None
userToken = None
if AZURE_SEARCH_PERMITTED_GROUPS_COLUMN:
userToken = request.headers.get('X-MS-TOKEN-AAD-ACCESS-TOKEN', "")
filter = generateFilterString(userToken)

body = {
"messages": request_messages,
"temperature": float(AZURE_OPENAI_TEMPERATURE),
"max_tokens": int(AZURE_OPENAI_MAX_TOKENS),
"top_p": float(AZURE_OPENAI_TOP_P),
"stop": AZURE_OPENAI_STOP_SEQUENCE.split("|") if AZURE_OPENAI_STOP_SEQUENCE else None,
"stream": SHOULD_STREAM,
"dataSources": [
"dataSources": []
}

if DATASOURCE_TYPE == "AzureCognitiveSearch":
# Set query type
query_type = "simple"
if AZURE_SEARCH_QUERY_TYPE:
query_type = AZURE_SEARCH_QUERY_TYPE
elif AZURE_SEARCH_USE_SEMANTIC_SEARCH.lower() == "true" and AZURE_SEARCH_SEMANTIC_SEARCH_CONFIG:
query_type = "semantic"

# Set filter
filter = None
userToken = None
if AZURE_SEARCH_PERMITTED_GROUPS_COLUMN:
userToken = request.headers.get('X-MS-TOKEN-AAD-ACCESS-TOKEN', "")
filter = generateFilterString(userToken)

body["dataSources"].append(
{
"type": "AzureCognitiveSearch",
"parameters": {
Expand All @@ -190,9 +217,35 @@ def prepare_body_headers_with_data(request):
"filter": filter,
"strictness": int(AZURE_SEARCH_STRICTNESS)
}
}
]
}
})
elif DATASOURCE_TYPE == "AzureCosmosDB":
# Set query type
query_type = "vector"

body["dataSources"].append(
{
"type": "AzureCosmosDB",
"parameters": {
"connectionString": AZURE_COSMOSDB_MONGO_VCORE_CONNECTION_STRING,
"indexName": AZURE_COSMOSDB_MONGO_VCORE_INDEX,
"databaseName": AZURE_COSMOSDB_MONGO_VCORE_DATABASE,
"containerName": AZURE_COSMOSDB_MONGO_VCORE_CONTAINER,
"fieldsMapping": {
"contentFields": AZURE_COSMOSDB_MONGO_VCORE_CONTENT_COLUMNS.split("|") if AZURE_COSMOSDB_MONGO_VCORE_CONTENT_COLUMNS else [],
"titleField": AZURE_COSMOSDB_MONGO_VCORE_TITLE_COLUMN if AZURE_COSMOSDB_MONGO_VCORE_TITLE_COLUMN else None,
"urlField": AZURE_COSMOSDB_MONGO_VCORE_URL_COLUMN if AZURE_COSMOSDB_MONGO_VCORE_URL_COLUMN else None,
"filepathField": AZURE_COSMOSDB_MONGO_VCORE_FILENAME_COLUMN if AZURE_COSMOSDB_MONGO_VCORE_FILENAME_COLUMN else None,
"vectorFields": AZURE_COSMOSDB_MONGO_VCORE_VECTOR_COLUMNS.split("|") if AZURE_COSMOSDB_MONGO_VCORE_VECTOR_COLUMNS else []
},
"inScope": True if AZURE_COSMOSDB_MONGO_VCORE_ENABLE_IN_DOMAIN.lower() == "true" else False,
"topNDocuments": AZURE_COSMOSDB_MONGO_VCORE_TOP_K,
"strictness": int(AZURE_COSMOSDB_MONGO_VCORE_STRICTNESS),
"queryType": query_type,
"roleInformation": AZURE_OPENAI_SYSTEM_MESSAGE
}
})
else:
raise Exception(f"DATASOURCE_TYPE is not configured or unknown: {DATASOURCE_TYPE}")

if "vector" in query_type.lower():
if AZURE_OPENAI_EMBEDDING_NAME:
Expand All @@ -205,7 +258,7 @@ def prepare_body_headers_with_data(request):
headers = {
'Content-Type': 'application/json',
'api-key': AZURE_OPENAI_KEY,
"x-ms-useragent": "GitHubSampleWebApp/PublicAPI/2.0.0"
"x-ms-useragent": "GitHubSampleWebApp/PublicAPI/3.0.0"
}

return body, headers
Expand Down

0 comments on commit 311deff

Please sign in to comment.