Skip to content

Commit

Permalink
Merge pull request #106 from SCAI-BIO/switch-index-api
Browse files Browse the repository at this point in the history
Switch to INDEX API
  • Loading branch information
tiadams committed Sep 23, 2024
2 parents 12a4604 + 9c9caa7 commit d155a5c
Show file tree
Hide file tree
Showing 9 changed files with 30 additions and 223 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ jobs:
fail-fast: false
matrix:
node-version: [ 18, 20 ]
python-version: ["3.10", "3.11"]
python-version: ["3.10", "3.11", "3.12"]

steps:
- uses: actions/checkout@v4
Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ Data collected in cohort studies lay the groundwork for a plethora of Parkinson

## Requirements

- Python >= 3.10 < 3.12
- Python >= 3.10
- [Angular = 17.1.0](https://angular.io/guide/setup-local)
- [Node.js (LTS) >= 18.13](https://nodejs.org/en/download/package-manager)
- TypeScript >= 5.2.0 < 5.4.0
Expand Down
2 changes: 1 addition & 1 deletion backend/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# Use an official Python runtime as a parent image
FROM python:3.11
FROM python:3.12

ENV PDATAVIEWER_ADMIN_USERNAME=foo
ENV PDATAVIEWER_ADMIN_PASSWORD=bar
Expand Down
109 changes: 1 addition & 108 deletions backend/api/routes.py
Original file line number Diff line number Diff line change
@@ -1,17 +1,12 @@
import io
import logging
import os
import tempfile
import zipfile
from contextlib import asynccontextmanager
from typing import Annotated

import numpy as np
from datastew import DataDictionarySource
from datastew.embedding import GPT4Adapter, MPNetAdapter
from datastew.repository.weaviate import WeaviateRepository
from dotenv import load_dotenv
from fastapi import Depends, FastAPI, File, Form, HTTPException, UploadFile
from fastapi import Depends, FastAPI, File, HTTPException, UploadFile
from fastapi.responses import RedirectResponse
from fastapi.security import HTTPBasicCredentials
from functions.autocomplete import autocomplete
Expand All @@ -27,9 +22,7 @@

resources = {}
logger = logging.getLogger("uvicorn.info")
weaviate_url = os.getenv("WEAVIATE_URL", "http://weaviate:8080")
database = SQLLiteRepository()
weaviate = WeaviateRepository(mode="remote", path=weaviate_url)


@asynccontextmanager
Expand Down Expand Up @@ -279,106 +272,6 @@ def get_autocompletion(text: str):
return autocomplete(text, repo=database)


@app.get("/terminologies", tags=["embeddings"])
def get_terminologies():
    """
    Terminologies stored in the Weaviate vector database.

    Returns the list of terminology names, in the order the repository
    yields them.
    """
    # Only the name of each terminology object is exposed to the client.
    return [entry.name for entry in weaviate.get_all_terminologies()]


@app.get("/embedding-models", tags=["embeddings"])
def get_embedding_models():
    """
    Embedding models that can be used for sentence similarity.

    The value is passed through unchanged from the Weaviate repository.
    """
    sentence_embedders = weaviate.get_all_sentence_embedders()
    return sentence_embedders


@app.post("/closest-mappings", tags=["embeddings"])
async def get_closest_mappings(
    file: UploadFile = File(...),
    description_field: str = Form(...),
    variable_field: str = Form(...),
    selected_model: str = Form(...),
    selected_terminology: str = Form(...),
):
    """
    Get closest mappings in the selected terminology for the given variable based on its
    description embedded by the chosen embedding model

    The uploaded file is written to a temporary file, parsed as a data
    dictionary, and every row's description is embedded and matched against
    the vector database (queried with ``limit=5``).

    Returns a list with one entry per data dictionary row, each holding the
    ``variable``, its ``description`` and the list of ``mappings``.

    Raises:
        HTTPException: 400 if ``selected_model`` is not a supported embedding
            model; 500 (with the original error message as detail) if
            anything else fails while processing the upload.
    """
    # Remember the temporary file so it can be removed on every exit path,
    # not only when processing succeeds.
    tmp_file_path = None
    try:
        if selected_model == "text-embedding-ada-002":
            embedding_model = GPT4Adapter(selected_model)
        elif selected_model == "sentence-transformers/all-mpnet-base-v2":
            embedding_model = MPNetAdapter(selected_model)
        else:
            raise HTTPException(status_code=400, detail="Unsupported embedding model")

        if file.filename is not None:
            file_extension = os.path.splitext(file.filename)[1].lower()
        else:
            file_extension = None
        with tempfile.NamedTemporaryFile(
            delete=False, suffix=file_extension
        ) as tmp_file:
            # Record the path before writing so a failed read/write still
            # gets cleaned up below.
            tmp_file_path = tmp_file.name
            tmp_file.write(await file.read())

        data_dict_source = DataDictionarySource(
            file_path=tmp_file_path,
            variable_field=variable_field,
            description_field=description_field,
        )

        df = data_dict_source.to_dataframe()
        response = []

        for _, row in df.iterrows():
            variable = row["variable"]
            description = row["description"]
            embedding = embedding_model.get_embedding(description)
            closest_mappings = (
                weaviate.get_terminology_and_model_specific_closest_mappings(
                    embedding, selected_terminology, selected_model, limit=5
                )
            )

            mappings_list = []
            for mapping, similarity in closest_mappings:
                concept = mapping.concept
                terminology = concept.terminology
                mappings_list.append(
                    {
                        "concept": {
                            "id": concept.concept_identifier,
                            "name": concept.pref_label,
                            "terminology": {
                                "id": terminology.id,
                                "name": terminology.name,
                            },
                        },
                        "text": mapping.text,
                        "similarity": similarity,
                    }
                )
            response.append(
                {
                    "variable": variable,
                    "description": description,
                    "mappings": mappings_list,
                }
            )

        return response

    except HTTPException:
        # Deliberate HTTP errors (e.g. the 400 above) must keep their status
        # code instead of being rewrapped as a 500.
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e)) from e
    finally:
        # Best-effort cleanup: the file was created with delete=False.
        if tmp_file_path is not None:
            try:
                os.remove(tmp_file_path)
            except OSError:
                logger.warning(
                    "Could not remove temporary file %s", tmp_file_path
                )


@app.get("/database", tags=["database"])
async def table_names():
"""
Expand Down
111 changes: 7 additions & 104 deletions backend/requirements.txt
Original file line number Diff line number Diff line change
@@ -1,106 +1,9 @@
aiofiles==0.7.0
aiohttp==3.9.5
aiosignal==1.3.1
annotated-types==0.6.0
anyio==4.3.0
attrs==23.2.0
Authlib==1.3.1
certifi==2024.7.4
cffi==1.16.0
charset-normalizer==3.3.2
click==8.1.7
contourpy==1.2.1
cryptography==43.0.0
cycler==0.12.1
datastew==0.3.5
et-xmlfile==1.1.0
fastapi==0.110.2
filelock==3.15.4
fonttools==4.53.1
frozenlist==1.4.1
fsspec==2024.6.1
greenlet==3.0.3
grpcio==1.65.1
grpcio-health-checking==1.65.1
grpcio-tools==1.65.1
h11==0.14.0
httpcore==1.0.5
httptools==0.6.1
httpx==0.27.0
huggingface-hub==0.24.0
idna==3.7
iniconfig==2.0.0
Jinja2==3.1.4
joblib==1.4.2
kiwisolver==1.4.5
MarkupSafe==2.1.5
matplotlib==3.8.4
mpmath==1.3.0
multidict==6.0.5
networkx==3.3
nltk==3.8.1
numpy==1.25.2
nvidia-cublas-cu12==12.1.3.1
nvidia-cuda-cupti-cu12==12.1.105
nvidia-cuda-nvrtc-cu12==12.1.105
nvidia-cuda-runtime-cu12==12.1.105
nvidia-cudnn-cu12==8.9.2.26
nvidia-cufft-cu12==11.0.2.54
nvidia-curand-cu12==10.3.2.106
nvidia-cusolver-cu12==11.4.5.107
nvidia-cusparse-cu12==12.1.0.106
nvidia-nccl-cu12==2.20.5
nvidia-nvjitlink-cu12==12.5.82
nvidia-nvtx-cu12==12.1.105
openai==0.28.1
openpyxl==3.1.5
packaging==24.0
pandas==2.1.0
pillow==10.4.0
plotly==5.17.0
pluggy==1.5.0
protobuf==5.27.2
pycparser==2.22
pydantic==2.5.3
pydantic_core==2.14.6
pyparsing==3.1.2
pytest==8.1.1
python-dateutil==2.8.2
argon2-cffi==23.1.0
fastapi==0.114.2
numpy==2.1.1
pandas==2.2.2
python-dotenv==1.0.1
python-multipart==0.0.9
pytz==2023.3
PyYAML==6.0.1
rapidfuzz==3.8.1
regex==2024.5.15
requests==2.31.0
safetensors==0.4.3
scikit-learn==1.3.2
scipy==1.11.4
seaborn==0.13.2
sentence-transformers==2.3.1
sentencepiece==0.2.0
six==1.16.0
sniffio==1.3.1
SQLAlchemy==2.0.31
starlette==0.37.2
sympy==1.13.1
tenacity==8.5.0
thefuzz==0.20.0
threadpoolctl==3.5.0
tokenizers==0.19.1
torch==2.3.1
tqdm==4.66.4
transformers==4.42.4
triton==2.3.1
typing_extensions==4.11.0
tzdata==2023.3
urllib3==2.2.2
uuid==1.30
uvicorn==0.29.0
uvloop==0.19.0
validators==0.33.0
watchfiles==0.21.0
weaviate-client==4.6.7
websockets==12.0
yarl==1.9.4
argon2-cffi==23.1.0
SQLAlchemy==2.0.35
thefuzz==0.22.1
uvicorn==0.30.6
20 changes: 12 additions & 8 deletions frontend/src/app/auto-harmonizer/auto-harmonizer.component.ts
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ import { MatProgressBarModule } from '@angular/material/progress-bar';
import { MatSelectModule } from '@angular/material/select';
import { MatTableDataSource, MatTableModule } from '@angular/material/table';
import { environment } from '../../environments/environment';
import { Response } from '../interfaces/mapping';
import { Terminology, Response } from '../interfaces/mapping';
import { MyErrorStateMatcherService } from '../services/my-error-state-matcher.service';

@Component({
Expand Down Expand Up @@ -64,20 +64,24 @@ export class AutoHarmonizerComponent implements OnInit {
requiredFileType: string =
'.csv, application/vnd.ms-excel, application/vnd.openxmlformats-officedocument.spreadsheetml.sheet';
terminologies: string[] = [];
private API_URL = environment.API_URL;
private API_URL = environment.INDEX_API_URL;

constructor(private http: HttpClient, private fb: FormBuilder) {}

fetchTerminologies(): void {
this.http.get<string[]>(`${this.API_URL}/terminologies`).subscribe({
next: (v) => (this.terminologies = v),
this.http.get<Terminology[]>(`${this.API_URL}/terminologies`).subscribe({
next: (terminologies) => {
this.terminologies = terminologies.map(
(terminology) => terminology.name
);
},
error: (error) => console.error('Fetch error:', error),
complete: () => console.info('Terminologies successfully fetched'),
});
}

fetchEmbeddingModels(): void {
this.http.get<string[]>(`${this.API_URL}/embedding-models`).subscribe({
this.http.get<string[]>(`${this.API_URL}/models`).subscribe({
next: (v) => (this.embeddingModels = v),
error: (error) => console.error('Fetch error:', error),
complete: () => console.info('Embedding models successfully fetched'),
Expand All @@ -86,7 +90,7 @@ export class AutoHarmonizerComponent implements OnInit {

fetchClosestMappings(formData: FormData): void {
this.http
.post<Response[]>(`${this.API_URL}/closest-mappings/`, formData, {
.post<Response[]>(`${this.API_URL}/mappings/dict/`, formData, {
headers: new HttpHeaders({
Accept: 'application/json',
}),
Expand Down Expand Up @@ -154,11 +158,11 @@ export class AutoHarmonizerComponent implements OnInit {
this.autoHarmonizerForm.value.descriptionField
);
this.formData.set(
'selected_model',
'model',
this.autoHarmonizerForm.value.selectedEmbeddingModel
);
this.formData.set(
'selected_terminology',
'terminology_name',
this.autoHarmonizerForm.value.selectedTerminology
);
this.fetchClosestMappings(this.formData);
Expand Down
5 changes: 5 additions & 0 deletions frontend/src/app/interfaces/mapping.ts
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,11 @@ export interface Mapping {
};
}

// Shape of one entry in the `/terminologies` response that the auto-harmonizer
// component requests; the component only reads `name` from each entry.
export interface Terminology {
// Terminology name; this is the value shown/selected in the component.
name: string;
// Terminology identifier (typed as a string here).
id: string;
}

export interface Response {
variable: string;
description: string;
Expand Down
1 change: 1 addition & 0 deletions frontend/src/environments/environment.prod.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
// Settings object for the production configuration (`production: true`).
export const environment = {
production: true,
// Base URL stored as API_URL.
API_URL: 'https://api.pdata.k8s.bio.scai.fraunhofer.de',
// Base URL the auto-harmonizer component uses for its requests.
// NOTE(review): this is plain http while API_URL is https — confirm the
// index service is reachable over https; a page served over https may have
// its http requests blocked by the browser as mixed content.
INDEX_API_URL: 'http://index.bio.scai.fraunhofer.de',
};
1 change: 1 addition & 0 deletions frontend/src/environments/environment.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
// Settings object for the non-production configuration (`production: false`).
export const environment = {
production: false,
// Base URL stored as API_URL; points at a locally running server.
API_URL: 'http://localhost:5000',
// Base URL the auto-harmonizer component uses for its requests.
// NOTE(review): unlike API_URL this is a remote host, not localhost — confirm
// that hitting the shared index service from development builds is intended.
INDEX_API_URL: 'http://index.bio.scai.fraunhofer.de',
};

0 comments on commit d155a5c

Please sign in to comment.