Skip to content

Commit

Permalink
More notebooks added to nbtest (#172)
Browse files · Browse the repository at this point in the history
  • Loading branch information
miguelgrinberg authored Jan 26, 2024
1 parent 5369ab6 commit 3be51b3
Show file tree
Hide file tree
Showing 14 changed files with 923 additions and 307 deletions.
8 changes: 7 additions & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,20 @@

test: nbtest notebooks

notebooks: search document-chunking
notebooks: search document-chunking model-upgrades langchain

search:
$(MAKE) -C notebooks/search

document-chunking:
$(MAKE) -C notebooks/document-chunking

model-upgrades:
$(MAKE) -C notebooks/model-upgrades

langchain:
$(MAKE) -C notebooks/langchain

install: pre-commit nbtest

pre-commit:
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"id": "385c47c3-27e8-4b51-b8b7-26c97b9a3ad3",
"metadata": {},
"outputs": [],
"source": [
"from elasticsearch import Elasticsearch\n",
"from getpass import getpass\n",
"\n",
"ELASTIC_CLOUD_ID = getpass(\"Elastic Cloud ID: \")\n",
"ELASTIC_API_KEY = getpass(\"Elastic Api Key: \")\n",
"\n",
"client = Elasticsearch(cloud_id=ELASTIC_CLOUD_ID, api_key=ELASTIC_API_KEY,)\n",
"\n",
"# delete the notebook's index\n",
"client.indices.delete(index=\"blogs\", ignore_unavailable=True)\n",
"\n",
"# delete the pipeline\n",
"try:\n",
" client.ingest.delete_pipeline(id=\"vectorize_blogs\")\n",
"except:\n",
" pass\n",
"\n",
"# delete the model\n",
"try:\n",
" client.ml.delete_trained_model(model_id=\"sentence-transformers__all-minilm-l6-v2\", force=True)\n",
"except:\n",
" pass"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.6"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,10 @@
{
"cell_type": "markdown",
"metadata": {
"collapsed": false
"collapsed": false,
"jupyter": {
"outputs_hidden": false
}
},
"source": [
"# NLP text search using hugging face transformer model\n",
Expand Down Expand Up @@ -44,14 +47,20 @@
},
"outputs": [],
"source": [
"# install packages\n",
"!python3 -m pip install -qU sentence-transformers eland elasticsearch transformers\n",
"\n",
"!python3 -m pip -qU install sentence-transformers eland elasticsearch transformers"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# import modules\n",
"import pandas as pd, json\n",
"from elasticsearch import Elasticsearch\n",
"from getpass import getpass\n",
"from urllib.request import urlopen"
"from urllib.request import urlopen\n",
"import json"
]
},
{
Expand Down Expand Up @@ -93,8 +102,15 @@
"ELASTIC_CLOUD_ID = getpass(\"Elastic Cloud ID: \")\n",
"\n",
"# https://www.elastic.co/search-labs/tutorials/install-elasticsearch/elastic-cloud#creating-an-api-key\n",
"ELASTIC_API_KEY = getpass(\"Elastic Api Key: \")\n",
"\n",
"ELASTIC_API_KEY = getpass(\"Elastic Api Key: \")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"!eland_import_hub_model --cloud-id $ELASTIC_CLOUD_ID --hub-model-id sentence-transformers/all-MiniLM-L6-v2 --task-type text_embedding --es-api-key $ELASTIC_API_KEY --start"
]
},
Expand Down Expand Up @@ -304,7 +320,7 @@
},
{
"cell_type": "code",
"execution_count": 106,
"execution_count": 22,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
Expand All @@ -315,125 +331,40 @@
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>_id</th>\n",
" <th>_score</th>\n",
" <th>fields.title</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>TxUU-YkBAHcz2kFqAun2</td>\n",
" <td>0.591786</td>\n",
" <td>[Brewing in Beats: Track network connections]</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>SxUU-YkBAHcz2kFqAun2</td>\n",
" <td>0.401099</td>\n",
" <td>[Machine Learning for Nginx Logs - Identifying...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>UxUU-YkBAHcz2kFqAun2</td>\n",
" <td>0.390279</td>\n",
" <td>[Data Visualization For Machine Learning]</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>TBUU-YkBAHcz2kFqAun2</td>\n",
" <td>0.368995</td>\n",
" <td>[Logstash Lines: Introduce integration plugins]</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>UhUU-YkBAHcz2kFqAun2</td>\n",
" <td>0.368995</td>\n",
" <td>[Logstash Lines: Introduce integration plugins]</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>URUU-YkBAHcz2kFqAun2</td>\n",
" <td>0.356903</td>\n",
" <td>[Keeping up with Kibana: This week in Kibana f...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>UBUU-YkBAHcz2kFqAun2</td>\n",
" <td>0.341939</td>\n",
" <td>[Kibana 4 Video Tutorials, Part 3]</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>VBUU-YkBAHcz2kFqAun2</td>\n",
" <td>0.337294</td>\n",
" <td>[Introducing approximate nearest neighbor sear...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>ThUU-YkBAHcz2kFqAun2</td>\n",
" <td>0.336460</td>\n",
" <td>[Where in the World is Elastic? - QCon Beijing...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>TRUU-YkBAHcz2kFqAun2</td>\n",
" <td>0.320756</td>\n",
" <td>[EQL for the masses]</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" _id _score \\\n",
"0 TxUU-YkBAHcz2kFqAun2 0.591786 \n",
"1 SxUU-YkBAHcz2kFqAun2 0.401099 \n",
"2 UxUU-YkBAHcz2kFqAun2 0.390279 \n",
"3 TBUU-YkBAHcz2kFqAun2 0.368995 \n",
"4 UhUU-YkBAHcz2kFqAun2 0.368995 \n",
"5 URUU-YkBAHcz2kFqAun2 0.356903 \n",
"6 UBUU-YkBAHcz2kFqAun2 0.341939 \n",
"7 VBUU-YkBAHcz2kFqAun2 0.337294 \n",
"8 ThUU-YkBAHcz2kFqAun2 0.336460 \n",
"9 TRUU-YkBAHcz2kFqAun2 0.320756 \n",
"\n",
" fields.title \n",
"0 [Brewing in Beats: Track network connections] \n",
"1 [Machine Learning for Nginx Logs - Identifying... \n",
"2 [Data Visualization For Machine Learning] \n",
"3 [Logstash Lines: Introduce integration plugins] \n",
"4 [Logstash Lines: Introduce integration plugins] \n",
"5 [Keeping up with Kibana: This week in Kibana f... \n",
"6 [Kibana 4 Video Tutorials, Part 3] \n",
"7 [Introducing approximate nearest neighbor sear... \n",
"8 [Where in the World is Elastic? - QCon Beijing... \n",
"9 [EQL for the masses] "
]
},
"execution_count": 106,
"metadata": {},
"output_type": "execute_result"
"name": "stdout",
"output_type": "stream",
"text": [
"['Brewing in Beats: Track network connections']\n",
"Score: 0.5917864\n",
"\n",
"['Machine Learning for Nginx Logs - Identifying Operational Issues with Your Website']\n",
"Score: 0.40109876\n",
"\n",
"['Data Visualization For Machine Learning']\n",
"Score: 0.39027885\n",
"\n",
"['Logstash Lines: Introduce integration plugins']\n",
"Score: 0.36899462\n",
"\n",
"['Keeping up with Kibana: This week in Kibana for November 29th, 2019']\n",
"Score: 0.35690257\n",
"\n",
"['How to implement similarity image search | Elastic.co | Elastic Blog']\n",
"Score: 0.34473613\n",
"\n",
"['Kibana 4 Video Tutorials, Part 3']\n",
"Score: 0.34193927\n",
"\n",
"['Introducing approximate nearest neighbor search in Elasticsearch 8.0 | Elastic Blog']\n",
"Score: 0.3372936\n",
"\n",
"['Where in the World is Elastic? - QCon Beijing, Devoxx France, Percona Live & AWS Summit Chicago']\n",
"Score: 0.33645985\n",
"\n",
"['EQL for the masses']\n",
"Score: 0.3207562\n",
"\n"
]
}
],
"source": [
Expand All @@ -458,26 +389,41 @@
" knn=query,\n",
" source=False)\n",
"\n",
"\n",
"results = pd.json_normalize(json.loads(json.dumps(response.body['hits']['hits'])))\n",
"\n",
"# shows the result\n",
"results[['_id', '_score', 'fields.title']]\n"
"def show_results(results):\n",
" for result in results:\n",
" print(f'{result[\"fields\"][\"title\"]}\\nScore: {result[\"_score\"]}\\n')\n",
" \n",
"show_results(response.body['hits']['hits'])"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"colab": {
"provenance": []
},
"kernelspec": {
"display_name": "Python 3.11.3 64-bit",
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"version": "3.9.6"
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.6"
},
"vscode": {
"interpreter": {
Expand All @@ -486,5 +432,5 @@
}
},
"nbformat": 4,
"nbformat_minor": 0
"nbformat_minor": 4
}
11 changes: 11 additions & 0 deletions notebooks/langchain/Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
NBTEST = ../../bin/nbtest
NOTEBOOKS = \
langchain-using-own-model.ipynb \
langchain-vector-store-using-elser.ipynb

.PHONY: all $(NOTEBOOKS)

all: $(NOTEBOOKS)

$(NOTEBOOKS):
-$(NBTEST) $@
Loading

0 comments on commit 3be51b3

Please sign in to comment.