From 06e70b629dc5decf12a9da2f2ff197e5542344f4 Mon Sep 17 00:00:00 2001
From: Parker Stafford <52351508+Parker-Stafford@users.noreply.github.com>
Date: Thu, 8 Aug 2024 16:21:24 -0700
Subject: [PATCH] docs(haystack): add rag example from haystack (#812)

---
 .../examples/haystack_rag_pipeline.py         | 72 +++++++++++++++++++
 1 file changed, 72 insertions(+)
 create mode 100644 python/instrumentation/openinference-instrumentation-haystack/examples/haystack_rag_pipeline.py

diff --git a/python/instrumentation/openinference-instrumentation-haystack/examples/haystack_rag_pipeline.py b/python/instrumentation/openinference-instrumentation-haystack/examples/haystack_rag_pipeline.py
new file mode 100644
index 000000000..8a48c8d20
--- /dev/null
+++ b/python/instrumentation/openinference-instrumentation-haystack/examples/haystack_rag_pipeline.py
@@ -0,0 +1,72 @@
+from datasets import load_dataset
+from haystack import Document, Pipeline
+from haystack.components.builders import PromptBuilder
+from haystack.components.embedders import (
+    SentenceTransformersDocumentEmbedder,
+    SentenceTransformersTextEmbedder,
+)
+from haystack.components.generators import OpenAIGenerator
+from haystack.components.retrievers.in_memory import InMemoryEmbeddingRetriever
+from haystack.document_stores.in_memory import InMemoryDocumentStore
+from openinference.instrumentation.haystack import HaystackInstrumentor
+from opentelemetry.exporter.otlp.proto.http.trace_exporter import OTLPSpanExporter
+from opentelemetry.sdk import trace as trace_sdk
+from opentelemetry.sdk.trace.export import SimpleSpanProcessor
+
+endpoint = "http://127.0.0.1:6006/v1/traces"
+tracer_provider = trace_sdk.TracerProvider()
+tracer_provider.add_span_processor(SimpleSpanProcessor(OTLPSpanExporter(endpoint)))
+
+HaystackInstrumentor().instrument(tracer_provider=tracer_provider)
+
+document_store = InMemoryDocumentStore()
+
+dataset = load_dataset("bilgeyucel/seven-wonders", split="train")
+docs = [Document(content=doc["content"], meta=doc["meta"]) for doc in dataset]
+
+
+doc_embedder = SentenceTransformersDocumentEmbedder(model="sentence-transformers/all-MiniLM-L6-v2")
+doc_embedder.warm_up()
+
+docs_with_embeddings = doc_embedder.run(docs)
+document_store.write_documents(docs_with_embeddings["documents"])
+
+text_embedder = SentenceTransformersTextEmbedder(model="sentence-transformers/all-MiniLM-L6-v2")
+
+retriever = InMemoryEmbeddingRetriever(document_store)
+
+template = """
+Given the following information, answer the question.
+
+Context:
+{% for document in documents %}
+    {{ document.content }}
+{% endfor %}
+
+Question: {{question}}
+Answer:
+"""
+
+prompt_builder = PromptBuilder(template=template)
+
+generator = OpenAIGenerator(model="gpt-3.5-turbo")
+
+basic_rag_pipeline = Pipeline()
+# Add components to your pipeline
+basic_rag_pipeline.add_component("text_embedder", text_embedder)
+basic_rag_pipeline.add_component("retriever", retriever)
+basic_rag_pipeline.add_component("prompt_builder", prompt_builder)
+basic_rag_pipeline.add_component("llm", generator)
+
+# Now, connect the components to each other
+basic_rag_pipeline.connect("text_embedder.embedding", "retriever.query_embedding")
+basic_rag_pipeline.connect("retriever", "prompt_builder.documents")
+basic_rag_pipeline.connect("prompt_builder", "llm")
+
+question = "What does Rhodes Statue look like?"
+
+response = basic_rag_pipeline.run(
+    {"text_embedder": {"text": question}, "prompt_builder": {"question": question}}
+)
+
+print(response["llm"]["replies"][0])
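
The example added by this patch assumes an OTLP collector (e.g. Arize Phoenix) is already listening at http://127.0.0.1:6006/v1/traces, and that OPENAI_API_KEY is set in the environment for the OpenAIGenerator. As a minimal sketch for trying the instrumentation without a running collector (this variant is not part of the patch), the OTLP exporter can be swapped for OpenTelemetry's ConsoleSpanExporter; everything else in the example stays the same:

    from openinference.instrumentation.haystack import HaystackInstrumentor
    from opentelemetry.sdk import trace as trace_sdk
    from opentelemetry.sdk.trace.export import ConsoleSpanExporter, SimpleSpanProcessor

    # Print spans to stdout instead of exporting them to a collector.
    tracer_provider = trace_sdk.TracerProvider()
    tracer_provider.add_span_processor(SimpleSpanProcessor(ConsoleSpanExporter()))

    # Instrument Haystack exactly as in the patched example above.
    HaystackInstrumentor().instrument(tracer_provider=tracer_provider)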