examples: add python examples for bespoke-minicheck (#6841)
examples/python-grounded-factuality-rag-check/main.py (new file, 137 lines)
@@ -0,0 +1,137 @@
import ollama
import warnings
from mattsollamatools import chunker
from newspaper import Article
import numpy as np
from sklearn.neighbors import NearestNeighbors
import nltk

warnings.filterwarnings(
    "ignore", category=FutureWarning, module="transformers.tokenization_utils_base"
)
nltk.download("punkt", quiet=True)


def getArticleText(url):
    """Gets the text of an article from a URL.

    Often there are a bunch of ads and menus on pages for a news article.
    This uses newspaper3k to get just the text of the article.
    """
    article = Article(url)
    article.download()
    article.parse()
    return article.text


def knn_search(question_embedding, embeddings, k=5):
    """Performs K-nearest neighbors (KNN) search"""
    X = np.array(
        [item["embedding"] for article in embeddings for item in article["embeddings"]]
    )
    source_texts = [
        item["source"] for article in embeddings for item in article["embeddings"]
    ]

    # Fit a KNN model on the embeddings
    knn = NearestNeighbors(n_neighbors=k, metric="cosine")
    knn.fit(X)

    # Find the indices and distances of the k-nearest neighbors.
    _, indices = knn.kneighbors(question_embedding, n_neighbors=k)

    # Get the indices and source texts of the best matches
    best_matches = [(indices[0][i], source_texts[indices[0][i]]) for i in range(k)]

    return best_matches


def check(document, claim):
    """Checks if the claim is supported by the document by calling bespoke-minicheck.

    Returns Yes/yes if the claim is supported by the document, No/no otherwise.
    Support for logits will be added in the future.

    bespoke-minicheck's system prompt is defined as:
      'Determine whether the provided claim is consistent with the corresponding
      document. Consistency in this context implies that all information presented in the claim
      is substantiated by the document. If not, it should be considered inconsistent. Please
      assess the claim's consistency with the document by responding with either "Yes" or "No".'

    bespoke-minicheck's user prompt is defined as:
      "Document: {document}\nClaim: {claim}"
    """
    prompt = f"Document: {document}\nClaim: {claim}"
    response = ollama.generate(
        model="bespoke-minicheck", prompt=prompt, options={"num_predict": 2, "temperature": 0.0}
    )
    return response["response"].strip()


if __name__ == "__main__":
    allEmbeddings = []
    default_url = "https://www.theverge.com/2024/9/12/24242439/openai-o1-model-reasoning-strawberry-chatgpt"
    user_input = input(
        "Enter the URL of an article you want to chat with, or press Enter for default example: "
    )
    article_url = user_input.strip() if user_input.strip() else default_url
    article = {}
    article["embeddings"] = []
    article["url"] = article_url
    text = getArticleText(article_url)
    chunks = chunker(text)

    # Embed (batch) chunks using ollama
    embeddings = ollama.embed(model="all-minilm", input=chunks)["embeddings"]

    for chunk, embedding in zip(chunks, embeddings):
        item = {}
        item["source"] = chunk
        item["embedding"] = embedding
        item["sourcelength"] = len(chunk)
        article["embeddings"].append(item)

    allEmbeddings.append(article)

    print(f"\nLoaded, chunked, and embedded text from {article_url}.\n")

    while True:
        # Input a question from the user
        # For example, "Who is the chief research officer?"
        question = input("Enter your question or type quit: ")

        if question.lower() == "quit":
            break

        # Embed the user's question using ollama.embed
        question_embedding = ollama.embed(model="all-minilm", input=question)[
            "embeddings"
        ]

        # Perform KNN search to find the best matches (indices and source text)
        best_matches = knn_search(question_embedding, allEmbeddings, k=4)

        sourcetext = "\n\n".join([source_text for (_, source_text) in best_matches])

        print(f"\nRetrieved chunks: \n{sourcetext}\n")

        # Give the retrieved chunks and question to the chat model
        system_prompt = f"Only use the following information to answer the question. Do not use anything else: {sourcetext}"

        ollama_response = ollama.generate(
            model="llama3.1",
            prompt=question,
            system=system_prompt,
            options={"stream": False},
        )

        answer = ollama_response["response"]
        print(f"LLM Answer:\n{answer}\n")

        # Check each sentence in the response for grounded factuality
        if answer:
            for claim in nltk.sent_tokenize(answer):
                print(f"LLM Claim: {claim}")
                print(
                    f"Is this claim supported by the context according to bespoke-minicheck? {check(sourcetext, claim)}\n"
                )
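
For reference, the grounded-factuality step above comes down to a single bespoke-minicheck call. A minimal sketch of that call in isolation, assuming an Ollama server is running locally and the bespoke-minicheck model has been pulled; the document and claim strings are invented for illustration:

import ollama

# Toy document/claim pair, invented for illustration only.
document = "Ollama lets you run large language models on your own machine."
claim = "Ollama requires a cloud subscription."

# Same prompt format and options that check() above uses.
prompt = f"Document: {document}\nClaim: {claim}"
response = ollama.generate(
    model="bespoke-minicheck",
    prompt=prompt,
    options={"num_predict": 2, "temperature": 0.0},
)

# bespoke-minicheck answers "Yes" when the claim is supported by the document,
# "No" otherwise; for this pair the expected answer is "No".
print(response["response"].strip())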
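
The retrieval path (ollama.embed plus scikit-learn's NearestNeighbors) can be exercised on its own in the same way. A small sketch under the same assumptions, with a few hand-written chunks standing in for the chunked article text:

import ollama
import numpy as np
from sklearn.neighbors import NearestNeighbors

# Hand-written stand-ins for the chunked article text (illustration only).
chunks = [
    "The company announced a new reasoning model in September.",
    "The article also covers pricing and availability details.",
    "A spokesperson discussed safety testing before release.",
]

# ollama.embed returns one embedding per input string under "embeddings".
chunk_embeddings = ollama.embed(model="all-minilm", input=chunks)["embeddings"]
question_embedding = ollama.embed(model="all-minilm", input="What was announced?")["embeddings"]

# Fit cosine-distance KNN over the chunk embeddings and query with the
# question embedding ("embeddings" is already a 2D list of vectors).
knn = NearestNeighbors(n_neighbors=2, metric="cosine")
knn.fit(np.array(chunk_embeddings))
_, indices = knn.kneighbors(question_embedding, n_neighbors=2)

# Print the retrieved chunks, nearest first.
for i in indices[0]:
    print(chunks[i])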