Commit

kevin

vaibkumr committed May 18, 2023
1 parent b10444d commit fd74c8f
Showing 13 changed files with 41 additions and 22 deletions.
27 changes: 23 additions & 4 deletions README.md
@@ -1,7 +1,20 @@
# PromptOptimizer
Minimize LLM token complexity to save API costs and model computations.
<div align="center">

[![lint](https://github.com/TimeTraveller-San/prompt-optimizer/actions/workflows/lint.yml/badge.svg)](https://github.com/TimeTraveller-San/prompt-optimizer/actions/workflows/lint.yml) [![test](https://github.com/TimeTraveller-San/prompt-optimizer/actions/workflows/test.yml/badge.svg)](https://github.com/TimeTraveller-San/prompt-optimizer/actions/workflows/test.yml) [![linkcheck](https://github.com/TimeTraveller-San/prompt-optimizer/actions/workflows/linkcheck.yml/badge.svg)](https://github.com/TimeTraveller-San/prompt-optimizer/actions/workflows/linkcheck.yml) [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
## PromptOptimizer

<img src="evaluations/artifacts/logo.png" alt="kevin inspired logo" />

Minimize LLM token complexity to save API costs and model computations.

</div>
<div align="center">

[![lint](https://github.com/TimeTraveller-San/prompt-optimizer/actions/workflows/lint.yml/badge.svg)](https://github.com/TimeTraveller-San/prompt-optimizer/actions/workflows/lint.yml)
[![test](https://github.com/TimeTraveller-San/prompt-optimizer/actions/workflows/test.yml/badge.svg)](https://github.com/TimeTraveller-San/prompt-optimizer/actions/workflows/test.yml)
[![linkcheck](https://github.com/TimeTraveller-San/prompt-optimizer/actions/workflows/linkcheck.yml/badge.svg)](https://github.com/TimeTraveller-San/prompt-optimizer/actions/workflows/linkcheck.yml)
[![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)

</div>


# Features
@@ -10,22 +23,28 @@ Minimize LLM token complexity to save API costs and model computations.
- **Multiple Input Format Support:** Optimization of strings, batches of strings, and JSON prompt data, with an option to skip system prompts.
- **Sequential Optimization:** Chain different optimizers together sequentially.
- **Optimization Metrics:** Number of tokens reduced and semantic similarity before and after optimization.
- **Langchain Support:** Supports langchain style prompt chains.
- **Langchain and JSON Support:** Supports langchain-style prompt chains and OpenAI request JSON objects.

# Why?
- **Minimize Token Complexity:** Token complexity is the number of prompt tokens required to achieve a given task. Reducing it cuts API costs linearly and, for standard transformer models, the attention computation quadratically.
- **Save Money:** For large businesses, trimming token count by 10% can save 100k USD per 1M USD of API spend.
- **Extend Limitations:** Some models have small context lengths; prompt optimizers can help them process documents larger than their context window.
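Since API billing is per token, the savings arithmetic is direct. A minimal sketch (the price and volume figures are hypothetical, not from this project):

```python
def api_cost(n_tokens: int, usd_per_1k_tokens: float) -> float:
    # API billing is linear in tokens, so a 10% token cut is a 10% cost cut.
    return n_tokens / 1000 * usd_per_1k_tokens

PRICE = 0.002                             # hypothetical USD per 1K tokens
before = api_cost(500_000_000, PRICE)     # 500M prompt tokens per month
after = api_cost(450_000_000, PRICE)      # same workload, 10% fewer tokens
print(f"saved {before - after:.0f} USD ({(before - after) / before:.0%})")
# saved 100 USD (10%)
```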

# Why does it work?

1. LLMs are powerful; they can infill missing information.
2. Natural language is redundant; large words and phrases can often be replaced by smaller ones.

<div style="text-align: center;">
<img src="evaluations/artifacts/kevin.gif" alt="Image" />
</div>

| Prompt                                                | # Tokens | Correct Response? |
| ----------------------------------------------------- | -------- | ----------------- |
| Who is the president of the United States of America? | 11       | ✅                |
| Who president US                                      | 3 (-72%) | ✅                |
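A toy illustration of the idea: a naive stop-word filter written for this README, not the library's `StopWordOptim` implementation:

```python
STOP_WORDS = {"is", "the", "of", "a", "an", "to"}

def strip_stop_words(prompt: str) -> str:
    """Drop low-information words; the LLM can infill them."""
    words = prompt.rstrip("?.!").split()
    return " ".join(w for w in words if w.lower() not in STOP_WORDS)

before = "Who is the president of the United States of America?"
after = strip_stop_words(before)
print(after)  # Who president United States America
print(f"{len(before.split())} -> {len(after.split())} words")
```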


# Installation
### Quick Installation
```pip install prompt-optimizer```
Binary file added evaluations/artifacts/kevin.gif
Binary file added evaluations/artifacts/logo.png
2 changes: 1 addition & 1 deletion evaluations/compute_metric.py
@@ -8,5 +8,5 @@ def token_metric(before_samples_dir, after_samples_dir, n_samples_max=100):
metric = TokenMetric()
avg = 0
for json_before, json_after in zip(before, after):
avg += metric.batch_run(json_before["input"], json_after["input"])
avg += metric.batch_run(json_before["input"], json_after["input"], json=True)[metric.key]
return avg / len(before)
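The updated call indexes the result with `metric.key`, suggesting `batch_run` now returns a dict of metric values. A self-contained toy mimicking that pattern (whitespace tokenization and all names here are stand-ins, not the library's internals):

```python
class ToyTokenMetric:
    """Toy stand-in for a token-reduction metric (hypothetical, not the
    library's TokenMetric): whitespace tokens instead of a real tokenizer."""

    key = "num_token_reduction"

    def batch_run(self, before_prompts, after_prompts):
        reductions = [
            (len(b.split()) - len(a.split())) / len(b.split())
            for b, a in zip(before_prompts, after_prompts)
        ]
        # Return a dict so callers index the result by `key`,
        # mirroring the `[metric.key]` access in the diff above.
        return {self.key: sum(reductions) / len(reductions)}

metric = ToyTokenMetric()
res = metric.batch_run(["Who is the president"], ["Who president"])
print(res[metric.key])  # 0.5
```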
14 changes: 7 additions & 7 deletions evaluations/eval.py
@@ -41,7 +41,7 @@ def run_logiqa(exp_name, p_optimizer, n_samples_max=100):
json_data["input"] = make_errors.run(json_data["input"])

if p_optimizer is not None:
json_data["input"] = p_optimizer.batch_run(
json_data["input"] = p_optimizer(
json_data["input"], skip_system=False, json=True
)

@@ -52,9 +52,9 @@ def run_logiqa(exp_name, p_optimizer, n_samples_max=100):
tokens_opti_metric = compute_metric.token_metric(samples_path, opti_samples_path)

# Compute Evals metric
utils.run_bash(
f"oaieval gpt-3.5-turbo temp --record_path {res_path} --log_to_file {log_path}"
)
# utils.run_bash(
# f"oaieval gpt-3.5-turbo temp --record_path {res_path} --log_to_file {log_path}"
# )
for line in utils.read_jsonl(res_path):
if "final_report" in line:
accuracy = line["final_report"]["accuracy"]
@@ -82,11 +82,11 @@ def run_logiqa(exp_name, p_optimizer, n_samples_max=100):
# "SynonymReplace_Optim_p_1.0": SynonymReplaceOptim(p=1),
# "Lemmatizer_Optim": LemmatizerOptim(),
# "Stemmer_Optim": StemmerOptim(),
# "NameReplace_Optim": NameReplaceOptim(),
"NameReplace_Optim": NameReplaceOptim(),
# "Punctuation_Optim": PunctuationOptim(),
# "Autocorrect_Optim": AutocorrectOptim(),
"Pulp_Optim_p_0.05": PulpOptim(p=0.05),
"Pulp_Optim_p_0.1": PulpOptim(p=0.1),
# "Pulp_Optim_p_0.05": PulpOptim(p=0.05),
# "Pulp_Optim_p_0.1": PulpOptim(p=0.1),
}
for exp_name in EXPERIMENTS:
p_optimizer = EXPERIMENTS[exp_name]
2 changes: 1 addition & 1 deletion evaluations/results.csv
@@ -11,4 +11,4 @@ NameReplace_Optim,0.011329279462348097,0.34
Punctuation_Optim,0.12810019014299953,0.35
Autocorrect_Optim,0.011435464848382511,0.3
Pulp_Optim_p_0.05,0.05493628125175053,0.31
Pulp_Optim_p_0.1,0.09521899460726639,0.25
Pulp_Optim_p_0.1,0.09521899460726639,0.25
2 changes: 1 addition & 1 deletion prompt_optimizer/__init__.py
@@ -1,4 +1,3 @@
from prompt_optimizer.visualize import StringDiffer
from prompt_optimizer.metric import BERTScoreMetric, Metric, TokenMetric
from prompt_optimizer.poptim import (
LemmatizerOptim,
@@ -9,6 +8,7 @@
StemmerOptim,
StopWordOptim,
)
from prompt_optimizer.visualize import StringDiffer

__all__ = [
"StringDiffer",
1 change: 1 addition & 0 deletions prompt_optimizer/metric/base.py
@@ -63,6 +63,7 @@ def batch_run(
continue
else:
res = self.run_json(pb, pa)
n += 1

elif langchain:
if skip_system and pb.role == "system":
2 changes: 0 additions & 2 deletions prompt_optimizer/poptim/__init__.py
@@ -12,8 +12,6 @@
from prompt_optimizer.poptim.stop_word_optim import StopWordOptim
from prompt_optimizer.poptim.synonym_replace_optim import SynonymReplaceOptim



__all__ = [
"Sequential",
"PromptOptimize",
1 change: 0 additions & 1 deletion prompt_optimizer/poptim/lemmatizer_optim.py
@@ -1,6 +1,5 @@
import nltk
from nltk.corpus import wordnet

from nltk.stem import WordNetLemmatizer

from prompt_optimizer.poptim.base import PromptOptimize
1 change: 1 addition & 0 deletions prompt_optimizer/poptim/logger.py
@@ -2,6 +2,7 @@

import logging


def configure_logger(log_file=None):
logger.setLevel(logging.INFO)

2 changes: 1 addition & 1 deletion prompt_optimizer/poptim/synonym_replace_optim.py
@@ -1,7 +1,7 @@
import random

import tiktoken
import nltk
import tiktoken
from nltk.corpus import wordnet

from prompt_optimizer.poptim.base import PromptOptimize
9 changes: 5 additions & 4 deletions prompt_optimizer/visualize/stringdiffer.py
@@ -1,24 +1,25 @@
from difflib import ndiff


class StringDiffer:
def __init__(self):
"""
Initializes a StringDiffer object with the original and optimized strings.
"""
pass
pass

def __call__(self, original: str, optimized: str) -> None:
"""
Prints the visualized difference between the original and optimized strings.
Deletions are shown in red, insertions in green, and unchanged parts in default color.
Args:
original (str): The original string.
optimized (str): The optimized string.
optimized (str): The optimized string.
"""
original = str(original)
optimized = str(optimized)

diff = list(ndiff(original, optimized))
output = ""
for op, _, value in diff:
