diff --git a/README.md b/README.md index 320fd1e..9df36a3 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,20 @@ -# PromptOptimizer -Minimize LLM token complexity to save API costs and model computations. +
-[![lint](https://github.com/TimeTraveller-San/prompt-optimizer/actions/workflows/lint.yml/badge.svg)](https://github.com/TimeTraveller-San/prompt-optimizer/actions/workflows/lint.yml) [![test](https://github.com/TimeTraveller-San/prompt-optimizer/actions/workflows/test.yml/badge.svg)](https://github.com/TimeTraveller-San/prompt-optimizer/actions/workflows/test.yml) [![linkcheck](https://github.com/TimeTraveller-San/prompt-optimizer/actions/workflows/linkcheck.yml/badge.svg)](https://github.com/TimeTraveller-San/prompt-optimizer/actions/workflows/linkcheck.yml) [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT) + ## PromptOptimizer + + <!-- kevin inspired logo --> + + Minimize LLM token complexity to save API costs and model computations. + +
+
+ +[![lint](https://github.com/TimeTraveller-San/prompt-optimizer/actions/workflows/lint.yml/badge.svg)](https://github.com/TimeTraveller-San/prompt-optimizer/actions/workflows/lint.yml) +[![test](https://github.com/TimeTraveller-San/prompt-optimizer/actions/workflows/test.yml/badge.svg)](https://github.com/TimeTraveller-San/prompt-optimizer/actions/workflows/test.yml) +[![linkcheck](https://github.com/TimeTraveller-San/prompt-optimizer/actions/workflows/linkcheck.yml/badge.svg)](https://github.com/TimeTraveller-San/prompt-optimizer/actions/workflows/linkcheck.yml) +[![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT) + +
# Features @@ -10,7 +23,7 @@ Minimize LLM token complexity to save API costs and model computations. - **Multiple Input Format Support:** Optimization of strings, batches of strings, and JSON prompt data, with an option to skip system prompts. - **Sequential Optimization:** Chain different optimizers together sequentially. - **Optimization Metrics:** Number of tokens reduced and semantic similarity before and after optimization. -- **Langhcain Support:** Supports langchain style prompt chains. +- **Langchain and JSON Support:** Supports langchain-style prompt chains and OpenAI request JSON objects. # Why? - **Minimize Token Complexity:** Token complexity is the number of prompt tokens required to achieve a given task. Reducing token complexity corresponds to linearly reducing API costs and quadratically reducing the computational complexity of typical transformer models. @@ -18,14 +31,20 @@ Minimize LLM token complexity to save API costs and model computations. - **Extend Limitations:** Some models have small context lengths; prompt optimizers can help them process documents larger than the context window. # Why does it work? + 1. LLMs are powerful; they can infill missing information. 2. Natural language is bulky; large words and phrases can often be replaced by smaller ones. +
+ ![Image](evaluations/artifacts/kevin.gif) +
+ | Prompt | # Tokens | Correct Response? | | ------------------------------------------------------- | ---------- | ------------------- | | Who is the president of the United States of America? | 11 | ✅ | | Who president US | 3 (-72%) | ✅ | + # Installation ### Quick Installation ```pip install prompt-optimizer``` diff --git a/evaluations/artifacts/kevin.gif b/evaluations/artifacts/kevin.gif new file mode 100644 index 0000000..8c650fd Binary files /dev/null and b/evaluations/artifacts/kevin.gif differ diff --git a/evaluations/artifacts/logo.png b/evaluations/artifacts/logo.png new file mode 100644 index 0000000..6e935c4 Binary files /dev/null and b/evaluations/artifacts/logo.png differ diff --git a/evaluations/compute_metric.py b/evaluations/compute_metric.py index bbe690b..44bddb0 100644 --- a/evaluations/compute_metric.py +++ b/evaluations/compute_metric.py @@ -8,5 +8,5 @@ def token_metric(before_samples_dir, after_samples_dir, n_samples_max=100): metric = TokenMetric() avg = 0 for json_before, json_after in zip(before, after): - avg += metric.batch_run(json_before["input"], json_after["input"]) + avg += metric.batch_run(json_before["input"], json_after["input"], json=True)[metric.key] return avg / len(before) diff --git a/evaluations/eval.py b/evaluations/eval.py index 345d7d7..0e9ceb3 100644 --- a/evaluations/eval.py +++ b/evaluations/eval.py @@ -41,7 +41,7 @@ def run_logiqa(exp_name, p_optimizer, n_samples_max=100): json_data["input"] = make_errors.run(json_data["input"]) if p_optimizer is not None: - json_data["input"] = p_optimizer.batch_run( + json_data["input"] = p_optimizer( json_data["input"], skip_system=False, json=True ) @@ -52,9 +52,9 @@ def run_logiqa(exp_name, p_optimizer, n_samples_max=100): tokens_opti_metric = compute_metric.token_metric(samples_path, opti_samples_path) # Compute Evals metric - utils.run_bash( - f"oaieval gpt-3.5-turbo temp --record_path {res_path} --log_to_file {log_path}" - ) + # utils.run_bash( + # f"oaieval gpt-3.5-turbo temp --record_path {res_path} --log_to_file {log_path}" + # ) for line in utils.read_jsonl(res_path): if "final_report" in line: accuracy = line["final_report"]["accuracy"] @@ -82,11 +82,11 @@ def run_logiqa(exp_name, p_optimizer, n_samples_max=100): # "SynonymReplace_Optim_p_1.0": SynonymReplaceOptim(p=1), # "Lemmatizer_Optim": LemmatizerOptim(), # "Stemmer_Optim": StemmerOptim(), - # "NameReplace_Optim": NameReplaceOptim(), + "NameReplace_Optim": NameReplaceOptim(), # "Punctuation_Optim": PunctuationOptim(), # "Autocorrect_Optim": AutocorrectOptim(), - "Pulp_Optim_p_0.05": PulpOptim(p=0.05), - "Pulp_Optim_p_0.1": PulpOptim(p=0.1), + # "Pulp_Optim_p_0.05": PulpOptim(p=0.05), + # "Pulp_Optim_p_0.1": PulpOptim(p=0.1), } for exp_name in EXPERIMENTS: p_optimizer = EXPERIMENTS[exp_name] diff --git a/evaluations/results.csv b/evaluations/results.csv index 98c6d60..e29994e 100644 --- a/evaluations/results.csv +++ b/evaluations/results.csv @@ -11,4 +11,4 @@ NameReplace_Optim,0.011329279462348097,0.34 Punctuation_Optim,0.12810019014299953,0.35 Autocorrect_Optim,0.011435464848382511,0.3 Pulp_Optim_p_0.05,0.05493628125175053,0.31 -Pulp_Optim_p_0.1,0.09521899460726639,0.25 +Pulp_Optim_p_0.1,0.09521899460726639,0.25 \ No newline at end of file diff --git a/prompt_optimizer/__init__.py b/prompt_optimizer/__init__.py index fec1ba1..12c4f7c 100644 --- a/prompt_optimizer/__init__.py +++ b/prompt_optimizer/__init__.py @@ -1,4 +1,3 @@ -from prompt_optimizer.visualize import StringDiffer from prompt_optimizer.metric import BERTScoreMetric, Metric, 
TokenMetric from prompt_optimizer.poptim import ( LemmatizerOptim, @@ -9,6 +8,7 @@ StemmerOptim, StopWordOptim, ) +from prompt_optimizer.visualize import StringDiffer __all__ = [ "StringDiffer", diff --git a/prompt_optimizer/metric/base.py b/prompt_optimizer/metric/base.py index 9ca350f..bea3c14 100644 --- a/prompt_optimizer/metric/base.py +++ b/prompt_optimizer/metric/base.py @@ -63,6 +63,7 @@ def batch_run( continue else: res = self.run_json(pb, pa) + n += 1 elif langchain: if skip_system and pb.role == "system": diff --git a/prompt_optimizer/poptim/__init__.py b/prompt_optimizer/poptim/__init__.py index 501439b..27c1f5a 100644 --- a/prompt_optimizer/poptim/__init__.py +++ b/prompt_optimizer/poptim/__init__.py @@ -12,8 +12,6 @@ from prompt_optimizer.poptim.stop_word_optim import StopWordOptim from prompt_optimizer.poptim.synonym_replace_optim import SynonymReplaceOptim - - __all__ = [ "Sequential", "PromptOptimize", diff --git a/prompt_optimizer/poptim/lemmatizer_optim.py b/prompt_optimizer/poptim/lemmatizer_optim.py index f032fa1..999f1df 100644 --- a/prompt_optimizer/poptim/lemmatizer_optim.py +++ b/prompt_optimizer/poptim/lemmatizer_optim.py @@ -1,6 +1,5 @@ import nltk from nltk.corpus import wordnet - from nltk.stem import WordNetLemmatizer from prompt_optimizer.poptim.base import PromptOptimize diff --git a/prompt_optimizer/poptim/logger.py b/prompt_optimizer/poptim/logger.py index 7b86572..83d3a83 100644 --- a/prompt_optimizer/poptim/logger.py +++ b/prompt_optimizer/poptim/logger.py @@ -2,6 +2,7 @@ import logging + def configure_logger(log_file=None): logger.setLevel(logging.INFO) diff --git a/prompt_optimizer/poptim/synonym_replace_optim.py b/prompt_optimizer/poptim/synonym_replace_optim.py index dd3f462..7a915cf 100644 --- a/prompt_optimizer/poptim/synonym_replace_optim.py +++ b/prompt_optimizer/poptim/synonym_replace_optim.py @@ -1,7 +1,7 @@ import random -import tiktoken import nltk +import tiktoken from nltk.corpus import wordnet from prompt_optimizer.poptim.base import PromptOptimize diff --git a/prompt_optimizer/visualize/stringdiffer.py b/prompt_optimizer/visualize/stringdiffer.py index 5dd415d..4ec9043 100644 --- a/prompt_optimizer/visualize/stringdiffer.py +++ b/prompt_optimizer/visualize/stringdiffer.py @@ -1,24 +1,25 @@ from difflib import ndiff + class StringDiffer: def __init__(self): """ Initializes a stateless StringDiffer object; the original and optimized strings are supplied at call time. """ - pass + pass def __call__(self, original: str, optimized: str) -> None: """ Prints the visualized difference between the original and optimized strings. Deletions are shown in red, insertions in green, and unchanged parts in default color. - + Args: original (str): The original string. - optimized (str): The optimized string. + optimized (str): The optimized string. """ original = str(original) optimized = str(optimized) - + diff = list(ndiff(original, optimized)) output = "" for op, _, value in diff:
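Taken together, the `compute_metric.py` and `eval.py` hunks settle on one calling convention: optimizers are invoked as callables on JSON prompt data, and `Metric.batch_run(..., json=True)` returns a result dict indexed by the metric's `key` attribute. The sketch below restates that convention outside the eval harness. It is a minimal illustration only; the OpenAI-style message schema, the choice of `StopWordOptim`, and the exact shape of the returned value are assumptions inferred from these hunks, not confirmed API documentation.

```python
# Minimal sketch of the post-patch calling convention (assumptions noted above).
from prompt_optimizer.metric import TokenMetric
from prompt_optimizer.poptim import StopWordOptim

# Assumed OpenAI-style request messages, mirroring the eval samples' "input" field.
messages = [
    {"role": "system", "content": "You are a helpful assistant."},
    {"role": "user", "content": "Who is the president of the United States of America?"},
]

# Optimizers are used as callables on JSON data, as in the eval.py hunk.
p_optimizer = StopWordOptim()
optimized = p_optimizer(messages, skip_system=True, json=True)

# batch_run returns a dict of results; the scalar for this metric lives under
# metric.key, which is why compute_metric.py now indexes with [metric.key].
metric = TokenMetric()
token_reduction = metric.batch_run(messages, optimized, json=True)[metric.key]
print(token_reduction)
```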
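The `stringdiffer.py` hunk is formatting-only, but it documents the intended usage: `StringDiffer` takes no constructor arguments and is called with the original and optimized strings, printing a colored character-level diff. A short usage sketch, reusing the README's example prompt:

```python
from prompt_optimizer.visualize import StringDiffer

sd = StringDiffer()
# Prints deletions in red, insertions in green, and unchanged text in the
# default color, per the __call__ docstring in the hunk above.
sd(
    "Who is the president of the United States of America?",
    "Who president US",
)
```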