[AUR-390] Add prompt template and prompt component (#24)
* Export pipeline to config * Export the input to config * Preliminary creating UI dynamically * Add test for config export * Try out prompt UI * Add example projects * Fix test errors * Standardize interface for retrieval * Finalize the UI demo * Update README.md * Update README * Refactor according to main * Fix typing issue * Add openai key to git-secret * Add prompt template and prompt component * Update test * update tests * revert docstring --------- Co-authored-by: trducng <trungduc1992@gmail.com> Co-authored-by: Nguyen Trung Duc (john) <john@cinnamon.is>
This commit is contained in:
@@ -22,6 +22,9 @@ class Document(BaseDocument):
|
||||
text = self.text
|
||||
return HaystackDocument(content=text, meta=metadata)
|
||||
|
||||
def __str__(self):
|
||||
return self.text
|
||||
|
||||
|
||||
class RetrievedDocument(Document):
|
||||
"""Subclass of Document with retrieval-related information
|
||||
|
@@ -6,7 +6,8 @@ from kotaemon.documents.base import Document
|
||||
|
||||
|
||||
class RegexExtractor(BaseComponent):
|
||||
"""Simple class for extracting text from a document using a regex pattern.
|
||||
"""
|
||||
Simple class for extracting text from a document using a regex pattern.
|
||||
|
||||
Args:
|
||||
pattern (str): The regex pattern to use.
|
||||
|
0
knowledgehub/prompt/__init__.py
Normal file
0
knowledgehub/prompt/__init__.py
Normal file
184
knowledgehub/prompt/base.py
Normal file
184
knowledgehub/prompt/base.py
Normal file
@@ -0,0 +1,184 @@
|
||||
from typing import Union
|
||||
|
||||
from kotaemon.base import BaseComponent
|
||||
from kotaemon.documents.base import Document
|
||||
from kotaemon.prompt.template import PromptTemplate
|
||||
|
||||
|
||||
class BasePrompt(BaseComponent):
|
||||
"""
|
||||
Base class for prompt components.
|
||||
|
||||
Args:
|
||||
template (PromptTemplate): The prompt template.
|
||||
**kwargs: Any additional keyword arguments that will be used to populate the
|
||||
given template.
|
||||
"""
|
||||
|
||||
def __check_redundant_kwargs(self, **kwargs):
|
||||
"""
|
||||
Check for redundant keyword arguments.
|
||||
|
||||
Parameters:
|
||||
**kwargs (dict): A dictionary of keyword arguments.
|
||||
|
||||
Raises:
|
||||
ValueError: If any keys provided are not in the template.
|
||||
|
||||
Returns:
|
||||
None
|
||||
"""
|
||||
provided_keys = set(kwargs.keys())
|
||||
expected_keys = self.template.placeholders
|
||||
|
||||
redundant_keys = provided_keys - expected_keys
|
||||
if redundant_keys:
|
||||
raise ValueError(f"\nKeys provided but not in template: {redundant_keys}")
|
||||
|
||||
def __check_unset_placeholders(self):
|
||||
"""
|
||||
Check if all the placeholders in the template are set.
|
||||
|
||||
This function checks if all the expected placeholders in the template are set as
|
||||
attributes of the object. If any placeholders are missing, a `ValueError`
|
||||
is raised with the names of the missing keys.
|
||||
|
||||
Parameters:
|
||||
None
|
||||
|
||||
Returns:
|
||||
None
|
||||
"""
|
||||
expected_keys = self.template.placeholders
|
||||
|
||||
missing_keys = []
|
||||
for key in expected_keys:
|
||||
if key not in self.__dict__:
|
||||
missing_keys.append(key)
|
||||
|
||||
if missing_keys:
|
||||
raise ValueError(f"\nMissing keys in template: {missing_keys}")
|
||||
|
||||
def __validate_value_type(self, **kwargs):
|
||||
"""
|
||||
Validates the value types of the given keyword arguments.
|
||||
|
||||
Parameters:
|
||||
**kwargs (dict): A dictionary of keyword arguments to be validated.
|
||||
|
||||
Raises:
|
||||
ValueError: If any of the values in the kwargs dictionary have an
|
||||
unsupported type.
|
||||
|
||||
Returns:
|
||||
None
|
||||
"""
|
||||
type_error = []
|
||||
for k, v in kwargs.items():
|
||||
if not isinstance(v, (str, int, Document, BaseComponent)):
|
||||
if isinstance(v, int):
|
||||
kwargs[k] = str(v)
|
||||
type_error.append((k, type(v)))
|
||||
|
||||
if type_error:
|
||||
raise ValueError(
|
||||
"Type of values must be either int, str, Document, BaseComponent, "
|
||||
f"found unsupported type for (key, type): {type_error}"
|
||||
)
|
||||
|
||||
def __set(self, **kwargs):
|
||||
"""
|
||||
Set the values of the attributes in the object based on the provided keyword
|
||||
arguments.
|
||||
|
||||
Args:
|
||||
kwargs (dict): A dictionary with the attribute names as keys and the new
|
||||
values as values.
|
||||
|
||||
Returns:
|
||||
None
|
||||
"""
|
||||
self.__check_redundant_kwargs(**kwargs)
|
||||
self.__validate_value_type(**kwargs)
|
||||
|
||||
self.__dict__.update(kwargs)
|
||||
|
||||
def __prepare_value(self):
|
||||
"""
|
||||
Generate a dictionary of keyword arguments based on the template's placeholders
|
||||
and the current instance's attributes.
|
||||
|
||||
Returns:
|
||||
dict: A dictionary of keyword arguments.
|
||||
"""
|
||||
kwargs = {}
|
||||
for k in self.template.placeholders:
|
||||
v = getattr(self, k)
|
||||
if isinstance(v, (int, Document)):
|
||||
v = str(v)
|
||||
elif isinstance(v, BaseComponent):
|
||||
v = str(v())
|
||||
kwargs[k] = v
|
||||
|
||||
return kwargs
|
||||
|
||||
def __init__(self, template: Union[str, PromptTemplate], **kwargs):
|
||||
super().__init__()
|
||||
self.template = (
|
||||
template
|
||||
if isinstance(template, PromptTemplate)
|
||||
else PromptTemplate(template)
|
||||
)
|
||||
|
||||
self.__set(**kwargs)
|
||||
|
||||
def set(self, **kwargs):
|
||||
"""
|
||||
Similar to `__set` but for external use.
|
||||
|
||||
Set the values of the attributes in the object based on the provided keyword
|
||||
arguments.
|
||||
|
||||
Args:
|
||||
kwargs (dict): A dictionary with the attribute names as keys and the new
|
||||
values as values.
|
||||
|
||||
Returns:
|
||||
None
|
||||
"""
|
||||
self.__set(**kwargs)
|
||||
|
||||
def run(self, **kwargs):
|
||||
"""
|
||||
Run the function with the given keyword arguments.
|
||||
|
||||
Args:
|
||||
**kwargs: The keyword arguments to pass to the function.
|
||||
|
||||
Returns:
|
||||
The result of calling the `populate` method of the `template` object
|
||||
with the given keyword arguments.
|
||||
"""
|
||||
self.__set(**kwargs)
|
||||
self.__check_unset_placeholders()
|
||||
prepared_kwargs = self.__prepare_value()
|
||||
|
||||
return self.template.populate(**prepared_kwargs)
|
||||
|
||||
def run_raw(self, *args, **kwargs):
|
||||
pass
|
||||
|
||||
def run_batch_raw(self, *args, **kwargs):
|
||||
pass
|
||||
|
||||
def run_document(self, *args, **kwargs):
|
||||
pass
|
||||
|
||||
def run_batch_document(self, *args, **kwargs):
|
||||
pass
|
||||
|
||||
def is_document(self, *args, **kwargs):
|
||||
pass
|
||||
|
||||
def is_batch(self, *args, **kwargs):
|
||||
pass
|
68
knowledgehub/prompt/template.py
Normal file
68
knowledgehub/prompt/template.py
Normal file
@@ -0,0 +1,68 @@
|
||||
import re
|
||||
from typing import Set
|
||||
|
||||
|
||||
class PromptTemplate:
|
||||
"""
|
||||
Base class for prompt templates.
|
||||
"""
|
||||
|
||||
@staticmethod
|
||||
def extract_placeholders(template: str) -> Set[str]:
|
||||
"""
|
||||
Extracts placeholders from a template string.
|
||||
|
||||
Args:
|
||||
template (str): The template string to extract placeholders from.
|
||||
|
||||
Returns:
|
||||
set[str]: A set of placeholder names found in the template string.
|
||||
"""
|
||||
placeholder_regex = r"{([a-zA-Z_][a-zA-Z0-9_]*)}"
|
||||
|
||||
placeholders = set()
|
||||
for item in re.findall(placeholder_regex, template):
|
||||
if item.isidentifier():
|
||||
placeholders.add(item)
|
||||
|
||||
return placeholders
|
||||
|
||||
def __init__(self, template: str):
|
||||
self.placeholders = self.extract_placeholders(template)
|
||||
self.template = template
|
||||
|
||||
def populate(self, **kwargs):
|
||||
"""
|
||||
Populate the template with the given keyword arguments.
|
||||
|
||||
Args:
|
||||
**kwargs: The keyword arguments to populate the template.
|
||||
Each keyword corresponds to a placeholder in the template.
|
||||
|
||||
Returns:
|
||||
str: The populated template.
|
||||
|
||||
Raises:
|
||||
ValueError: If an unknown placeholder is provided.
|
||||
|
||||
"""
|
||||
prompt = self.template
|
||||
for placeholder, value in kwargs.items():
|
||||
if placeholder not in self.placeholders:
|
||||
raise ValueError(f"Unknown placeholder: {placeholder}")
|
||||
prompt = prompt.replace(f"{{{placeholder}}}", value)
|
||||
|
||||
return prompt
|
||||
|
||||
def __add__(self, other):
|
||||
"""
|
||||
Create a new PromptTemplate object by concatenating the template of the current
|
||||
object with the template of another PromptTemplate object.
|
||||
|
||||
Parameters:
|
||||
other (PromptTemplate): Another PromptTemplate object.
|
||||
|
||||
Returns:
|
||||
PromptTemplate: A new PromptTemplate object with the concatenated templates.
|
||||
"""
|
||||
return PromptTemplate(self.template + "\n" + other.template)
|
Reference in New Issue
Block a user