Best docs Cinnamon will probably ever have (#105)
This commit is contained in:
@@ -67,7 +67,7 @@ class LangchainAgent(BaseAgent):
|
||||
def run(self, instruction: str) -> AgentOutput:
|
||||
assert (
|
||||
self.agent is not None
|
||||
), "Lanchain AgentExecutor is not correclty initialized"
|
||||
), "Lanchain AgentExecutor is not correctly initialized"
|
||||
|
||||
# Langchain AgentExecutor call
|
||||
output = self.agent(instruction)["output"]
|
||||
|
@@ -6,16 +6,16 @@ from kotaemon.base.schema import Document
|
||||
|
||||
|
||||
class BaseComponent(Function):
|
||||
"""A component is a class that can be used to compose a pipeline
|
||||
"""A component is a class that can be used to compose a pipeline.
|
||||
|
||||
Benefits of component:
|
||||
!!! tip "Benefits of component"
|
||||
- Auto caching, logging
|
||||
- Allow deployment
|
||||
|
||||
For each component, the spirit is:
|
||||
!!! tip "For each component, the spirit is"
|
||||
- Tolerate multiple input types, e.g. str, Document, List[str], List[Document]
|
||||
- Enforce single output type. Hence, the output type of a component should be
|
||||
as generic as possible.
|
||||
as generic as possible.
|
||||
"""
|
||||
|
||||
inflow = None
|
||||
|
@@ -22,6 +22,9 @@ class Document(BaseDocument):
|
||||
This class accepts one positional argument `content` of an arbitrary type, which will
|
||||
store the raw content of the document. If specified, the class will use
|
||||
`content` to initialize the base llama_index class.
|
||||
|
||||
Args:
|
||||
content: the raw content of the document.
|
||||
"""
|
||||
|
||||
content: Any
|
||||
@@ -99,7 +102,7 @@ class RetrievedDocument(Document):
|
||||
"""Subclass of Document with retrieval-related information
|
||||
|
||||
Attributes:
|
||||
score (float): score of the document (from 0.0 to 1.0)
|
||||
score (float): score of the document (from 0.0 to 1.0)
|
||||
retrieval_metadata (dict): metadata from the retrieval process, can be used
|
||||
by different components in a retrieved pipeline to communicate with each
|
||||
other
|
||||
|
@@ -4,6 +4,7 @@ from .base import BaseLLM
|
||||
from .branching import GatedBranchingPipeline, SimpleBranchingPipeline
|
||||
from .chats import AzureChatOpenAI, ChatLLM
|
||||
from .completions import LLM, AzureOpenAI, OpenAI
|
||||
from .cot import ManualSequentialChainOfThought, Thought
|
||||
from .linear import GatedLinearPipeline, SimpleLinearPipeline
|
||||
from .prompts import BasePromptComponent, PromptTemplate
|
||||
|
||||
@@ -28,4 +29,7 @@ __all__ = [
|
||||
"GatedLinearPipeline",
|
||||
"SimpleBranchingPipeline",
|
||||
"GatedBranchingPipeline",
|
||||
# chain-of-thoughts
|
||||
"ManualSequentialChainOfThought",
|
||||
"Thought",
|
||||
]
|
||||
|
@@ -12,7 +12,8 @@ class SimpleBranchingPipeline(BaseComponent):
|
||||
Attributes:
|
||||
branches (List[BaseComponent]): The list of branches to be executed.
|
||||
|
||||
Example Usage:
|
||||
Example:
|
||||
```python
|
||||
from kotaemon.llms import (
|
||||
AzureChatOpenAI,
|
||||
BasePromptComponent,
|
||||
@@ -45,6 +46,7 @@ class SimpleBranchingPipeline(BaseComponent):
|
||||
print(pipeline(condition_text="1"))
|
||||
print(pipeline(condition_text="2"))
|
||||
print(pipeline(condition_text="12"))
|
||||
```
|
||||
"""
|
||||
|
||||
branches: List[BaseComponent] = Param(default_callback=lambda *_: [])
|
||||
@@ -87,7 +89,8 @@ class GatedBranchingPipeline(SimpleBranchingPipeline):
|
||||
Attributes:
|
||||
branches (List[BaseComponent]): The list of branches to be executed.
|
||||
|
||||
Example Usage:
|
||||
Example:
|
||||
```python
|
||||
from kotaemon.llms import (
|
||||
AzureChatOpenAI,
|
||||
BasePromptComponent,
|
||||
@@ -119,6 +122,7 @@ class GatedBranchingPipeline(SimpleBranchingPipeline):
|
||||
)
|
||||
print(pipeline(condition_text="1"))
|
||||
print(pipeline(condition_text="2"))
|
||||
```
|
||||
"""
|
||||
|
||||
def run(self, *, condition_text: Optional[str] = None, **prompt_kwargs):
|
||||
@@ -135,7 +139,7 @@ class GatedBranchingPipeline(SimpleBranchingPipeline):
|
||||
Union[OutputType, None]: The output of the first branch that satisfies the
|
||||
condition, or None if no branch satisfies the condition.
|
||||
|
||||
Raise:
|
||||
Raises:
|
||||
ValueError: If condition_text is None
|
||||
"""
|
||||
if condition_text is None:
|
||||
|
@@ -1,7 +1,9 @@
|
||||
from copy import deepcopy
|
||||
from typing import Callable, List
|
||||
|
||||
from kotaemon.base import BaseComponent, Document, Node, Param
|
||||
from theflow import Function, Node, Param
|
||||
|
||||
from kotaemon.base import BaseComponent, Document
|
||||
|
||||
from .chats import AzureChatOpenAI
|
||||
from .completions import LLM
|
||||
@@ -66,13 +68,13 @@ class Thought(BaseComponent):
|
||||
|
||||
prompt: str = Param(
|
||||
help=(
|
||||
"The prompt template string. This prompt template has Python-like "
|
||||
"variable placeholders, that then will be subsituted with real values when "
|
||||
"this component is executed"
|
||||
"The prompt template string. This prompt template has Python-like variable"
|
||||
" placeholders, that then will be substituted with real values when this"
|
||||
" component is executed"
|
||||
)
|
||||
)
|
||||
llm: LLM = Node(AzureChatOpenAI, help="The LLM model to execute the input prompt")
|
||||
post_process: BaseComponent = Node(
|
||||
post_process: Function = Node(
|
||||
help=(
|
||||
"The function post-processor that post-processes LLM output prediction ."
|
||||
"It should take a string as input (this is the LLM output text) and return "
|
||||
@@ -83,7 +85,7 @@ class Thought(BaseComponent):
|
||||
@Node.auto(depends_on="prompt")
|
||||
def prompt_template(self):
|
||||
"""Automatically wrap around param prompt. Can ignore"""
|
||||
return BasePromptComponent(template=self.prompt)
|
||||
return BasePromptComponent(self.prompt)
|
||||
|
||||
def run(self, **kwargs) -> Document:
|
||||
"""Run the chain of thought"""
|
||||
@@ -113,20 +115,19 @@ class ManualSequentialChainOfThought(BaseComponent):
|
||||
|
||||
**Create and run a chain of thought without "+" operator:**
|
||||
|
||||
```python
|
||||
>> from kotaemon.pipelines.cot import Thought, ManualSequentialChainOfThought
|
||||
|
||||
>> llm = AzureChatOpenAI(...)
|
||||
>> thought1 = Thought(
|
||||
prompt="Word {word} in {language} is ",
|
||||
post_process=lambda string: {"translated": string},
|
||||
)
|
||||
>> thought2 = Thought(
|
||||
prompt="Translate {translated} to Japanese",
|
||||
post_process=lambda string: {"output": string},
|
||||
)
|
||||
>> thought = ManualSequentialChainOfThought(thoughts=[thought1, thought2], llm=llm)
|
||||
>> thought(word="hello", language="French")
|
||||
```pycon
|
||||
>>> from kotaemon.pipelines.cot import Thought, ManualSequentialChainOfThought
|
||||
>>> llm = AzureChatOpenAI(...)
|
||||
>>> thought1 = Thought(
|
||||
...     prompt="Word {word} in {language} is ",
|
||||
...     post_process=lambda string: {"translated": string},
|
||||
... )
|
||||
>>> thought2 = Thought(
|
||||
...     prompt="Translate {translated} to Japanese",
|
||||
...     post_process=lambda string: {"output": string},
|
||||
... )
|
||||
>>> thought = ManualSequentialChainOfThought(thoughts=[thought1, thought2], llm=llm)
|
||||
>>> thought(word="hello", language="French")
|
||||
{'word': 'hello',
|
||||
'language': 'French',
|
||||
'translated': '"Bonjour"',
|
||||
|
@@ -21,6 +21,7 @@ class SimpleLinearPipeline(BaseComponent):
|
||||
post-processor component or function.
|
||||
|
||||
Example Usage:
|
||||
```python
|
||||
from kotaemon.llms import AzureChatOpenAI, BasePromptComponent
|
||||
|
||||
def identity(x):
|
||||
@@ -41,6 +42,7 @@ class SimpleLinearPipeline(BaseComponent):
|
||||
post_processor=identity,
|
||||
)
|
||||
print(pipeline(word="lone"))
|
||||
```
|
||||
"""
|
||||
|
||||
prompt: BasePromptComponent
|
||||
@@ -85,7 +87,8 @@ class GatedLinearPipeline(SimpleLinearPipeline):
|
||||
condition (Callable[[IO_Type], Any]): A callable function that represents the
|
||||
condition.
|
||||
|
||||
Example Usage:
|
||||
Usage:
|
||||
```{.py3 title="Example Usage"}
|
||||
from kotaemon.llms import AzureChatOpenAI, BasePromptComponent
|
||||
from kotaemon.parsers import RegexExtractor
|
||||
|
||||
@@ -109,6 +112,7 @@ class GatedLinearPipeline(SimpleLinearPipeline):
|
||||
)
|
||||
print(pipeline(condition_text="some pattern", word="lone"))
|
||||
print(pipeline(condition_text="other pattern", word="lone"))
|
||||
```
|
||||
"""
|
||||
|
||||
condition: Callable[[IO_Type], Any]
|
||||
|
@@ -72,7 +72,7 @@ class PromptTemplate:
|
||||
UserWarning,
|
||||
)
|
||||
|
||||
def populate(self, **kwargs):
|
||||
def populate(self, **kwargs) -> str:
|
||||
"""
|
||||
Strictly populate the template with the given keyword arguments.
|
||||
|
||||
@@ -81,7 +81,7 @@ class PromptTemplate:
|
||||
Each keyword corresponds to a placeholder in the template.
|
||||
|
||||
Returns:
|
||||
str: The populated template.
|
||||
The populated template.
|
||||
|
||||
Raises:
|
||||
ValueError: If an unknown placeholder is provided.
|
||||
|
@@ -4,7 +4,7 @@ from typing import Any, List, Type, Union
|
||||
from llama_index import SimpleDirectoryReader, download_loader
|
||||
from llama_index.readers.base import BaseReader
|
||||
|
||||
from ..base import BaseComponent, Document
|
||||
from kotaemon.base import BaseComponent, Document
|
||||
|
||||
|
||||
class AutoReader(BaseComponent):
|
||||
|
@@ -93,7 +93,7 @@ def get_rect_iou(gt_box: List[tuple], pd_box: List[tuple], iou_type=0) -> int:
|
||||
|
||||
# compute the intersection over union by taking the intersection
|
||||
# area and dividing it by the sum of prediction + ground-truth
|
||||
# areas - the interesection area
|
||||
# areas - the intersection area
|
||||
if iou_type == 0:
|
||||
iou = interArea / float(gt_area + pd_area - interArea)
|
||||
elif iou_type == 1:
|
||||
|
@@ -34,8 +34,7 @@ def read_pdf_unstructured(input_path: Union[Path, str]):
|
||||
from unstructured.partition.auto import partition
|
||||
except ImportError:
|
||||
raise ImportError(
|
||||
"Please install unstructured PDF reader \
|
||||
`pip install unstructured[pdf]`"
|
||||
"Please install unstructured PDF reader `pip install unstructured[pdf]`"
|
||||
)
|
||||
|
||||
page_items = defaultdict(list)
|
||||
@@ -60,7 +59,7 @@ def read_pdf_unstructured(input_path: Union[Path, str]):
|
||||
def merge_ocr_and_pdf_texts(
|
||||
ocr_list: List[dict], pdf_text_list: List[dict], debug_info=None
|
||||
):
|
||||
"""Merge PDF and OCR text using IOU overlaping location
|
||||
"""Merge PDF and OCR text using IOU overlapping location
|
||||
Args:
|
||||
ocr_list: List of OCR items {"text", "box", "location"}
|
||||
pdf_text_list: List of PDF items {"text", "box", "location"}
|
||||
@@ -115,7 +114,7 @@ def merge_ocr_and_pdf_texts(
|
||||
def merge_table_cell_and_ocr(
|
||||
table_list: List[dict], ocr_list: List[dict], pdf_list: List[dict], debug_info=None
|
||||
):
|
||||
"""Merge table items with OCR text using IOU overlaping location
|
||||
"""Merge table items with OCR text using IOU overlapping location
|
||||
Args:
|
||||
table_list: List of table items
|
||||
"type": ("table", "cell", "text"), "text", "box", "location"}
|
||||
@@ -123,7 +122,7 @@ def merge_table_cell_and_ocr(
|
||||
pdf_list: List of PDF items {"text", "box", "location"}
|
||||
|
||||
Returns:
|
||||
all_table_cells: List of tables, each of table is reprented
|
||||
all_table_cells: List of tables, each table is represented
|
||||
by list of cells with combined text from OCR
|
||||
not_matched_items: List of PDF text which is not overlapped by table region
|
||||
"""
|
||||
|
@@ -100,11 +100,14 @@ class RegexExtractor(BaseComponent):
|
||||
A list contains the output ExtractorOutput for each input
|
||||
|
||||
Example:
|
||||
document1 = Document(...)
|
||||
document2 = Document(...)
|
||||
document_batch = [document1, document2]
|
||||
batch_output = self(document_batch)
|
||||
# batch_output will be [output1_document1, output1_document2]
|
||||
```pycon
|
||||
>>> document1 = Document(...)
|
||||
>>> document2 = Document(...)
|
||||
>>> document_batch = [document1, document2]
|
||||
>>> batch_output = self(document_batch)
|
||||
>>> print(batch_output)
|
||||
[output1_document1, output1_document2]
|
||||
```
|
||||
"""
|
||||
# TODO: this conversion seems common
|
||||
input_: list[str] = []
|
||||
|
Reference in New Issue
Block a user