Upgrade the declarative pipeline for a cleaner interface (#51)

Nguyen Trung Duc (john)
2023-10-24 11:12:22 +07:00
committed by GitHub
parent aab982ddc4
commit 9035e25666
26 changed files with 365 additions and 169 deletions
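
The recurring change in this commit replaces getter-method node declarations (`@Node.decorate()`, `@Param.decorate()`) with inline declarative defaults (`Node(...)`, `Param(_(...))`, `Component.withx(...)`), and renames `Node.decorate` to `Node.auto`. A minimal before/after sketch, assuming the theflow semantics implied by the hunks below (the `OldStyle`/`NewStyle` classes are illustrative; the declarations themselves are taken from the diff):

```python
from theflow import Param
from theflow.utils.modules import ObjectInitDeclaration as _

from kotaemon.base import BaseComponent
from kotaemon.docstores import InMemoryDocumentStore


class OldStyle(BaseComponent):
    # before: a three-line getter per default
    @Param.decorate()
    def doc_store(self):
        return InMemoryDocumentStore()


class NewStyle(BaseComponent):
    # after: one declarative line; `_` (ObjectInitDeclaration) appears to
    # defer construction until the attribute is first used
    doc_store: _[InMemoryDocumentStore] = _(InMemoryDocumentStore)
```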

View File

@@ -69,8 +69,8 @@ class Thought(BaseComponent):
         "variable placeholders, that then will be subsituted with real values when "
         "this component is executed"
     )
-    llm = Node(
-        default=AzureChatOpenAI, help="The LLM model to execute the input prompt"
+    llm: Node[BaseComponent] = Node(
+        AzureChatOpenAI, help="The LLM model to execute the input prompt"
     )
     post_process: Node[Compose] = Node(
         help="The function post-processor that post-processes LLM output prediction ."
@@ -78,7 +78,7 @@ class Thought(BaseComponent):
         "a dictionary, where the key should"
     )
-    @Node.decorate(depends_on="prompt")
+    @Node.auto(depends_on="prompt")
     def prompt_template(self):
         """Automatically wrap around param prompt. Can ignore"""
         return BasePromptComponent(self.prompt)
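
Two interface changes show up in this file: the node default is now passed positionally with an explicit `Node[...]` type parameter instead of a `default=` keyword, and `@Node.decorate` is renamed `@Node.auto`. A sketch of how the renamed decorator reads, with semantics inferred from `depends_on` rather than from theflow documentation (the `Sketch` class and the `BasePromptComponent` import path are assumptions):

```python
from theflow import Node

from kotaemon.base import BaseComponent
from kotaemon.prompt.base import BasePromptComponent  # import path assumed


class Sketch(BaseComponent):
    prompt: str = "{input}"  # illustrative param

    @Node.auto(depends_on="prompt")
    def prompt_template(self):
        # presumably re-built whenever `prompt` changes, so no manual
        # cache invalidation is needed
        return BasePromptComponent(self.prompt)
```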

View File

@@ -1,8 +1,10 @@
 import os
 from pathlib import Path
-from typing import List, Optional, Union
+from typing import Dict, List, Optional, Union
 
-from theflow import Node, Param
+from llama_index.readers.base import BaseReader
+from theflow import Node
+from theflow.utils.modules import ObjectInitDeclaration as _
 
 from kotaemon.base import BaseComponent
 from kotaemon.docstores import InMemoryDocumentStore
@@ -32,33 +34,22 @@ class ReaderIndexingPipeline(BaseComponent):
     # Expose variables for users to switch in prompt ui
     storage_path: Path = Path("./storage")
     reader_name: str = "normal"  # "normal" or "mathpix"
-    openai_api_base: str = "https://bleh-dummy-2.openai.azure.com/"
-    openai_api_key: str = os.environ.get("OPENAI_API_KEY", "")
-    chunk_size: int = 1024
-    chunk_overlap: int = 256
     file_name_list: List[str] = list()
+    vector_store: _[InMemoryVectorStore] = _(InMemoryVectorStore)
+    doc_store: _[InMemoryDocumentStore] = _(InMemoryDocumentStore)
 
-    @Param.decorate()
-    def vector_store(self):
-        return InMemoryVectorStore()
-
-    @Param.decorate()
-    def doc_store(self):
-        doc_store = InMemoryDocumentStore()
-        return doc_store
-
-    @Node.decorate(depends_on=["openai_api_base", "openai_api_key"])
-    def embedding(self):
-        return AzureOpenAIEmbeddings(
-            model="text-embedding-ada-002",
-            deployment="dummy-q2-text-embedding",
-            openai_api_base=self.openai_api_base,
-            openai_api_key=self.openai_api_key,
-        )
+    embedding: AzureOpenAIEmbeddings = AzureOpenAIEmbeddings.withx(
+        model="text-embedding-ada-002",
+        deployment="dummy-q2-text-embedding",
+        openai_api_base="https://bleh-dummy-2.openai.azure.com/",
+        openai_api_key=os.environ.get("OPENAI_API_KEY", ""),
+    )
 
     def get_reader(self, input_files: List[Union[str, Path]]):
         # document parsers
-        file_extractor = {
+        file_extractor: Dict[str, BaseReader] = {
             ".xlsx": PandasExcelReader(),
         }
         if self.reader_name == "normal":
@@ -71,7 +62,7 @@ class ReaderIndexingPipeline(BaseComponent):
         )
         return main_reader
 
-    @Node.decorate(depends_on=["doc_store", "vector_store", "embedding"])
+    @Node.auto(depends_on=["doc_store", "vector_store", "embedding"])
     def indexing_vector_pipeline(self):
         return IndexVectorStoreFromDocumentPipeline(
             doc_store=self.doc_store,
@@ -79,12 +70,9 @@ class ReaderIndexingPipeline(BaseComponent):
             embedding=self.embedding,
         )
 
-    @Node.decorate(depends_on=["chunk_size", "chunk_overlap"])
-    def text_splitter(self):
-        # chunking using NodeParser from llama-index
-        return SimpleNodeParser(
-            chunk_size=self.chunk_size, chunk_overlap=self.chunk_overlap
-        )
+    text_splitter: SimpleNodeParser = SimpleNodeParser.withx(
+        chunk_size=1024, chunk_overlap=256
+    )
 
     def run(
         self,
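
`withx` reads as class-level partial application: constructor kwargs are bound at declaration time and the instance is built when the pipeline graph is materialized. If that reading is right, a rough plain-Python analogy is `functools.partial` (an analogy only, not the library mechanism, which presumably also registers the result as a pipeline node; the import path is assumed):

```python
import os
from functools import partial

from kotaemon.embeddings import AzureOpenAIEmbeddings  # import path assumed

# Rough stand-in for AzureOpenAIEmbeddings.withx(...): kwargs are bound
# immediately, construction is deferred to the call site.
make_embedding = partial(
    AzureOpenAIEmbeddings,
    model="text-embedding-ada-002",
    openai_api_key=os.environ.get("OPENAI_API_KEY", ""),
)

embedding = make_embedding()  # instantiated only here
```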

View File

@@ -3,6 +3,7 @@ from pathlib import Path
 from typing import List
 
 from theflow import Node, Param
+from theflow.utils.modules import ObjectInitDeclaration as _
 
 from kotaemon.base import BaseComponent
 from kotaemon.docstores import InMemoryDocumentStore
@@ -25,8 +26,6 @@ class QuestionAnsweringPipeline(BaseComponent):
     storage_path: Path = Path("./storage")
     retrieval_top_k: int = 3
-    openai_api_base: str = "https://bleh-dummy-2.openai.azure.com/"
-    openai_api_key: str = os.environ.get("OPENAI_API_KEY", "")
     file_name_list: List[str]
     """List of filename, incombination with storage_path to
     create persistent path of vectorstore"""
@@ -35,37 +34,27 @@ class QuestionAnsweringPipeline(BaseComponent):
         "The context is: \n{context}\nAnswer: "
     )
 
-    @Node.decorate(depends_on=["openai_api_base", "openai_api_key"])
-    def llm(self):
-        return AzureChatOpenAI(
-            openai_api_base="https://bleh-dummy-2.openai.azure.com/",
-            openai_api_key=self.openai_api_key,
-            openai_api_version="2023-03-15-preview",
-            deployment_name="dummy-q2-gpt35",
-            temperature=0,
-            request_timeout=60,
-        )
+    llm: AzureChatOpenAI = AzureChatOpenAI.withx(
+        openai_api_base="https://bleh-dummy-2.openai.azure.com/",
+        openai_api_key=os.environ.get("OPENAI_API_KEY", ""),
+        openai_api_version="2023-03-15-preview",
+        deployment_name="dummy-q2-gpt35",
+        temperature=0,
+        request_timeout=60,
+    )
 
-    @Param.decorate()
-    def vector_store(self):
-        return InMemoryVectorStore()
+    vector_store: Param[InMemoryVectorStore] = Param(_(InMemoryVectorStore))
+    doc_store: Param[InMemoryDocumentStore] = Param(_(InMemoryDocumentStore))
 
-    @Param.decorate()
-    def doc_store(self):
-        doc_store = InMemoryDocumentStore()
-        return doc_store
-
-    @Node.decorate(depends_on=["openai_api_base", "openai_api_key"])
-    def embedding(self):
-        return AzureOpenAIEmbeddings(
-            model="text-embedding-ada-002",
-            deployment="dummy-q2-text-embedding",
-            openai_api_base=self.openai_api_base,
-            openai_api_key=self.openai_api_key,
-        )
+    embedding: AzureOpenAIEmbeddings = AzureOpenAIEmbeddings.withx(
+        model="text-embedding-ada-002",
+        deployment="dummy-q2-text-embedding",
+        openai_api_base="https://bleh-dummy-2.openai.azure.com/",
+        openai_api_key=os.environ.get("OPENAI_API_KEY", ""),
+    )
 
-    @Node.decorate(depends_on=["doc_store", "vector_store", "embedding"])
-    def retrieving_pipeline(self):
+    @Node.default()
+    def retrieving_pipeline(self) -> RetrieveDocumentFromVectorStorePipeline:
         retrieving_pipeline = RetrieveDocumentFromVectorStorePipeline(
             vector_store=self.vector_store,
             doc_store=self.doc_store,
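
Note the two declarative spellings that now coexist: the indexing pipeline uses the bare `_[InMemoryVectorStore] = _(InMemoryVectorStore)` form, while this file wraps the same declaration in `Param(...)`. Both appear to yield a lazily-constructed default, with `Param` additionally registering the attribute as a tunable pipeline parameter (an inference from this diff, not documented behavior). Meanwhile `@Node.default()` supersedes `@Node.decorate(depends_on=[...])` for `retrieving_pipeline`, so the method now only supplies the node's default value. A compact sketch of the two param forms side by side (`Sketch` is illustrative):

```python
from theflow import Param
from theflow.utils.modules import ObjectInitDeclaration as _

from kotaemon.base import BaseComponent
from kotaemon.docstores import InMemoryDocumentStore


class Sketch(BaseComponent):
    # tunable parameter with a lazily-constructed default (QA pipeline form)
    doc_store: Param[InMemoryDocumentStore] = Param(_(InMemoryDocumentStore))

    # bare form used in the indexing pipeline; presumably equivalent
    # minus the Param registration
    doc_store_alt: _[InMemoryDocumentStore] = _(InMemoryDocumentStore)
```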

View File

@@ -32,5 +32,5 @@ class LLMTool(BaseTool):
             response = self.llm(query)
         except ValueError:
             raise ToolException("LLM Tool call failed")
-        output = response.text[0]
+        output = response.text
         return output
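
This final hunk is a behavioral fix rather than a style change: `response.text` evidently now returns a single string, so the old `[0]` index would have sliced off the first character instead of selecting the first candidate. Where both shapes might still be encountered, a defensive normalizer (illustrative only, not part of this commit) could look like:

```python
def normalize_llm_text(response) -> str:
    """Return the response text whether `.text` is a str (new
    interface) or a list of candidate strings (old interface)."""
    text = response.text
    return text[0] if isinstance(text, list) else text
```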