Feat/local endpoint llm (#148)

* serve local model in a different process from the app (a rough sketch of the idea follows the commit metadata below)
---------

Co-authored-by: albert <albert@cinnamon.is>
Co-authored-by: trducng <trungduc1992@gmail.com>
Author: ian_Cin
Date: 2024-03-15 16:17:33 +07:00 (committed by GitHub)
Parent: 2950e6ed02
Commit: df12dec732
20 changed files with 675 additions and 79 deletions
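
The commit message is terse about what "a different process" means in practice. As a rough, self-contained illustration of that idea only — not the code in this commit; the script name, port, and route here are hypothetical — the app can launch the model server as a child process and talk to it over local HTTP:

# sketch_local_endpoint.py -- hypothetical illustration, not part of this commit
import subprocess
import sys

import requests  # assumed HTTP client; any client would do

# Run the model server as its own OS process so a heavy local model
# cannot block or take down the main app (script name/port are made up).
server = subprocess.Popen([sys.executable, "serve_llm.py", "--port", "31415"])
try:
    # The app then talks to the model through a local HTTP endpoint.
    resp = requests.post(
        "http://127.0.0.1:31415/generate",
        json={"prompt": "Hello"},
        timeout=60,
    )
    print(resp.json())
finally:
    server.terminate()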


@@ -118,7 +118,7 @@ class DocumentRetrievalPipeline(BaseFileIndexRetriever):
         # rerank
         docs = self.vector_retrieval(text=text, top_k=top_k, **kwargs)
-        if self.get_from_path("reranker"):
+        if docs and self.get_from_path("reranker"):
             docs = self.reranker(docs, query=text)
         if not self.get_extra_table:

@@ -200,24 +200,37 @@ class AnswerWithContextPipeline(BaseComponent):
             lang=self.lang,
         )
-        citation_task = asyncio.create_task(
-            self.citation_pipeline.ainvoke(context=evidence, question=question)
-        )
-        print("Citation task created")
+        if evidence:
+            citation_task = asyncio.create_task(
+                self.citation_pipeline.ainvoke(context=evidence, question=question)
+            )
+            print("Citation task created")
         messages = []
         if self.system_prompt:
             messages.append(SystemMessage(content=self.system_prompt))
         messages.append(HumanMessage(content=prompt))
         output = ""
-        for text in self.llm.stream(messages):
-            output += text.text
-            self.report_output({"output": text.text})
-            await asyncio.sleep(0)
+        try:
+            # try streaming first
+            print("Trying LLM streaming")
+            for text in self.llm.stream(messages):
+                output += text.text
+                self.report_output({"output": text.text})
+                await asyncio.sleep(0)
+        except NotImplementedError:
+            print("Streaming is not supported, falling back to normal processing")
+            output = self.llm(messages).text
+            self.report_output({"output": output})
         # retrieve the citation
         print("Waiting for citation task")
-        citation = await citation_task
+        if evidence:
+            citation = await citation_task
+        else:
+            citation = None
         answer = Document(text=output, metadata={"citation": citation})
         return answer
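
The hunk above makes two changes: the citation task is only created and awaited when there is evidence, and streaming is attempted first with a blocking call as fallback, since a local endpoint LLM may not implement stream(). A standalone sketch of that fallback pattern, mirroring the diff (the llm and report interfaces here are stand-ins, not kotaemon's actual classes):

import asyncio

async def generate(llm, messages, report):
    # Accumulate the answer, preferring token streaming when available.
    output = ""
    try:
        for chunk in llm.stream(messages):  # may raise NotImplementedError
            output += chunk.text
            report({"output": chunk.text})
            await asyncio.sleep(0)  # yield so other tasks (e.g. citation) can run
    except NotImplementedError:
        # Backend cannot stream: one blocking call, reported all at once.
        output = llm(messages).text
        report({"output": output})
    return output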