make default installation faster (#2)
* remove cohere as default * refactor dependencies * use llama-index pdf reader as default (pypdf) * fix some lazy docstring * update install scripts * minor fix
This commit is contained in:
parent
a8f92b3f9e
commit
d22ae88c7a
2
.github/workflows/unit-test.yaml
vendored
2
.github/workflows/unit-test.yaml
vendored
|
@ -89,7 +89,7 @@ jobs:
|
|||
run: |
|
||||
python -m pip install --upgrade pip
|
||||
cd libs/kotaemon
|
||||
pip install -U --upgrade-strategy eager -e .[dev]
|
||||
pip install -U --upgrade-strategy eager -e .[all]
|
||||
|
||||
- name: New dependencies cache for key ${{ steps.restore-dependencies.outputs.cache-primary-key }}
|
||||
if: |
|
||||
|
|
|
@ -7,7 +7,6 @@ from kotaemon.base import BaseComponent, Document, Param
|
|||
from kotaemon.indices.extractors import BaseDocParser
|
||||
from kotaemon.indices.splitters import BaseSplitter, TokenSplitter
|
||||
from kotaemon.loaders import (
|
||||
AutoReader,
|
||||
DirectoryReader,
|
||||
MathpixPDFReader,
|
||||
OCRReader,
|
||||
|
@ -59,7 +58,7 @@ class DocumentIngestor(BaseComponent):
|
|||
file_extractors[ext] = cls()
|
||||
|
||||
if self.pdf_mode == "normal":
|
||||
file_extractors[".pdf"] = AutoReader("UnstructuredReader") # type: ignore
|
||||
pass # use default loader of llama-index which is pypdf
|
||||
elif self.pdf_mode == "ocr":
|
||||
file_extractors[".pdf"] = OCRReader()
|
||||
else:
|
||||
|
|
|
@ -55,7 +55,7 @@ class LIReaderMixin(BaseComponent):
|
|||
|
||||
def _get_wrapped_class(self) -> Type["LIBaseReader"]:
|
||||
raise NotImplementedError(
|
||||
"Please return the relevant Langchain class in in _get_lc_class"
|
||||
"Please return the relevant llama-index class in in _get_wrapped_class"
|
||||
)
|
||||
|
||||
def __init__(self, *args, **kwargs):
|
||||
|
|
|
@ -33,7 +33,7 @@ class DocxReader(BaseReader):
|
|||
"""Load data using Docx reader
|
||||
|
||||
Args:
|
||||
file_path (Path): Path to PDF file
|
||||
file_path (Path): Path to .docx file
|
||||
|
||||
Returns:
|
||||
List[Document]: list of documents extracted from the .docx file
|
||||
|
|
|
@ -37,7 +37,7 @@ class HtmlReader(BaseReader):
|
|||
"""Load data using Html reader
|
||||
|
||||
Args:
|
||||
file_path: path to pdf file
|
||||
file_path: path to HTML file
|
||||
extra_info: extra information passed to this reader during extracting data
|
||||
|
||||
Returns:
|
||||
|
|
|
@ -17,6 +17,8 @@ description = "Kotaemon core library for AI development."
|
|||
dependencies = [
|
||||
"langchain",
|
||||
"langchain-community",
|
||||
"langchain-openai",
|
||||
"openai",
|
||||
"theflow",
|
||||
"llama-index>=0.9.0,<0.10.0",
|
||||
"llama-hub",
|
||||
|
@ -27,6 +29,11 @@ dependencies = [
|
|||
"pandas",
|
||||
"trogon",
|
||||
"tenacity",
|
||||
"python-dotenv", # currently used to read configs from file, should be remove in the future
|
||||
"chromadb",
|
||||
"unstructured",
|
||||
"pypdf",
|
||||
"html2text",
|
||||
]
|
||||
readme = "README.md"
|
||||
license = { text = "MIT License" }
|
||||
|
@ -42,6 +49,18 @@ classifiers = [
|
|||
]
|
||||
|
||||
[project.optional-dependencies]
|
||||
adv = [
|
||||
"wikipedia",
|
||||
"duckduckgo-search",
|
||||
"googlesearch-python",
|
||||
"python-docx",
|
||||
"pytest-mock",
|
||||
"unstructured[pdf]",
|
||||
"sentence_transformers",
|
||||
"cohere",
|
||||
"elasticsearch",
|
||||
"llama-cpp-python",
|
||||
]
|
||||
dev = [
|
||||
"ipython",
|
||||
"pytest",
|
||||
|
@ -50,23 +69,8 @@ dev = [
|
|||
"flake8",
|
||||
"sphinx",
|
||||
"coverage",
|
||||
"openai",
|
||||
"langchain-openai",
|
||||
"chromadb",
|
||||
"wikipedia",
|
||||
"duckduckgo-search",
|
||||
"googlesearch-python",
|
||||
"python-docx",
|
||||
"python-dotenv",
|
||||
"pytest-mock",
|
||||
"unstructured[pdf]",
|
||||
"sentence_transformers",
|
||||
"cohere",
|
||||
"elasticsearch",
|
||||
"pypdf",
|
||||
"html2text",
|
||||
"llama-cpp-python",
|
||||
]
|
||||
all = ["kotaemon[adv,dev]"]
|
||||
|
||||
[project.scripts]
|
||||
kh = "kotaemon.cli:main"
|
||||
|
|
|
@ -25,7 +25,7 @@ from theflow.utils.modules import import_dotted_string
|
|||
from kotaemon.base import RetrievedDocument
|
||||
from kotaemon.indices import VectorIndexing, VectorRetrieval
|
||||
from kotaemon.indices.ingests import DocumentIngestor
|
||||
from kotaemon.indices.rankings import BaseReranking, CohereReranking, LLMReranking
|
||||
from kotaemon.indices.rankings import BaseReranking, LLMReranking
|
||||
|
||||
from .base import BaseFileIndexIndexing, BaseFileIndexRetriever
|
||||
|
||||
|
@ -67,9 +67,7 @@ class DocumentRetrievalPipeline(BaseFileIndexRetriever):
|
|||
vector_retrieval: VectorRetrieval = VectorRetrieval.withx(
|
||||
embedding=embeddings.get_default(),
|
||||
)
|
||||
reranker: BaseReranking = CohereReranking.withx(
|
||||
cohere_api_key=getattr(settings, "COHERE_API_KEY", "")
|
||||
) >> LLMReranking.withx(llm=llms.get_lowest_cost())
|
||||
reranker: BaseReranking = LLMReranking.withx(llm=llms.get_lowest_cost())
|
||||
get_extra_table: bool = False
|
||||
|
||||
def run(
|
||||
|
|
|
@ -13,18 +13,14 @@ version = "0.2.0"
|
|||
requires-python = ">= 3.10"
|
||||
description = "RAG-based Question and Answering Application"
|
||||
dependencies = [
|
||||
"chromadb",
|
||||
"click",
|
||||
"cohere",
|
||||
"platformdirs",
|
||||
"pluggy",
|
||||
"python-decouple",
|
||||
"python-dotenv",
|
||||
"python-pptx",
|
||||
"sqlalchemy",
|
||||
"sqlmodel",
|
||||
"tiktoken",
|
||||
"unstructured[pdf]",
|
||||
]
|
||||
readme = "README.md"
|
||||
license = { text = "MIT License" }
|
||||
|
|
|
@ -92,7 +92,7 @@ function install_dependencies() {
|
|||
if pip list 2>/dev/null | grep -q "kotaemon"; then
|
||||
echo "Requirements are already installed"
|
||||
else
|
||||
local kotaemon_root="$(pwd)/libs/kotaemon/.[dev]"
|
||||
local kotaemon_root="$(pwd)/libs/kotaemon"
|
||||
local ktem_root="$(pwd)/libs/ktem/"
|
||||
|
||||
echo "" && echo "Install kotaemon's requirements"
|
||||
|
|
|
@ -92,7 +92,7 @@ function install_dependencies() {
|
|||
if pip list 2>/dev/null | grep -q "kotaemon"; then
|
||||
echo "Requirements are already installed"
|
||||
else
|
||||
local kotaemon_root="$(pwd)/libs/kotaemon/.[dev]"
|
||||
local kotaemon_root="$(pwd)/libs/kotaemon"
|
||||
local ktem_root="$(pwd)/libs/ktem/"
|
||||
|
||||
echo "" && echo "Install kotaemon's requirements"
|
||||
|
|
|
@ -114,7 +114,7 @@ IF %ERRORLEVEL% == 0 (
|
|||
ECHO Dependencies are already installed
|
||||
) ELSE (
|
||||
ECHO Install kotaemon's requirements
|
||||
CALL python -m pip install -e "%CD%\libs\kotaemon\.[dev]"
|
||||
CALL python -m pip install -e "%CD%\libs\kotaemon"
|
||||
|
||||
ECHO Install ktem's requirements
|
||||
CALL python -m pip install -e "%CD%\libs\ktem"
|
||||
|
|
Loading…
Reference in New Issue
Block a user