fix: update setup instructions (#144) #none
* activate directory to gitignore
* add my custom env to gitignore, will have to change that
* add unstructured to kotaemon pyproject.toml
* add .env to gitignore
* remove .env from tracking
* make changes to the run_macos script, update readme with more detailed instructions
* remove my personal changes from gitignore
* remove line from run_macos script
* remove option for not installing miniconda for non technical users, mark docker dependency as optional
* docs: update demo URL
* gitignore changes
* merge .env.example
* revert changes to run_macos.sh
* unstructured to advanced dependencies
* add link to unstructured system dependencies
* remove api key
* fix: skip tests when unstructured pdf not installed
* chore: loosen unstructured package version in pyproject.toml
* chore: correct syntax

---------

Co-authored-by: Tadashi <tadashi@cinnamon.is>
Co-authored-by: cin-albert <albert@cinnamon.is>
This commit is contained in:
@@ -52,7 +52,7 @@ dependencies = [
|
||||
"python-dotenv>=1.0.1,<1.1",
|
||||
"tenacity>=8.2.3,<8.3",
|
||||
"theflow>=0.8.6,<0.9.0",
|
||||
"trogon>=0.5.0,<0.6",
|
||||
"trogon>=0.5.0,<0.6"
|
||||
]
|
||||
readme = "README.md"
|
||||
authors = [
|
||||
@@ -73,11 +73,14 @@ adv = [
|
||||
"fastembed",
|
||||
"googlesearch-python>=1.2.4,<1.3",
|
||||
"llama-cpp-python<0.2.8",
|
||||
"sentence-transformers",
|
||||
"wikipedia>=1.4.0,<1.5",
|
||||
"llama-index>=0.10.40,<0.11.0",
|
||||
"llama-index-vector-stores-milvus",
|
||||
"llama-index-vector-stores-qdrant",
|
||||
"python-docx>=1.1.0,<1.2",
|
||||
"sentence-transformers",
|
||||
"tabulate",
|
||||
"unstructured>=0.15.8,<0.16",
|
||||
"wikipedia>=1.4.0,<1.5",
|
||||
]
|
||||
dev = [
|
||||
"black",
|
||||
|
@@ -42,9 +42,10 @@ def if_sentence_fastembed_not_installed():
|
||||
return False
|
||||
|
||||
|
||||
def if_unstructured_not_installed():
|
||||
def if_unstructured_pdf_not_installed():
|
||||
try:
|
||||
import unstructured # noqa: F401
|
||||
from unstructured.partition.pdf import partition_pdf # noqa: F401
|
||||
except ImportError:
|
||||
return True
|
||||
else:
|
||||
@@ -81,8 +82,8 @@ skip_when_fastembed_not_installed = pytest.mark.skipif(
|
||||
if_sentence_fastembed_not_installed(), reason="fastembed is not installed"
|
||||
)
|
||||
|
||||
skip_when_unstructured_not_installed = pytest.mark.skipif(
|
||||
if_unstructured_not_installed(), reason="unstructured is not installed"
|
||||
skip_when_unstructured_pdf_not_installed = pytest.mark.skipif(
|
||||
if_unstructured_pdf_not_installed(), reason="unstructured is not installed"
|
||||
)
|
||||
|
||||
skip_when_cohere_not_installed = pytest.mark.skipif(
|
||||
|
@@ -14,7 +14,7 @@ from kotaemon.loaders import (
|
||||
UnstructuredReader,
|
||||
)
|
||||
|
||||
from .conftest import skip_when_unstructured_not_installed
|
||||
from .conftest import skip_when_unstructured_pdf_not_installed
|
||||
|
||||
|
||||
def test_docx_reader():
|
||||
@@ -54,7 +54,7 @@ def test_pdf_reader():
|
||||
assert len(nodes) > 0
|
||||
|
||||
|
||||
@skip_when_unstructured_not_installed
|
||||
@skip_when_unstructured_pdf_not_installed
|
||||
def test_unstructured_pdf_reader():
|
||||
reader = UnstructuredReader()
|
||||
dirpath = Path(__file__).parent
|
||||
|
@@ -5,7 +5,7 @@ import pytest
|
||||
|
||||
from kotaemon.loaders import MathpixPDFReader, OCRReader, PandasExcelReader
|
||||
|
||||
from .conftest import skip_when_unstructured_not_installed
|
||||
from .conftest import skip_when_unstructured_pdf_not_installed
|
||||
|
||||
input_file = Path(__file__).parent / "resources" / "table.pdf"
|
||||
input_file_excel = Path(__file__).parent / "resources" / "dummy.xlsx"
|
||||
@@ -28,7 +28,7 @@ def mathpix_output():
|
||||
return content
|
||||
|
||||
|
||||
@skip_when_unstructured_not_installed
|
||||
@skip_when_unstructured_pdf_not_installed
|
||||
def test_ocr_reader(fullocr_output):
|
||||
reader = OCRReader()
|
||||
documents = reader.load_data(input_file, response_content=fullocr_output)
|
||||
|
Reference in New Issue
Block a user