fix: update setup instructions (#144) #none

* activate directory to gitignore
* add my custom env to gitignore, will have to change that
* add unstructured to kotaemon pyproject.toml
* add .env to gitignore
* remove .env from tracking
* make changes to the run_macos script, update readme with more detailed instructions
* remove my personal changes from gitignore
* remove line from run_macos script
* remove option for not installing miniconda for non technical users, mark docker dependency as optional
* docs: update demo URL
* gitignore changes
* merge .env.example
* revert changes to run_macos.sh
* unstructured to advanced dependencies
* add link to unstructured system dependencies
* remove api key
* fix: skip tests when unstructured pdf not installed
* chore: loosen unstructured package version in pyproject.toml
* chore: correct syntax

---------

Co-authored-by: Tadashi <tadashi@cinnamon.is>
Co-authored-by: cin-albert <albert@cinnamon.is>
2024-09-29 09:26:02 -06:00
parent 1522a3ab5a
commit f7b6f313b5
8 changed files with 42 additions and 16 deletions
--- a/libs/kotaemon/pyproject.toml
+++ b/libs/kotaemon/pyproject.toml
@@ -52,7 +52,7 @@ dependencies = [
    "python-dotenv>=1.0.1,<1.1",
    "tenacity>=8.2.3,<8.3",
    "theflow>=0.8.6,<0.9.0",
-    "trogon>=0.5.0,<0.6",
+    "trogon>=0.5.0,<0.6"
 ]
 readme = "README.md"
 authors = [
@@ -73,11 +73,14 @@ adv = [
    "fastembed",
    "googlesearch-python>=1.2.4,<1.3",
    "llama-cpp-python<0.2.8",
-    "sentence-transformers",
-    "wikipedia>=1.4.0,<1.5",
    "llama-index>=0.10.40,<0.11.0",
    "llama-index-vector-stores-milvus",
    "llama-index-vector-stores-qdrant",
+    "python-docx>=1.1.0,<1.2",
+    "sentence-transformers",
+    "tabulate",
+    "unstructured>=0.15.8,<0.16",
+    "wikipedia>=1.4.0,<1.5",
 ]
 dev = [
    "black",
--- a/libs/kotaemon/tests/conftest.py
+++ b/libs/kotaemon/tests/conftest.py
@@ -42,9 +42,10 @@ def if_sentence_fastembed_not_installed():
        return False


-def if_unstructured_not_installed():
+def if_unstructured_pdf_not_installed():
    try:
        import unstructured  # noqa: F401
+        from unstructured.partition.pdf import partition_pdf  # noqa: F401
    except ImportError:
        return True
    else:
@@ -81,8 +82,8 @@ skip_when_fastembed_not_installed = pytest.mark.skipif(
    if_sentence_fastembed_not_installed(), reason="fastembed is not installed"
 )

-skip_when_unstructured_not_installed = pytest.mark.skipif(
-    if_unstructured_not_installed(), reason="unstructured is not installed"
+skip_when_unstructured_pdf_not_installed = pytest.mark.skipif(
+    if_unstructured_pdf_not_installed(), reason="unstructured is not installed"
 )

 skip_when_cohere_not_installed = pytest.mark.skipif(
--- a/libs/kotaemon/tests/test_reader.py
+++ b/libs/kotaemon/tests/test_reader.py
@@ -14,7 +14,7 @@ from kotaemon.loaders import (
    UnstructuredReader,
 )

-from .conftest import skip_when_unstructured_not_installed
+from .conftest import skip_when_unstructured_pdf_not_installed


 def test_docx_reader():
@@ -54,7 +54,7 @@ def test_pdf_reader():
    assert len(nodes) > 0


-@skip_when_unstructured_not_installed
+@skip_when_unstructured_pdf_not_installed
 def test_unstructured_pdf_reader():
    reader = UnstructuredReader()
    dirpath = Path(__file__).parent
--- a/libs/kotaemon/tests/test_table_reader.py
+++ b/libs/kotaemon/tests/test_table_reader.py
@@ -5,7 +5,7 @@ import pytest

 from kotaemon.loaders import MathpixPDFReader, OCRReader, PandasExcelReader

-from .conftest import skip_when_unstructured_not_installed
+from .conftest import skip_when_unstructured_pdf_not_installed

 input_file = Path(__file__).parent / "resources" / "table.pdf"
 input_file_excel = Path(__file__).parent / "resources" / "dummy.xlsx"
@@ -28,7 +28,7 @@ def mathpix_output():
    return content


-@skip_when_unstructured_not_installed
+@skip_when_unstructured_pdf_not_installed
 def test_ocr_reader(fullocr_output):
    reader = OCRReader()
    documents = reader.load_data(input_file, response_content=fullocr_output)