From a865e2b0950614a3f427967daf77cc07f22daac4 Mon Sep 17 00:00:00 2001 From: Khoi-Nguyen Nguyen-Ngoc <146712657+cin-niko@users.noreply.github.com> Date: Sat, 21 Sep 2024 12:11:58 +0700 Subject: [PATCH] feat: modify base dependencies + remove unnecessary packages in lite docker (#310) * feat: update base/adv dependencies * feat: update Dockerfile * ci: update free disk for docker build --- .github/workflows/build-push-docker.yaml | 32 ++++++++++++------------ Dockerfile | 5 ++-- libs/kotaemon/pyproject.toml | 24 +++++++++--------- 3 files changed, 31 insertions(+), 30 deletions(-) diff --git a/.github/workflows/build-push-docker.yaml b/.github/workflows/build-push-docker.yaml index 98f3233..26a214e 100644 --- a/.github/workflows/build-push-docker.yaml +++ b/.github/workflows/build-push-docker.yaml @@ -29,6 +29,22 @@ jobs: - lite - full steps: + - name: Free Disk Space (Ubuntu) + uses: jlumbroso/free-disk-space@main + with: + # this might remove tools that are actually needed, + # if set to "true" but frees about 6 GB + tool-cache: true + + # all of these default to true, but feel free to set to + # "false" if necessary for your workflow + android: true + dotnet: true + haskell: true + large-packages: true + docker-images: true + swap-storage: true + - name: Set repository and image name run: | echo "FULL_IMAGE_NAME=${{ env.REGISTRY }}/${IMAGE_NAME,,}" >>${GITHUB_ENV} @@ -72,22 +88,6 @@ jobs: username: ${{ github.actor }} password: ${{ secrets.GITHUB_TOKEN }} - - name: Free Disk Space (Ubuntu) - uses: jlumbroso/free-disk-space@main - with: - # this might remove tools that are actually needed, - # if set to "true" but frees about 6 GB - tool-cache: false - - # all of these default to true, but feel free to set to - # "false" if necessary for your workflow - android: true - dotnet: true - haskell: true - large-packages: true - docker-images: true - swap-storage: true - - name: Build docker image uses: docker/build-push-action@v6 with: diff --git a/Dockerfile b/Dockerfile index 14bf651..ecf313a 100644 --- a/Dockerfile +++ b/Dockerfile @@ -34,7 +34,7 @@ COPY . /app # Install pip packages RUN --mount=type=ssh \ --mount=type=cache,target=/root/.cache/pip \ - pip install -e "libs/kotaemon[all]" \ + pip install -e "libs/kotaemon" \ && pip install -e "libs/ktem" \ && pip install graphrag future \ && pip install "pdfservices-sdk@git+https://github.com/niallcm/pdfservices-python-sdk.git@bump-and-unfreeze-requirements" @@ -72,7 +72,8 @@ COPY . /app # Install additional pip packages RUN --mount=type=ssh \ --mount=type=cache,target=/root/.cache/pip \ - pip install unstructured[all-docs] + pip install -e "libs/kotaemon[adv]" \ + && pip install unstructured[all-docs] # Clean up RUN apt-get autoremove \ diff --git a/libs/kotaemon/pyproject.toml b/libs/kotaemon/pyproject.toml index 78567c7..d39395e 100644 --- a/libs/kotaemon/pyproject.toml +++ b/libs/kotaemon/pyproject.toml @@ -21,28 +21,34 @@ dynamic = ["version"] requires-python = ">= 3.10" description = "Kotaemon core library for AI development." dependencies = [ + "azure-ai-documentintelligence", + "beautifulsoup4>=4.12.3,<4.13", "click>=8.1.7,<9", "cohere>=5.3.2,<6", "cookiecutter>=2.6.0,<2.7", "fast_langdetect", + "fastapi<=0.112.1", "gradio>=4.31.0,<4.40", "html2text==2024.2.26", "langchain>=0.1.16,<0.2.0", + "langchain-anthropic", "langchain-community>=0.0.34,<0.1.0", "langchain-openai>=0.1.4,<0.2.0", - "langchain-anthropic", "llama-hub>=0.0.79,<0.1.0", "llama-index>=0.10.40,<0.11.0", - "fastapi<=0.112.1", "llama-index-vector-stores-chroma>=0.1.9", "llama-index-vector-stores-lancedb", "llama-index-vector-stores-milvus", + "llama-index-vector-stores-qdrant", "openai>=1.23.6,<2", "openpyxl>=3.1.2,<3.2", + "opentelemetry-exporter-otlp-proto-grpc>=1.25.0", # https://github.com/chroma-core/chroma/issues/2571 "pandas>=2.2.2,<2.3", "plotly", "PyMuPDF>=1.23", "pypdf>=4.2.0,<4.3", + "python-decouple", # for theflow + "python-docx>=1.1.0,<1.2", "python-dotenv>=1.0.1,<1.1", "tenacity>=8.2.3,<8.3", "theflow>=0.8.6,<0.9.0", @@ -62,18 +68,13 @@ classifiers = [ [project.optional-dependencies] adv = [ - "azure-ai-documentintelligence", - "beautifulsoup4>=4.12.3,<4.13", "duckduckgo-search>=6.1.0,<6.2", "elasticsearch>=8.13.0,<8.14", - "googlesearch-python>=1.2.4,<1.3", - "python-docx>=1.1.0,<1.2", - "tabulate", - "wikipedia>=1.4.0,<1.5", - "sentence-transformers", - "llama-cpp-python<0.2.8", "fastembed", - "llama-index-vector-stores-qdrant", + "googlesearch-python>=1.2.4,<1.3", + "llama-cpp-python<0.2.8", + "sentence-transformers", + "wikipedia>=1.4.0,<1.5", ] dev = [ "black", @@ -83,7 +84,6 @@ dev = [ "pre-commit", "pytest", "pytest-mock", - "python-decouple", "sphinx", ] all = ["kotaemon[adv,dev]"]