kotaemon/Dockerfile
Tuan Anh Nguyen Dang (Tadashi_Cin) 3bd3830b8d
feat: sso login, demo mode & new mindmap support (#644) bump:minor
* fix: update .env.example

* feat: add SSO login

* fix: update flowsetting

* fix: add requirement

* fix: refine UI

* fix: update group id-based operation

* fix: improve citation logic

* fix: UI enhancement

* fix: user_id to string in models

* fix: improve chat suggestion UI and flow

* fix: improve group id handling

* fix: improve chat suggestion

* fix: secure download for single file

* fix: file limiting in docstore

* fix: improve chat suggestion logics & language conform

* feat: add markmap and select text to highlight function

* fix: update Dockerfile

* fix: user id auto generate

* fix: default user id

* feat: add demo mode

* fix: update flowsetting

* fix: revise default params for demo

* feat: sso_app alternative

* feat: sso login demo

* feat: demo specific customization

* feat: add login using API key

* fix: disable key-based login

* fix: optimize duplicate upload

* fix: gradio routing

* fix: disable arm build for demo

* fix: revise full-text search js logic

* feat: add rate limit

* fix: update Dockerfile with new launch script

* fix: update Dockerfile

* fix: update Dockerignore

* fix: update ratelimit logic

* fix: user_id in user management page

* fix: rename conv logic

* feat: update demo hint

* fix: minor fix

* fix: highlight on long PDF load

* feat: add HF paper list

* fix: update HF papers load logic

* feat: fly config

* fix: update fly config

* fix: update paper list pull api

* fix: minor update root routing

* fix: minor update root routing

* fix: simplify login flow & paper list UI

* feat: add paper recommendation

* fix: update Dockerfile

* fix: update Dockerfile

* fix: update default model

* feat: add long context Ollama through LCOllama

* feat: expose Gradio share to env

* fix: revert customized changes

* fix: list group at app load

* fix: relocate share conv button

* fix: update launch script

* fix: update Docker CI

* feat: add Ollama model selection at first setup

* docs: update README
2025-02-02 15:19:48 +07:00

115 lines
3.0 KiB
Docker

# Lite version: base stage holding the application sources and its core
# Python dependencies. The other stages in this file build on top of it.
FROM python:3.10-slim AS lite

# Common dependencies.
# The apt package index is deleted in the same layer that downloads it:
# removing it in a later RUN would not shrink the image, because the files
# would still be stored in this layer.
RUN apt-get update -qqy \
    && apt-get install -y --no-install-recommends \
        cargo \
        curl \
        g++ \
        gcc \
        git \
        libpoppler-dev \
        poppler-utils \
        ssh \
        unzip \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/*

# Setup args (filled in automatically by BuildKit for the target platform).
ARG TARGETPLATFORM
ARG TARGETARCH

# Set environment variables.
ENV PYTHONDONTWRITEBYTECODE=1 \
    PYTHONUNBUFFERED=1 \
    PYTHONIOENCODING=UTF-8
# ARG values are not visible in stages derived from this one or at runtime;
# re-exporting the architecture as ENV keeps it available in both.
ENV TARGETARCH=${TARGETARCH}

# Create working directory.
WORKDIR /app

# Download pdfjs.
# Only the download script is copied at this point, so this layer stays cached
# when other files in the build context change.
COPY --chmod=0755 scripts/download_pdfjs.sh /app/scripts/download_pdfjs.sh
# Directory handed to the download script (presumably the install destination).
ENV PDFJS_PREBUILT_DIR="/app/libs/ktem/ktem/assets/prebuilt/pdfjs-dist"
RUN bash scripts/download_pdfjs.sh "$PDFJS_PREBUILT_DIR"

# Copy contents.
COPY . /app
COPY launch.sh /app/launch.sh
# The example environment file is installed as the image's default .env.
COPY .env.example /app/.env

# Install pip packages.
# The pip download cache lives in a BuildKit cache mount, so it speeds up
# rebuilds without being stored in the image.
# NOTE(review): pdfservices-sdk is installed from a git branch, not a pinned
# commit, so rebuilds may pick up different code.
RUN --mount=type=ssh \
    --mount=type=cache,target=/root/.cache/pip \
    pip install -e "libs/kotaemon" \
    && pip install -e "libs/ktem" \
    && pip install "pdfservices-sdk@git+https://github.com/niallcm/pdfservices-python-sdk.git@bump-and-unfreeze-requirements"

# graphrag (and future) are only installed when building for amd64.
RUN --mount=type=ssh \
    --mount=type=cache,target=/root/.cache/pip \
    if [ "$TARGETARCH" = "amd64" ]; then pip install "graphrag<=0.3.6" future; fi

# Clean up.
# -y keeps autoremove from aborting on its confirmation prompt, which cannot
# be answered during a non-interactive build.
RUN apt-get autoremove -y \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/* \
    && rm -rf ~/.cache

ENTRYPOINT ["sh", "/app/launch.sh"]
# Full version: extends the lite stage with extra system libraries and the
# heavier Python packages (torch, unstructured, lightRAG, docling).
FROM lite AS full

# Additional dependencies for full version.
# The apt package index is deleted in the same layer that downloads it, so it
# is never stored in the image.
RUN apt-get update -qqy \
    && apt-get install -y --no-install-recommends \
        ffmpeg \
        libmagic-dev \
        libreoffice \
        libsm6 \
        libxext6 \
        tesseract-ocr \
        tesseract-ocr-jpn \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/*

# Install torch and torchvision for unstructured.
# The extra index URL selects the CPU-only wheels.
RUN --mount=type=ssh \
    --mount=type=cache,target=/root/.cache/pip \
    pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu

# Install additional pip packages.
# The requirement with extras is quoted so the shell cannot interpret the
# square brackets as a glob pattern.
RUN --mount=type=ssh \
    --mount=type=cache,target=/root/.cache/pip \
    pip install -e "libs/kotaemon[adv]" \
    && pip install "unstructured[all-docs]"

# Install lightRAG.
ENV USE_LIGHTRAG=true
RUN --mount=type=ssh \
    --mount=type=cache,target=/root/.cache/pip \
    pip install aioboto3 nano-vectordb ollama xxhash "lightrag-hku<=0.0.8"

# Install docling.
RUN --mount=type=ssh \
    --mount=type=cache,target=/root/.cache/pip \
    pip install "docling<=2.5.2"

# Clean up.
# -y keeps autoremove from aborting on its confirmation prompt, which cannot
# be answered during a non-interactive build.
RUN apt-get autoremove -y \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/* \
    && rm -rf ~/.cache

ENTRYPOINT ["sh", "/app/launch.sh"]
# Ollama-bundled version: the full stage plus an Ollama installation with the
# nomic-embed-text model pulled at build time.
FROM full AS ollama

# Run shell-form instructions under bash with pipefail, so a failed download
# in "curl | sh" fails the build instead of being masked by the exit status
# of the last command in the pipeline.
SHELL ["/bin/bash", "-o", "pipefail", "-c"]

# Install ollama.
# No ssh or pip-cache mount here: this step only runs the upstream installer.
RUN curl -fsSL https://ollama.com/install.sh | sh

# RUN nohup bash -c "ollama serve &" && sleep 4 && ollama pull qwen2.5:7b
# Start a temporary server in the background, poll until it answers (up to
# about 30 seconds) instead of relying on a fixed delay, then pull the model.
# If the server never comes up, the pull fails and so does the build.
RUN nohup bash -c "ollama serve &" \
    && for _ in $(seq 1 30); do \
           if ollama list >/dev/null 2>&1; then break; fi; \
           sleep 1; \
       done \
    && ollama pull nomic-embed-text

ENTRYPOINT ["sh", "/app/launch.sh"]