fix: add optional graphrag toggle in dockerfile (#377)

* fix: toggle graphrag install in Docker build
* fix: update Dockerfile
* fix: remove unused logic in chat_fn
* fix: disable duckduckgo test due to API limit
2024-10-10 16:09:57 +07:00
parent 3ff6af8acf
commit 6da9db489f
5 changed files with 39 additions and 132 deletions
--- a/.github/workflows/build-push-docker.yaml
+++ b/.github/workflows/build-push-docker.yaml
@@ -88,16 +88,34 @@ jobs:
          username: ${{ github.actor }}
          password: ${{ secrets.GITHUB_TOKEN }}

-      - name: Build docker image
+      - name: Build docker image (amd64)
        uses: docker/build-push-action@v6
        with:
          file: Dockerfile
          context: .
          push: true
-          platforms: linux/amd64,linux/arm64
+          platforms: linux/amd64
          tags: |
            ${{ steps.meta.outputs.tags }}
          labels: ${{ steps.meta.outputs.labels }}
          target: ${{ matrix.target }}
          cache-from: type=gha
          cache-to: type=gha,mode=max
+          build-args: |
+            ENABLE_GRAPHRAG=true
+
+      - name: Build docker image (arm64)
+        uses: docker/build-push-action@v6
+        with:
+          file: Dockerfile
+          context: .
+          push: true
+          platforms: linux/arm64
+          tags: |
+            ${{ steps.meta.outputs.tags }}
+          labels: ${{ steps.meta.outputs.labels }}
+          target: ${{ matrix.target }}
+          cache-from: type=gha
+          cache-to: type=gha,mode=max
+          build-args: |
+            ENABLE_GRAPHRAG=false
--- a/Dockerfile
+++ b/Dockerfile
@@ -14,10 +14,14 @@ RUN apt-get update -qqy && \
      curl \
      cargo

+# Setup args
+ARG ENABLE_GRAPHRAG=true
+
 # Set environment variables
 ENV PYTHONDONTWRITEBYTECODE=1
 ENV PYTHONUNBUFFERED=1
 ENV PYTHONIOENCODING=UTF-8
+ENV ENABLE_GRAPHRAG=${ENABLE_GRAPHRAG}

 # Create working directory
 WORKDIR /app
@@ -30,15 +34,19 @@ RUN bash scripts/download_pdfjs.sh $PDFJS_PREBUILT_DIR

 # Copy contents
 COPY . /app
+COPY .env.example /app/.env

 # Install pip packages
 RUN --mount=type=ssh  \
    --mount=type=cache,target=/root/.cache/pip  \
    pip install -e "libs/kotaemon" \
    && pip install -e "libs/ktem" \
-    && pip install graphrag future \
    && pip install "pdfservices-sdk@git+https://github.com/niallcm/pdfservices-python-sdk.git@bump-and-unfreeze-requirements"

+RUN --mount=type=ssh  \
+    --mount=type=cache,target=/root/.cache/pip  \
+    if [ "$ENABLE_GRAPHRAG" = "true" ]; then pip install graphrag future; fi
+
 # Clean up
 RUN apt-get autoremove \
    && apt-get clean \
@@ -66,10 +74,6 @@ RUN --mount=type=ssh  \
    --mount=type=cache,target=/root/.cache/pip  \
    pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu

-# Copy contents
-COPY . /app
-COPY .env.example /app/.env
-
 # Install additional pip packages
 RUN --mount=type=ssh  \
    --mount=type=cache,target=/root/.cache/pip  \
--- a/libs/kotaemon/tests/test_agent.py
+++ b/libs/kotaemon/tests/test_agent.py
@@ -98,15 +98,15 @@ _openai_chat_completion_responses_react_langchain_tool = [
            "Action: wikipedia\n"
            "Action Input: Cinnamon AI company\n"
        ),
-        (
-            "The information retrieved from Wikipedia is not "
-            "about Cinnamon AI company, but about Blue Prism, "
-            "a British multinational software corporation. "
-            "I need to try another source to gather information "
-            "about Cinnamon AI company.\n"
-            "Action: duckduckgo_search\n"
-            "Action Input: Cinnamon AI company\n"
-        ),
+        # (
+        #     "The information retrieved from Wikipedia is not "
+        #     "about Cinnamon AI company, but about Blue Prism, "
+        #     "a British multinational software corporation. "
+        #     "I need to try another source to gather information "
+        #     "about Cinnamon AI company.\n"
+        #     "Action: duckduckgo_search\n"
+        #     "Action Input: Cinnamon AI company\n"
+        # ),
        FINAL_RESPONSE_TEXT,
    ]
 ]
--- a/libs/ktem/ktem/pages/chat/__init__.py
+++ b/libs/ktem/ktem/pages/chat/__init__.py
@@ -1,14 +1,10 @@
 import asyncio
-import csv
 import json
 import re
 from copy import deepcopy
-from datetime import datetime
-from pathlib import Path
 from typing import Optional

 import gradio as gr
-from filelock import FileLock
 from ktem.app import BasePage
 from ktem.components import reasonings
 from ktem.db.models import Conversation, engine
@@ -269,10 +265,6 @@ class ChatPage(BasePage):
                    self._suggestion_updated,
                    self._app.user_id,
                ],
-                outputs=[
-                    self.chat_control.conversation,
-                    self.chat_control.conversation,
-                ],
                show_progress="hidden",
            )

@@ -372,10 +364,6 @@ class ChatPage(BasePage):
                    self._suggestion_updated,
                    self._app.user_id,
                ],
-                outputs=[
-                    self.chat_control.conversation,
-                    self.chat_control.conversation,
-                ],
                show_progress="hidden",
            )

@@ -995,96 +983,3 @@ class ChatPage(BasePage):
                    pass

        return suggested_ques, updated
-
-    def backup_original_info(
-        self, chat_history, settings, info_pannel, original_chat_history
-    ):
-        original_chat_history.append(chat_history[-1])
-        return original_chat_history, settings, info_pannel
-
-    def save_log(
-        self,
-        conversation_id,
-        chat_history,
-        settings,
-        info_panel,
-        original_chat_history,
-        original_settings,
-        original_info_panel,
-        log_dir,
-    ):
-        if not Path(log_dir).exists():
-            Path(log_dir).mkdir(parents=True)
-
-        lock = FileLock(Path(log_dir) / ".lock")
-        # get current date
-        today = datetime.now()
-        formatted_date = today.strftime("%d%m%Y_%H")
-
-        with Session(engine) as session:
-            statement = select(Conversation).where(Conversation.id == conversation_id)
-            result = session.exec(statement).one()
-
-            data_source = deepcopy(result.data_source)
-            likes = data_source.get("likes", [])
-            if not likes:
-                return
-
-        feedback = likes[-1][-1]
-        message_index = likes[-1][0]
-
-        current_message = chat_history[message_index[0]]
-        original_message = original_chat_history[message_index[0]]
-        is_original = all(
-            [
-                current_item == original_item
-                for current_item, original_item in zip(
-                    current_message, original_message
-                )
-            ]
-        )
-
-        dataframe = [
-            [
-                conversation_id,
-                message_index,
-                current_message[0],
-                current_message[1],
-                chat_history,
-                settings,
-                info_panel,
-                feedback,
-                is_original,
-                original_message[1],
-                original_chat_history,
-                original_settings,
-                original_info_panel,
-            ]
-        ]
-
-        with lock:
-            log_file = Path(log_dir) / f"{formatted_date}_log.csv"
-            is_log_file_exist = log_file.is_file()
-            with open(log_file, "a") as f:
-                writer = csv.writer(f)
-                # write headers
-                if not is_log_file_exist:
-                    writer.writerow(
-                        [
-                            "Conversation ID",
-                            "Message ID",
-                            "Question",
-                            "Answer",
-                            "Chat History",
-                            "Settings",
-                            "Evidences",
-                            "Feedback",
-                            "Original/ Rewritten",
-                            "Original Answer",
-                            "Original Chat History",
-                            "Original Settings",
-                            "Original Evidences",
-                        ]
-                    )
-
-                writer.writerows(dataframe)
--- a/libs/ktem/ktem/pages/chat/control.py
+++ b/libs/ktem/ktem/pages/chat/control.py
@@ -326,11 +326,7 @@ class ConversationControl(BasePage):
    ):
        """Update the conversation's chat suggestions"""
        if not is_updated:
-            return (
-                gr.update(),
-                conversation_id,
-                gr.update(visible=False),
-            )
+            return

        if user_id is None:
            gr.Warning("Please sign in first (Settings → User Settings)")
@@ -353,13 +349,7 @@ class ConversationControl(BasePage):
            session.add(result)
            session.commit()

-        history = self.load_chat_history(user_id)
        gr.Info("Chat suggestions updated.")
-        return (
-            gr.update(choices=history),
-            conversation_id,
-            gr.update(visible=False),
-        )

    def _on_app_created(self):
        """Reload the conversation once the app is created"""