fix: add optional graphrag toggle in dockerfile (#377)

* fix: toggle graphrag install in Docker build * fix: update Dockerfile * fix: remove unused logics in chat_fn * fix: disable duckduckgo test due to API limit
2024-10-10 16:09:57 +07:00
parent 3ff6af8acf
commit 6da9db489f
5 changed files with 39 additions and 132 deletions
--- a/.github/workflows/build-push-docker.yaml
+++ b/.github/workflows/build-push-docker.yaml
@@ -88,16 +88,34 @@ jobs:
          username: ${{ github.actor }}
          password: ${{ secrets.GITHUB_TOKEN }}
-      - name: Build docker image
+      - name: Build docker image (amd64)
        uses: docker/build-push-action@v6
        with:
          file: Dockerfile
          context: .
          push: true
-          platforms: linux/amd64,linux/arm64
+          platforms: linux/amd64
          tags: |
            ${{ steps.meta.outputs.tags }}
          labels: ${{ steps.meta.outputs.labels }}
          target: ${{ matrix.target }}
          cache-from: type=gha
          cache-to: type=gha,mode=max
          build-args: |
            ENABLE_GRAPHRAG=true
      - name: Build docker image (arm64)
        uses: docker/build-push-action@v6
        with:
          file: Dockerfile
          context: .
          push: true
          platforms: linux/arm64
          tags: |
            ${{ steps.meta.outputs.tags }}
          labels: ${{ steps.meta.outputs.labels }}
          target: ${{ matrix.target }}
          cache-from: type=gha
          cache-to: type=gha,mode=max
          build-args: |
            ENABLE_GRAPHRAG=false
--- a/14
+++ b/14
@@ -14,10 +14,14 @@ RUN apt-get update -qqy && \
      curl \
      cargo
 # Setup args
 ARG ENABLE_GRAPHRAG=true
 # Set environment variables
 ENV PYTHONDONTWRITEBYTECODE=1
 ENV PYTHONUNBUFFERED=1
 ENV PYTHONIOENCODING=UTF-8
 ENV ENABLE_GRAPHRAG=${ENABLE_GRAPHRAG}
 # Create working directory
 WORKDIR /app
@@ -30,15 +34,19 @@ RUN bash scripts/download_pdfjs.sh $PDFJS_PREBUILT_DIR
 # Copy contents
 COPY . /app
 COPY .env.example /app/.env
 # Install pip packages
 RUN --mount=type=ssh  \
    --mount=type=cache,target=/root/.cache/pip  \
    pip install -e "libs/kotaemon" \
    && pip install -e "libs/ktem" \
    && pip install graphrag future \
    && pip install "pdfservices-sdk@git+https://github.com/niallcm/pdfservices-python-sdk.git@bump-and-unfreeze-requirements"
 RUN --mount=type=ssh  \
    --mount=type=cache,target=/root/.cache/pip  \
    if [ "$ENABLE_GRAPHRAG" = "true" ]; then pip install graphrag future; fi
 # Clean up
 RUN apt-get autoremove \
    && apt-get clean \
@@ -66,10 +74,6 @@ RUN --mount=type=ssh  \
    --mount=type=cache,target=/root/.cache/pip  \
    pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu
 # Copy contents
 COPY . /app
 COPY .env.example /app/.env
 # Install additional pip packages
 RUN --mount=type=ssh  \
    --mount=type=cache,target=/root/.cache/pip  \
--- a/libs/kotaemon/tests/test_agent.py
+++ b/libs/kotaemon/tests/test_agent.py
@@ -98,15 +98,15 @@ _openai_chat_completion_responses_react_langchain_tool = [
            "Action: wikipedia\n"
            "Action Input: Cinnamon AI company\n"
        ),
-        (
+        # (
-            "The information retrieved from Wikipedia is not "
+        #     "The information retrieved from Wikipedia is not "
-            "about Cinnamon AI company, but about Blue Prism, "
+        #     "about Cinnamon AI company, but about Blue Prism, "
-            "a British multinational software corporation. "
+        #     "a British multinational software corporation. "
-            "I need to try another source to gather information "
+        #     "I need to try another source to gather information "
-            "about Cinnamon AI company.\n"
+        #     "about Cinnamon AI company.\n"
-            "Action: duckduckgo_search\n"
+        #     "Action: duckduckgo_search\n"
-            "Action Input: Cinnamon AI company\n"
+        #     "Action Input: Cinnamon AI company\n"
-        ),
+        # ),
        FINAL_RESPONSE_TEXT,
    ]
 ]
--- a/libs/ktem/ktem/pages/chat/init.py
+++ b/libs/ktem/ktem/pages/chat/init.py
@@ -1,14 +1,10 @@
 import asyncio
 import csv
 import json
 import re
 from copy import deepcopy
 from datetime import datetime
 from pathlib import Path
 from typing import Optional
 import gradio as gr
 from filelock import FileLock
 from ktem.app import BasePage
 from ktem.components import reasonings
 from ktem.db.models import Conversation, engine
@@ -269,10 +265,6 @@ class ChatPage(BasePage):
                    self._suggestion_updated,
                    self._app.user_id,
                ],
                outputs=[
                    self.chat_control.conversation,
                    self.chat_control.conversation,
                ],
                show_progress="hidden",
            )
@@ -372,10 +364,6 @@ class ChatPage(BasePage):
                    self._suggestion_updated,
                    self._app.user_id,
                ],
                outputs=[
                    self.chat_control.conversation,
                    self.chat_control.conversation,
                ],
                show_progress="hidden",
            )
@@ -995,96 +983,3 @@ class ChatPage(BasePage):
                    pass
        return suggested_ques, updated
    def backup_original_info(
        self, chat_history, settings, info_pannel, original_chat_history
    ):
        original_chat_history.append(chat_history[-1])
        return original_chat_history, settings, info_pannel
    def save_log(
        self,
        conversation_id,
        chat_history,
        settings,
        info_panel,
        original_chat_history,
        original_settings,
        original_info_panel,
        log_dir,
    ):
        if not Path(log_dir).exists():
            Path(log_dir).mkdir(parents=True)
        lock = FileLock(Path(log_dir) / ".lock")
        # get current date
        today = datetime.now()
        formatted_date = today.strftime("%d%m%Y_%H")
        with Session(engine) as session:
            statement = select(Conversation).where(Conversation.id == conversation_id)
            result = session.exec(statement).one()
            data_source = deepcopy(result.data_source)
            likes = data_source.get("likes", [])
            if not likes:
                return
        feedback = likes[-1][-1]
        message_index = likes[-1][0]
        current_message = chat_history[message_index[0]]
        original_message = original_chat_history[message_index[0]]
        is_original = all(
            [
                current_item == original_item
                for current_item, original_item in zip(
                    current_message, original_message
                )
            ]
        )
        dataframe = [
            [
                conversation_id,
                message_index,
                current_message[0],
                current_message[1],
                chat_history,
                settings,
                info_panel,
                feedback,
                is_original,
                original_message[1],
                original_chat_history,
                original_settings,
                original_info_panel,
            ]
        ]
        with lock:
            log_file = Path(log_dir) / f"{formatted_date}_log.csv"
            is_log_file_exist = log_file.is_file()
            with open(log_file, "a") as f:
                writer = csv.writer(f)
                # write headers
                if not is_log_file_exist:
                    writer.writerow(
                        [
                            "Conversation ID",
                            "Message ID",
                            "Question",
                            "Answer",
                            "Chat History",
                            "Settings",
                            "Evidences",
                            "Feedback",
                            "Original/ Rewritten",
                            "Original Answer",
                            "Original Chat History",
                            "Original Settings",
                            "Original Evidences",
                        ]
                    )
                writer.writerows(dataframe)
--- a/libs/ktem/ktem/pages/chat/control.py
+++ b/libs/ktem/ktem/pages/chat/control.py
@@ -326,11 +326,7 @@ class ConversationControl(BasePage):
    ):
        """Update the conversation's chat suggestions"""
        if not is_updated:
-            return (
+            return
                gr.update(),
                conversation_id,
                gr.update(visible=False),
            )
        if user_id is None:
            gr.Warning("Please sign in first (Settings → User Settings)")
@@ -353,13 +349,7 @@ class ConversationControl(BasePage):
            session.add(result)
            session.commit()
        history = self.load_chat_history(user_id)
        gr.Info("Chat suggestions updated.")
        return (
            gr.update(choices=history),
            conversation_id,
            gr.update(visible=False),
        )
    def _on_app_created(self):
        """Reload the conversation once the app is created"""