[AUR-405] Auto-generate markdown documentation from pipeline (#33)

* Create a script to auto-generate markdown docs from pipeline * Clean up documentation for Chain-of-Thought
2023-10-04 10:54:24 +07:00
parent 6ab1854532
commit 49ed3f6994
5 changed files with 109 additions and 8 deletions
--- a/knowledgehub/base.py
+++ b/knowledgehub/base.py
@@ -8,12 +8,13 @@ class BaseComponent(Compose):

    A component is a class that can be used to compose a pipeline. To use the
    component, you should implement the following methods:
-        - run_raw: run on raw input
-        - run_batch_raw: run on batch of raw input
-        - run_document: run on document
-        - run_batch_document: run on batch of documents
-        - is_document: check if input is document
-        - is_batch: check if input is batch
+
+    - run_raw: run on raw input
+    - run_batch_raw: run on batch of raw input
+    - run_document: run on document
+    - run_batch_document: run on batch of documents
+    - is_document: check if input is document
+    - is_batch: check if input is batch
    """

    inflow = None
--- a/knowledgehub/cli.py
+++ b/knowledgehub/cli.py
@@ -52,6 +52,29 @@ def run(run_path):
    check_config_format(run_path)


+@main.command()
+@click.argument("module", required=True)
+@click.option(
+    "--output", default="docs.md", required=False, help="The output markdown file"
+)
+@click.option(
+    "--separation-level", required=False, default=1, help="Organize markdown layout"
+)
+def makedoc(module, output, separation_level):
+    """Make documentation for module `module`
+
+    Example:
+
+        \b
+        # Make component documentation for kotaemon library
+        $ kh makedoc kotaemon
+    """
+    from kotaemon.contribs.docs import make_doc  # deferred until command runs
+
+    make_doc(module, output, separation_level)  # writes markdown to `output`
+    print(f"Documentation exported to {output}")
+
+
@main.command()
@click.option(
    "--template",
--- a/knowledgehub/contribs/docs.py
+++ b/knowledgehub/contribs/docs.py
@@ -0,0 +1,66 @@
+import inspect
+from collections import defaultdict
+
+from theflow.utils.documentation import get_compose_documentation_from_module
+
+
+def from_definition_to_markdown(definition: dict) -> str:
+    """Render one definition (desc, params, nodes) as a markdown snippet"""
+
+    # Params table; stays " N/A" when the definition has no params
+    params = " N/A\n"
+    if definition["params"]:
+        params = "\n| Name | Description | Type | Default |\n"
+        params += "| --- | --- | --- | --- |\n"
+        for name, p in definition["params"].items():
+            type_ = p["type"].__name__ if inspect.isclass(p["type"]) else p["type"]
+            params += f"| {name} | {p['desc']} | {type_} | {p['default']} |\n"
+
+    # Nodes table; stays " N/A" when the definition has no nodes
+    nodes = " N/A\n"
+    if definition["nodes"]:
+        nodes = "\n| Name | Description | Type | Input | Output |\n"
+        nodes += "| --- | --- | --- | --- | --- |\n"
+        for name, n in definition["nodes"].items():
+            type_ = n["type"].__name__ if inspect.isclass(n["type"]) else str(n["type"])
+            input_ = (
+                n["input"].__name__ if inspect.isclass(n["input"]) else str(n["input"])
+            )
+            output_ = (
+                n["output"].__name__
+                if inspect.isclass(n["output"])
+                else str(n["output"])
+            )
+            nodes += f"|{name}|{n['desc']}|{type_}|{input_}|{output_}|\n"
+
+    description = inspect.cleandoc(definition["desc"])  # normalize indentation
+    return f"{description}\n\n_**Params:**_{params}\n_**Nodes:**_{nodes}"
+
+
+def make_doc(module: str, output: str, separation_level: int):
+    """Export the compose documentation of a module to a markdown file
+
+    Args:
+        module (str): name of the module to document
+        output (str): path of the markdown file to write
+        separation_level (int): index of the dotted-name part used as section
+    """
+    documentation = sorted(
+        get_compose_documentation_from_module(module).items(), key=lambda x: x[0]
+    )
+
+    entries = defaultdict(list)
+
+    for name, definition in documentation:
+        section = name.split(".")[separation_level].capitalize()
+        cls_name = name.split(".")[-1]
+
+        markdown = from_definition_to_markdown(definition)
+        entries[section].append(f"### {cls_name}\n{markdown}")
+
+    final = "\n".join(
+        [f"## {section}\n" + "\n".join(entries[section]) for section in entries]
+    )
+
+    with open(output, "w") as f:  # overwrites any existing file
+        f.write(final)
--- a/knowledgehub/embeddings/openai.py
+++ b/knowledgehub/embeddings/openai.py
@@ -4,10 +4,20 @@ from .base import LangchainEmbeddings


 class OpenAIEmbeddings(LangchainEmbeddings):
+    """OpenAI embeddings.
+
+    This class wraps the Langchain OpenAIEmbeddings class.
+    """
+
    _lc_class = LCOpenAIEmbeddings


 class AzureOpenAIEmbeddings(LangchainEmbeddings):
+    """Azure OpenAI embeddings.
+
+    This class wraps the Langchain OpenAIEmbeddings class.
+    """
+
    _lc_class = LCOpenAIEmbeddings

    def __init__(self, **params):
--- a/knowledgehub/pipelines/cot.py
+++ b/knowledgehub/pipelines/cot.py
@@ -15,7 +15,7 @@ class Thought(BaseComponent):
    value is the value.
    - Output: an output dictionary

-    ##### Usage:
+    _**Usage:**_

    Create and run a thought:

@@ -80,6 +80,7 @@ class Thought(BaseComponent):

    @Node.decorate(depends_on="prompt")
    def prompt_template(self):
+        """Automatically wrap around param prompt. Can ignore"""
        return BasePromptComponent(self.prompt)

    def run(self, **kwargs) -> dict:
@@ -104,7 +105,7 @@ class ManualSequentialChainOfThought(BaseComponent):
    `kotaemon.pipelines.cot.Thought`. Please refer that section for
    Thought's detail. This section is about chaining thought together.

-    ##### Usage:
+    _**Usage:**_

    **Create and run a chain of thought without "+" operator:**