feat: support for visualizing citation results (via embeddings) (#461)

* feat: support for visualizing citation results (via embeddings)

Signed-off-by: Kennywu <jdlow@live.cn>

* fix: remove ktem dependency in visualize_cited

* fix: limit onnxruntime version for fastembed

* fix: test case of indexing

* fix: minor update

* fix: chroma req

* fix: chroma req

---------

Signed-off-by: Kennywu <jdlow@live.cn>
Co-authored-by: Tadashi <tadashi@cinnamon.is>
This commit is contained in:
KennyWu
2024-11-05 15:02:57 +08:00
committed by GitHub
parent bd2490bef1
commit d127fec9f7
4 changed files with 196 additions and 5 deletions

View File

@@ -53,7 +53,11 @@ class VectorIndexing(BaseIndexing):
def write_chunk_to_file(self, docs: list[Document]):
# save the chunks content into markdown format
if self.cache_dir:
file_name = Path(docs[0].metadata["file_name"])
file_name = docs[0].metadata.get("file_name")
if not file_name:
return
file_name = Path(file_name)
for i in range(len(docs)):
markdown_content = ""
if "page_label" in docs[i].metadata:

View File

@@ -38,6 +38,7 @@ dependencies = [
"langchain-cohere>=0.2.4,<0.3.0",
"llama-hub>=0.0.79,<0.1.0",
"llama-index>=0.10.40,<0.11.0",
"chromadb<=0.5.16",
"llama-index-vector-stores-chroma>=0.1.9",
"llama-index-vector-stores-lancedb",
"openai>=1.23.6,<2",
@@ -52,7 +53,8 @@ dependencies = [
"python-dotenv>=1.0.1,<1.1",
"tenacity>=8.2.3,<8.3",
"theflow>=0.8.6,<0.9.0",
"trogon>=0.5.0,<0.6"
"trogon>=0.5.0,<0.6",
"umap-learn==0.5.5",
]
readme = "README.md"
authors = [
@@ -71,6 +73,7 @@ adv = [
"duckduckgo-search>=6.1.0,<6.2",
"elasticsearch>=8.13.0,<8.14",
"fastembed",
"onnxruntime<v1.20",
"googlesearch-python>=1.2.4,<1.3",
"llama-cpp-python<0.2.8",
"llama-index>=0.10.40,<0.11.0",