from __future__ import annotations

from abc import abstractmethod
from typing import Type

from langchain.schema.embeddings import Embeddings as LCEmbeddings
from theflow import Param

from ..base import BaseComponent
from ..documents.base import Document


class BaseEmbeddings(BaseComponent):
    """Abstract interface for components that turn text into embedding vectors."""

    @abstractmethod
    def run(
        self, text: str | list[str] | Document | list[Document]
    ) -> list[list[float]]:
        """Embed the given text(s).

        Args:
            text: a string, a Document, or a list of either.

        Returns:
            One list of floats per input item.
        """
        ...


class LangchainEmbeddings(BaseEmbeddings):
    """Adapter that delegates embedding to a Langchain embeddings class.

    Subclasses set ``_lc_class`` to the Langchain class to wrap. Parameters
    whose names appear in ``_lc_class.__fields__`` are collected in
    ``_kwargs`` and passed to that class when it is instantiated; all other
    parameters are handled by the parent component.
    """

    # Langchain embeddings class to instantiate; must be provided by subclasses.
    _lc_class: Type[LCEmbeddings]

    def __init__(self, **params):
        """Split ``params`` between the Langchain class and the parent component.

        Raises:
            AttributeError: if the subclass did not set ``_lc_class``.
        """
        # ``_lc_class`` is only annotated above, so it may not exist at all on
        # a subclass that forgot to set it. ``getattr`` with a default makes
        # sure the explanatory error below is raised in that case too.
        if getattr(self, "_lc_class", None) is None:
            raise AttributeError(
                "Should set _lc_class attribute to the Embeddings class from "
                "Langchain if using Embeddings from Langchain"
            )

        self._kwargs: dict = {}
        lc_fields = self._lc_class.__fields__  # type: ignore
        # Iterate over a snapshot of the keys because entries are popped
        # from ``params`` while looping.
        for param in list(params):
            if param in lc_fields:
                self._kwargs[param] = params.pop(param)
        super().__init__(**params)

    def __setattr__(self, name, value):
        """Store Langchain fields in ``_kwargs``; set anything else normally."""
        if name in self._lc_class.__fields__:
            self._kwargs[name] = value
        else:
            super().__setattr__(name, value)

    @Param.auto(cache=False)
    def agent(self):
        """Instantiate ``_lc_class`` from the currently stored ``_kwargs``."""
        # NOTE(review): cache=False presumably means a fresh instance is built
        # on each access, so later changes to ``_kwargs`` take effect -- confirm
        # against theflow's Param.auto semantics.
        return self._lc_class(**self._kwargs)

    def run(self, text) -> list[list[float]]:
        """Embed one or more strings/Documents via the Langchain class.

        Args:
            text: a string, a Document, or a list of strings and/or Documents.
                A non-list value is treated as a single-item list.

        Returns:
            The result of ``embed_documents`` called on the extracted strings.

        Raises:
            ValueError: if an item is neither a ``str`` nor a ``Document``.
        """
        input_: list[str] = []
        if not isinstance(text, list):
            text = [text]

        for item in text:
            if isinstance(item, str):
                input_.append(item)
            elif isinstance(item, Document):
                # Documents contribute their ``text`` attribute.
                input_.append(item.text)
            else:
                raise ValueError(
                    f"Invalid input type {type(item)}, should be str or Document"
                )

        return self.agent.embed_documents(input_)