Refactor Excel loader (#79)
This commit is contained in:
parent
cc1e75b3c6
commit
98c76c4700
|
@ -14,7 +14,7 @@ from kotaemon.base import Document
|
||||||
class PandasExcelReader(BaseReader):
|
class PandasExcelReader(BaseReader):
|
||||||
r"""Pandas-based CSV parser.
|
r"""Pandas-based CSV parser.
|
||||||
|
|
||||||
Parses CSVs using the separator detection from Pandas `read_csv`function.
|
Parses CSVs using the separator detection from Pandas `read_csv` function.
|
||||||
If special parameters are required, use the `pandas_config` dict.
|
If special parameters are required, use the `pandas_config` dict.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
|
@ -31,12 +31,14 @@ class PandasExcelReader(BaseReader):
|
||||||
*args: Any,
|
*args: Any,
|
||||||
pandas_config: Optional[dict] = None,
|
pandas_config: Optional[dict] = None,
|
||||||
row_joiner: str = "\n",
|
row_joiner: str = "\n",
|
||||||
|
col_joiner: str = " ",
|
||||||
**kwargs: Any,
|
**kwargs: Any,
|
||||||
) -> None:
|
) -> None:
|
||||||
"""Init params."""
|
"""Init params."""
|
||||||
super().__init__(*args, **kwargs)
|
super().__init__(*args, **kwargs)
|
||||||
self._pandas_config = pandas_config or {}
|
self._pandas_config = pandas_config or {}
|
||||||
self._row_joiner = row_joiner if row_joiner else "\n"
|
self._row_joiner = row_joiner if row_joiner else "\n"
|
||||||
|
self._col_joiner = col_joiner if col_joiner else " "
|
||||||
|
|
||||||
def load_data(
|
def load_data(
|
||||||
self,
|
self,
|
||||||
|
@ -88,7 +90,9 @@ class PandasExcelReader(BaseReader):
|
||||||
|
|
||||||
output = [
|
output = [
|
||||||
Document(
|
Document(
|
||||||
text=self._row_joiner.join(" ".join(sublist) for sublist in text_list),
|
text=self._row_joiner.join(
|
||||||
|
self._col_joiner.join(sublist) for sublist in text_list
|
||||||
|
),
|
||||||
metadata={"source": file.stem},
|
metadata={"source": file.stem},
|
||||||
)
|
)
|
||||||
]
|
]
|
||||||
|
|
Loading…
Reference in New Issue
Block a user