fix: add validation to avoid path-traversal vulnerabilities (#755)

* fix: add validation to avoid path-traversal vulnerabilities

* fix: update init value is_safe

Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>

* refactor: extract zip check

* fix: dont need to check relative path

* fix: disable check zip file (zipfile have taken it)

---------

Co-authored-by: kan_cin <kan@cinnamon.is>
Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
Co-authored-by: phv2312 <kat87yb@gmail.com>
This commit is contained in:
Khoi-Nguyen Nguyen-Ngoc 2025-07-02 14:50:40 +07:00 committed by GitHub
parent ec1f6abdc4
commit 37cdc28ceb
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@ -1059,15 +1059,18 @@ class FileIndexPage(BasePage):
"""Handle zip files"""
zip_files = [file for file in files if file.endswith(".zip")]
remaining_files = [file for file in files if not file.endswith("zip")]
errors: list[str] = []
# Clean-up <zip_dir> before unzip to remove old files
shutil.rmtree(zip_dir, ignore_errors=True)
# Unzip
for zip_file in zip_files:
# Prepare new zip output dir, separated for each files
basename = os.path.splitext(os.path.basename(zip_file))[0]
zip_out_dir = os.path.join(zip_dir, basename)
os.makedirs(zip_out_dir, exist_ok=True)
with zipfile.ZipFile(zip_file, "r") as zip_ref:
zip_ref.extractall(zip_out_dir)
@ -1084,7 +1087,7 @@ class FileIndexPage(BasePage):
if n_zip_file > 0:
print(f"Update zip files: {n_zip_file}")
return remaining_files
return remaining_files, errors
def index_fn(
self, files, urls, reindex: bool, settings, user_id
@ -1100,16 +1103,18 @@ class FileIndexPage(BasePage):
"""
if urls:
files = [it.strip() for it in urls.split("\n")]
errors = []
errors = self.validate_urls(files)
else:
if not files:
gr.Info("No uploaded file")
yield "", ""
return
files, unzip_errors = self._may_extract_zip(
files, flowsettings.KH_ZIP_INPUT_DIR
)
errors = self.validate_files(files)
errors.extend(unzip_errors)
files = self._may_extract_zip(files, flowsettings.KH_ZIP_INPUT_DIR)
errors = self.validate(files)
if errors:
gr.Warning(", ".join(errors))
yield "", ""
@ -1569,7 +1574,7 @@ class FileIndexPage(BasePage):
selected_item["files"],
)
def validate(self, files: list[str]):
def validate_files(self, files: list[str]):
"""Validate if the files are valid"""
paths = [Path(file) for file in files]
errors = []
@ -1598,6 +1603,14 @@ class FileIndexPage(BasePage):
return errors
def validate_urls(self, urls: list[str]):
"""Validate if the urls are valid"""
errors = []
for url in urls:
if not url.startswith("http") and not url.startswith("https"):
errors.append(f"Invalid url `{url}`")
return errors
class FileSelector(BasePage):
"""File selector UI in the Chat page"""