[AUR-401] Disable Haystack telemetry with monkey patching (#1)

Sample log emitted by Haystack telemetry when running a pipeline. Note: `pipeline.classname` can leak company information.

```json
{
  "hardware.cpus": 16,
  "hardware.gpus": 0,
  "libraries.colab": false,
  "libraries.cuda": false,
  "libraries.haystack": "1.20.0rc0",
  "libraries.ipython": false,
  "libraries.pytest": false,
  "libraries.ray": false,
  "libraries.torch": false,
  "libraries.transformers": "4.31.0",
  "os.containerized": false,
  "os.family": "Linux",
  "os.machine": "x86_64",
  "os.version": "6.2.0-26-generic",
  "pipeline.classname": "TempPipeline",
  "pipeline.config_hash": "07a8eddd5a6e512c0d898c6d9f445ed9",
  "pipeline.nodes.PromptNode": 1,
  "pipeline.nodes.Shaper": 1,
  "pipeline.nodes.WebRetriever": 1,
  "pipeline.run_parameters.debug": false,
  "pipeline.run_parameters.documents": [
    0
  ],
  "pipeline.run_parameters.file_paths": 0,
  "pipeline.run_parameters.labels": 0,
  "pipeline.run_parameters.meta": 1,
  "pipeline.run_parameters.params": false,
  "pipeline.run_parameters.queries": true,
  "pipeline.runs": 1,
  "pipeline.type": "Query",
  "python.version": "3.10.12"
}
```

Solution: Haystack telemetry relies on the `telemetry` variable, the `posthog` library, and the `HAYSTACK_TELEMETRY_ENABLED` environment variable. We set the environment variable to False and make sure the relevant objects are disabled.
This commit is contained in:
Nguyen Trung Duc (john)
2023-08-22 10:02:46 +07:00
committed by GitHub
parent 043209fda7
commit e9d1d5c118
4 changed files with 548 additions and 0 deletions

View File

@@ -0,0 +1,51 @@
import codecs
import re
from pathlib import Path
import setuptools
def read(file_path: str) -> str:
    """Return the entire contents of a text file.

    Args:
        file_path: Path of the file to read.

    Returns:
        The file's text, decoded as UTF-8.
    """
    # The context manager guarantees the handle is closed even if read() fails,
    # and the explicit encoding avoids depending on the platform's locale.
    with open(file_path, "r", encoding="utf-8") as handle:
        return handle.read()
def get_version() -> str:
    """Return the package version declared in ``kotaemon/__init__.py``.

    The file is read as text (relative to the current working directory) and
    searched for a line of the form ``__version__ = "<version>"``; the package
    itself is not imported.

    Returns:
        The text between the quotes of the ``__version__`` assignment.

    Raises:
        RuntimeError: If no ``__version__`` assignment is found.
    """
    version_file = read(str(Path("kotaemon", "__init__.py")))
    # re.M makes ``^`` match at the start of every line, not only the file start.
    match = re.search(r"^__version__ = ['\"]([^'\"]*)['\"]", version_file, re.M)
    if match:
        return match.group(1)
    raise RuntimeError("Cannot find version string")
# Package metadata handed to setuptools. Keys are listed in the order their
# values must be evaluated (version lookup first, then the README read, then
# package discovery).
package_metadata = {
    "name": "kotaemon",
    "version": get_version(),
    "author": "john",
    "author_email": "john@cinnamon.com",
    "description": "Kotaemon core library for AI development",
    # The README doubles as the long description and is declared as Markdown.
    "long_description": read("README.md"),
    "long_description_content_type": "text/markdown",
    "url": "https://github.com/Cinnamon/kotaemon/",
    # Ship every discovered package except the test suite.
    "packages": setuptools.find_packages(exclude=("tests", "tests.*")),
    "install_requires": ["farm-haystack"],
    # Optional "dev" extra.
    "extras_require": {
        "dev": ["pytest", "pre-commit", "black", "flake8", "sphinx", "coverage"],
    },
    # Registers the `kh` console command, backed by kotaemon.cli:main.
    "entry_points": {"console_scripts": ["kh=kotaemon.cli:main"]},
    "python_requires": ">=3",
    "classifiers": [
        "Programming Language :: Python :: 3",
        "License :: OSI Approved :: MIT License",
        "Operating System :: OS Independent",
    ],
    "include_package_data": True,
}

setuptools.setup(**package_metadata)