From cc75afc0b560c6cd98a66e6a969f553bfa958a36 Mon Sep 17 00:00:00 2001 From: Jeffrey Martin Date: Thu, 18 Jul 2024 09:40:46 -0500 Subject: [PATCH] use XDG paths for configuration data and caching Support using [XDG ver 0.8](https://specifications.freedesktop.org/basedir-spec/basedir-spec-latest.html) for project data. Specifically support: | ENV VAR | DEFAULT | |------------------|--------------------| | $XDG_DATA_HOME | $HOME/.local/share | | $XDG_CONFIG_HOME | $HOME/.config | | $XDG_CACHE_HOME | $HOME/.cache | Project name `garak` is appended to each location. This represents the following breaking changes to project expectations: * report_prefix passed either at the command line or as config file option * set filename values only * no longer overrides report_dir * report_dir passed as a config file option * when provided as a relative path will be prepended with `$XDG_DATA_HOME/garak` * when provided as an absolute path will be used as the output directory * default `user/site` configuration file `garak.site.yaml` has moved * previously `<garak package dir>/garak.site.yaml` * updated location `$XDG_CONFIG_HOME/garak/garak.site.yaml` Additional changes (not considered breaking changes): * nltk data is placed in `$XDG_CACHE_HOME/garak` if not already found in the environment * visual_jailbreak downloaded artifacts are placed in `$XDG_CACHE_HOME/garak/resources` * generated data for beast/gcg/tap are placed in `$XDG_CACHE_HOME/garak/resources` Signed-off-by: Jeffrey Martin --- garak/_config.py | 16 ++- garak/_plugins.py | 7 +- garak/command.py | 33 ++++--- garak/evaluators/base.py | 12 ++- garak/interactive.py | 2 +- garak/probes/tap.py | 3 +- garak/probes/visual_jailbreak.py | 6 +- garak/resources/autodan/autodan.py | 15 +-- garak/resources/autodan/genetic.py | 29 +++++- garak/resources/beast/beast_attack.py | 12 ++- garak/resources/common.py | 12 ++- garak/resources/gcg/generate_gcg.py | 15 +-- garak/resources/tap/tap_main.py | 8 +- pyproject.toml | 3 +- requirements.txt | 1 + tests/analyze/test_analyze.py | 15 ++- tests/buffs/test_buff_config.py | 24 +++-- 
tests/test_attempt.py | 12 ++- tests/test_config.py | 135 ++++++++++++++++++++++++-- 19 files changed, 288 insertions(+), 72 deletions(-) diff --git a/garak/_config.py b/garak/_config.py index 10aa270f..52ee5716 100644 --- a/garak/_config.py +++ b/garak/_config.py @@ -15,6 +15,11 @@ import pathlib from typing import List import yaml +from xdg_base_dirs import ( + xdg_cache_home, + xdg_config_home, + xdg_data_home, +) DICT_CONFIG_AFTER_LOAD = False @@ -26,6 +31,7 @@ run_params = "seed deprefix eval_threshold generations probe_tags interactive".split() plugins_params = "model_type model_name extended_detectors".split() reporting_params = "taxonomy report_prefix".split() +project_dir = "garak" loaded = False @@ -53,9 +59,17 @@ class TransientConfig(GarakSubConfig): args = None # only access this when determining what was passed on CLI run_id = None basedir = pathlib.Path(__file__).parents[0] + config_dir = xdg_config_home() / project_dir + data_dir = xdg_data_home() / project_dir + cache_dir = xdg_cache_home() / project_dir starttime = None starttime_iso = None + # initialize the user home and cache paths if they do not exist + config_dir.mkdir(mode=0o740, parents=True, exist_ok=True) + data_dir.mkdir(mode=0o740, parents=True, exist_ok=True) + cache_dir.mkdir(mode=0o740, parents=True, exist_ok=True) + transient = TransientConfig() @@ -151,7 +165,7 @@ def load_config( settings_files = [str(transient.basedir / "resources" / "garak.core.yaml")] - fq_site_config_filename = str(transient.basedir / site_config_filename) + fq_site_config_filename = str(transient.config_dir / site_config_filename) if os.path.isfile(fq_site_config_filename): settings_files.append(fq_site_config_filename) else: diff --git a/garak/_plugins.py b/garak/_plugins.py index e2c7fe92..3875389f 100644 --- a/garak/_plugins.py +++ b/garak/_plugins.py @@ -36,7 +36,9 @@ def default(self, obj): class PluginCache: _plugin_cache_file = _config.transient.basedir / "resources" / "plugin_cache.json" - 
_user_plugin_cache_file = _plugin_cache_file + _user_plugin_cache_file = ( + _config.transient.cache_dir / "resources" / "plugin_cache.json" + ) _plugin_cache_dict = None def __init__(self) -> None: @@ -55,6 +57,9 @@ def _load_plugin_cache(self): if not os.path.exists(self._plugin_cache_file): self._build_plugin_cache() if not os.path.exists(self._user_plugin_cache_file): + self._user_plugin_cache_file.parent.mkdir( + mode=0o740, parents=True, exist_ok=True + ) shutil.copy2(self._plugin_cache_file, self._user_plugin_cache_file) with open(self._user_plugin_cache_file, "r", encoding="utf-8") as cache_file: local_cache = json.load(cache_file) diff --git a/garak/command.py b/garak/command.py index 4a700778..34d4b155 100644 --- a/garak/command.py +++ b/garak/command.py @@ -8,8 +8,10 @@ def start_logging(): + from garak import _config + logging.basicConfig( - filename="garak.log", + filename=_config.transient.data_dir / "garak.log", level=logging.DEBUG, format="%(asctime)s %(levelname)s %(message)s", ) @@ -32,6 +34,7 @@ def start_run(): import os import uuid + from pathlib import Path from garak import _config logging.info("started at %s", _config.transient.starttime_iso) @@ -41,19 +44,25 @@ def start_run(): "⚠️ The current/default config is optimised for speed rather than thoroughness. Try e.g. --config full for a stronger test, or specify some probes." ) _config.transient.run_id = str(uuid.uuid4()) # uuid1 is safe but leaks host info + # why is report prefix a condition of placing file in the report_dir? 
+ report_path = Path(_config.reporting.report_dir) + if not report_path.is_absolute(): + logging.debug("relative report dir provided") + report_path = _config.transient.data_dir / _config.reporting.report_dir + if not os.path.isdir(report_path): + try: + report_path.mkdir(mode=0o740, parents=True, exist_ok=True) + except PermissionError as e: + raise PermissionError( + f"Can't create logging directory {report_path}, quitting" + ) from e + + filename = f"garak.{_config.transient.run_id}.report.jsonl" if not _config.reporting.report_prefix: - if not os.path.isdir(_config.reporting.report_dir): - try: - os.mkdir(_config.reporting.report_dir) - except PermissionError as e: - raise PermissionError( - f"Can't create logging directory {_config.reporting.report_dir}, quitting" - ) from e - _config.transient.report_filename = f"{_config.reporting.report_dir}/garak.{_config.transient.run_id}.report.jsonl" + filename = f"garak.{_config.transient.run_id}.report.jsonl" else: - _config.transient.report_filename = ( - _config.reporting.report_prefix + ".report.jsonl" - ) + filename = _config.reporting.report_prefix + ".report.jsonl" + _config.transient.report_filename = str(report_path / filename) _config.transient.reportfile = open( _config.transient.report_filename, "w", buffering=1, encoding="utf-8" ) diff --git a/garak/evaluators/base.py b/garak/evaluators/base.py index 6152e695..15fea84e 100644 --- a/garak/evaluators/base.py +++ b/garak/evaluators/base.py @@ -5,6 +5,7 @@ import json import logging +from pathlib import Path from typing import Iterable from colorama import Fore, Style @@ -72,12 +73,19 @@ def evaluate(self, attempts: Iterable[garak.attempt.Attempt]) -> None: hitlog_mode = ( "w" if _config.transient.hitlogfile is None else "a" ) + # why is report prefix a condition of placing file in the report_dir? 
+ hitlog_path = Path(_config.transient.report_filename).parent if not _config.reporting.report_prefix: - hitlog_filename = f"{_config.reporting.report_dir}/garak.{_config.transient.run_id}.hitlog.jsonl" + hitlog_filename = ( + hitlog_path + / f"garak.{_config.transient.run_id}.hitlog.jsonl" + ) # else: hitlog_filename = ( - _config.reporting.report_prefix + ".hitlog.jsonl" + hitlog_path + / f"{_config.reporting.report_prefix}.hitlog.jsonl" ) + logging.info("hit log in %s", hitlog_filename) _config.transient.hitlogfile = open( hitlog_filename, diff --git a/garak/interactive.py b/garak/interactive.py index 309c3d56..3049491f 100644 --- a/garak/interactive.py +++ b/garak/interactive.py @@ -233,7 +233,7 @@ def settings_ns_provider(self) -> argparse.Namespace: def banner(): """Display a random banner""" with open( - _config.transient.basedir / "resources/banners", "r", encoding="utf-8" + _config.transient.basedir / "resources" / "banners", "r", encoding="utf-8" ) as bannerfile: banners = bannerfile.read().lstrip().split("\n\n") print(Fore.LIGHTRED_EX + f"\n{random.choice(banners)}\n" + Fore.RESET) diff --git a/garak/probes/tap.py b/garak/probes/tap.py index 4d22f0d0..e64c04e0 100644 --- a/garak/probes/tap.py +++ b/garak/probes/tap.py @@ -37,6 +37,7 @@ """ import logging +from pathlib import Path from typing import List import tqdm @@ -68,7 +69,7 @@ class TAPCached(Probe): def __init__( self, - prompts_location: str = _config.transient.basedir + prompts_location: Path = _config.transient.basedir / "resources" / "tap" / "data" diff --git a/garak/probes/visual_jailbreak.py b/garak/probes/visual_jailbreak.py index 4afafd2f..24d68340 100644 --- a/garak/probes/visual_jailbreak.py +++ b/garak/probes/visual_jailbreak.py @@ -49,7 +49,7 @@ class FigStep(Probe): def _load_SafeBench(self): safebench_data_dir = ( - _config.transient.basedir / "resources" / "visual_jailbreak" / "SafeBench" + _config.transient.cache_dir / "resources" / "visual_jailbreak" / "SafeBench" ) if not 
os.path.exists(safebench_data_dir): # make the dir @@ -94,7 +94,7 @@ def probe(self, generator): self.prompts = [ { "text": prompt["text"], - "image": str(_config.transient.basedir / prompt["image"]), + "image": str(_config.transient.cache_dir / prompt["image"]), } for prompt in self.prompts ] @@ -125,7 +125,7 @@ def probe(self, generator): self.prompts = [ { "text": prompt["text"], - "image": str(_config.transient.basedir / prompt["image"]), + "image": str(_config.transient.cache_dir / prompt["image"]), } for prompt in self.prompts ] diff --git a/garak/resources/autodan/autodan.py b/garak/resources/autodan/autodan.py index de82ef9a..690df6c3 100644 --- a/garak/resources/autodan/autodan.py +++ b/garak/resources/autodan/autodan.py @@ -31,6 +31,9 @@ autodan_resource_data = ( garak._config.transient.basedir / "resources" / "autodan" / "data" ) +cached_autodan_resource_data = ( + garak._config.transient.cache_dir / "resources" / "autodan" / "data" +) autodan_parser = argparse.ArgumentParser(description="AutoDAN config") autodan_parser.add_argument( "--num_steps", type=int, default=100, help="Number of steps to run generation" @@ -88,9 +91,9 @@ def autodan_generate( mutation_generator_name: str = "gpt-3.5-turbo", mutation_generator_type: str = "openai", hierarchical: bool = False, - out_path: str = str(autodan_resource_data / "autodan_prompts.txt"), - init_prompt_path: str = str(autodan_resource_data / "autodan_init.txt"), - reference_path: str = str(autodan_resource_data / "prompt_group.pth"), + out_path: Path = cached_autodan_resource_data / "autodan_prompts.txt", + init_prompt_path: Path = autodan_resource_data / "autodan_init.txt", + reference_path: Path = autodan_resource_data / "prompt_group.pth", low_memory: bool = False, random_seed: int = None, ): @@ -109,9 +112,9 @@ def autodan_generate( mutation_generator_name (str): Name of model to use as the mutation generator mutation_generator_type (str): Type of model to use as the mutation generator hierarchical 
(bool): Whether ot use hierarchical GA - out_path (str): Path to write generated AutoDAN string - init_prompt_path (str): Path to initial prompts - reference_path (str): Path to reference prompt tensors + out_path (Path): Path to write generated AutoDAN string + init_prompt_path (Path): Path to initial prompts + reference_path (Path): Path to reference prompt tensors low_memory (bool): Whether to use low memory random_seed (int): Random seed, if used. diff --git a/garak/resources/autodan/genetic.py b/garak/resources/autodan/genetic.py index b16d81a8..dd788940 100644 --- a/garak/resources/autodan/genetic.py +++ b/garak/resources/autodan/genetic.py @@ -2,33 +2,54 @@ # SPDX-License-Identifier: Apache-2.0 import gc +import nltk.downloader import numpy as np import torch import random import openai -import os import re import nltk from nltk.corpus import stopwords, wordnet from collections import defaultdict, OrderedDict +from pathlib import Path import sys import time from logging import getLogger from typing import Tuple +from garak import _config from garak.resources.autodan.model_utils import AutoDanPrefixManager, forward logger = getLogger(__name__) + +def _nltk_data(): + """Set nltk_data location, if an existing default is found utilize it, otherwise add to project's cache location.""" + from nltk.downloader import Downloader + + default_path = Path(Downloader().default_download_dir()) + if not default_path.exists(): + # if path not found then place in the user cache + # get env var for NLTK_DATA, fallback to create in cachedir / nltk_data + logger.debug("nltk_data location not found using project cache location") + _nltk_data_path.mkdir(mode=0o740, parents=True, exist_ok=True) + default_path = _nltk_data_path + return default_path + + +_nltk_data_path = _config.transient.cache_dir / "nltk_data" +nltk.data.path.append(str(_nltk_data_path)) + # TODO: Refactor into setup.py try: _ = stopwords.words("english") _ = nltk.word_tokenize("This is a normal English 
sentence") _ = wordnet.synsets("word") except LookupError as e: - nltk.download("stopwords") - nltk.download("punkt") - nltk.download("wordnet") + download_path = _nltk_data() + nltk.download("stopwords", download_dir=download_path) + nltk.download("punkt", download_dir=download_path) + nltk.download("wordnet", download_dir=download_path) # TODO: Could probably clean up the inputs here by using imports. diff --git a/garak/resources/beast/beast_attack.py b/garak/resources/beast/beast_attack.py index 3b0402db..b04f5091 100644 --- a/garak/resources/beast/beast_attack.py +++ b/garak/resources/beast/beast_attack.py @@ -2,6 +2,7 @@ # SPDX-License-Identifier: Apache-2.0 import logging +from pathlib import Path from typing import Union, Optional, Tuple import torch @@ -17,7 +18,7 @@ "A chat between a curious user and an artificial intelligence assistant. " "The assistant gives helpful, detailed, and polite answers to the user's questions." ) -beast_resource_data = garak._config.transient.basedir / "resources" / "beast" / "data" +beast_resource_data = garak._config.transient.cache_dir / "resources" / "beast" / "data" def _format_chat(generator: Generator, prompt: str): @@ -296,7 +297,11 @@ def _attack( if responses is None: responses = ["" for _ in range(len(prompts))] for prompt, response in tqdm( - zip(prompts, responses), total=len(prompts), leave=False, position=0, desc="BEAST attack" + zip(prompts, responses), + total=len(prompts), + leave=False, + position=0, + desc="BEAST attack", ): best_candidate = [] if trials > 1: @@ -342,7 +347,7 @@ def run_beast( suffix_len: int = 40, data_size: int = 20, target: Optional[str] = "", - outfile: str = beast_resource_data / "suffixes.txt", + outfile: Path = beast_resource_data / "suffixes.txt", stop_early: bool = False, ) -> Union[list[str], None]: """ @@ -395,6 +400,7 @@ def run_beast( ) if suffixes and outfile: + outfile.parent.mkdir(mode=0o740, parents=True, exist_ok=True) with open(outfile, "a") as f: for suffix in suffixes: 
f.write(f"{suffix}\n") diff --git a/garak/resources/common.py b/garak/resources/common.py index 0f7d5cab..8112b541 100644 --- a/garak/resources/common.py +++ b/garak/resources/common.py @@ -1,4 +1,5 @@ import logging +import shutil import urllib.error from pathlib import Path import pandas as pd @@ -42,12 +43,21 @@ def load_advbench(size: int = 0) -> pd.DataFrame: - advbench_path = ( + advbench_base_path = ( garak._config.transient.basedir / "resources" / "advbench" / "harmful_behaviors.csv" ) + advbench_path = ( + garak._config.transient.cache_dir + / "resources" + / "advbench" + / "harmful_behaviors.csv" + ) + if advbench_base_path.is_file() and not advbench_path.is_file(): + shutil.copy2(advbench_base_path, advbench_path) + if not advbench_path.is_file(): try: hb = "https://raw.githubusercontent.com/llm-attacks/llm-attacks/main/data/advbench/harmful_behaviors.csv" diff --git a/garak/resources/gcg/generate_gcg.py b/garak/resources/gcg/generate_gcg.py index 8c2a3abe..f004d265 100644 --- a/garak/resources/gcg/generate_gcg.py +++ b/garak/resources/gcg/generate_gcg.py @@ -37,7 +37,8 @@ logger = getLogger(__name__) -gcg_resource_data = garak._config.transient.basedir / "resources" / "gcg" / "data" +resource_data = garak._config.transient.basedir / "resources" +gcg_resource_data = garak._config.transient.cache_dir / "resources" / "gcg" / "data" # GCG parser used by interactive mode gcg_parser = ArgumentParser() @@ -52,7 +53,7 @@ gcg_parser.add_argument( "--train_data", type=str, - default=gcg_resource_data / "advbench" / "harmful_behaviors.csv", + default=resource_data / "advbench" / "harmful_behaviors.csv", help="Path to training data", ) gcg_parser.add_argument( @@ -64,7 +65,7 @@ gcg_parser.add_argument( "--outfile", type=str, - default=gcg_resource_data / "gcg_prompts.txt", + default=resource_data / "gcg_prompts.txt", help="Location to write GCG attack output", ) gcg_parser.add_argument( @@ -92,10 +93,10 @@ def run_gcg( transfer: bool = False, progressive: bool = 
False, stop_success: bool = True, - train_data: Union[str,None] = None, + train_data: Union[str, None] = None, n_train: int = 50, n_test: int = 0, - outfile: str = gcg_resource_data / "gcg.txt", + outfile: Path = gcg_resource_data / "gcg.txt", control_init: str = CONTROL_INIT, deterministic: bool = True, n_steps: int = 500, @@ -124,7 +125,7 @@ def run_gcg( train_data (str): Path to training data n_train (int): Number of training examples to use n_test (int): Number of test examples to use - outfile (str): Where to write successful prompts + outfile (Path): Where to write successful prompts control_init (str): Initial adversarial suffix to modify deterministic (bool): Whether or not to use deterministic gbda n_steps (int): Number of training steps @@ -178,7 +179,7 @@ def run_gcg( logfile = gcg_resource_data / "logs" f"{timestamp}_{model_string}.json" # Create logfile directory - p = Path(logfile).parent + p = logfile.parent p.mkdir(parents=True, exist_ok=True) ( diff --git a/garak/resources/tap/tap_main.py b/garak/resources/tap/tap_main.py index ac674184..61bdda3a 100644 --- a/garak/resources/tap/tap_main.py +++ b/garak/resources/tap/tap_main.py @@ -4,6 +4,7 @@ import copy import re import torch.cuda +from pathlib import Path from tqdm import tqdm from logging import getLogger from typing import Union, Optional @@ -34,7 +35,7 @@ SAVE_RESULTS = True resources_tap_data_file = ( - garak._config.transient.basedir + garak._config.transient.cache_dir / "resources" / "tap" / "data" @@ -340,7 +341,7 @@ def run_tap( keep_last_n: int = 1, pruning: bool = True, save_results: bool = SAVE_RESULTS, - outfile: str = resources_tap_data_file, + outfile: Path = resources_tap_data_file, ): """ Function for generating attacks using TAP where a generator has already been instantiated. @@ -527,6 +528,7 @@ def run_tap( jailbreaks = list(set(jailbreaks)) msg = f"Found {len(jailbreaks)} jailbreak(s). Exiting." 
if save_results: + outfile.parent.mkdir(mode=0o740, parents=True, exist_ok=True) with open(outfile, "a", encoding="utf-8") as f: for jailbreak in jailbreaks: f.write(f"{jailbreak}\n") @@ -565,7 +567,7 @@ def generate_tap( n_streams: int = 1, keep_last_n: int = 1, save_results: bool = SAVE_RESULTS, - outfile: str = resources_tap_data_file, + outfile: Path = resources_tap_data_file, ): """ Function for generating attacks using TAP when a generator has not been instantiated. diff --git a/pyproject.toml b/pyproject.toml index b112d0a4..ebe1da6c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -67,7 +67,8 @@ dependencies = [ "fschat>=0.2.36", "litellm>=1.33.8", "jsonpath-ng>=1.6.1", - "lorem==0.1.1" + "lorem==0.1.1", + "xdg-base-dirs>=6.0.1", ] [project.optional-dependencies] diff --git a/requirements.txt b/requirements.txt index ac25f769..192b642a 100644 --- a/requirements.txt +++ b/requirements.txt @@ -29,6 +29,7 @@ fschat>=0.2.36 litellm>=1.33.8 jsonpath-ng>=1.6.1 lorem==0.1.1 +xdg-base-dirs>=6.0.1 # tests pytest>=8.0 requests-mock==1.12.1 diff --git a/tests/analyze/test_analyze.py b/tests/analyze/test_analyze.py index df93dd31..a20f68d7 100644 --- a/tests/analyze/test_analyze.py +++ b/tests/analyze/test_analyze.py @@ -1,13 +1,12 @@ # SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
# SPDX-License-Identifier: Apache-2.0 -import os import subprocess import sys import pytest -from garak import cli +from garak import cli, _config TEMP_PREFIX = "_garak_internal_test_temp" @@ -23,7 +22,11 @@ def test_analyze_log_runs(): sys.executable, "-m", "garak.analyze.analyze_log", - TEMP_PREFIX + ".report.jsonl", + str( + _config.transient.data_dir + / _config.reporting.report_dir + / f"{TEMP_PREFIX}.report.jsonl" + ), ], check=True, ) @@ -36,7 +39,11 @@ def test_report_digest_runs(): sys.executable, "-m", "garak.analyze.report_digest", - TEMP_PREFIX + ".report.jsonl", + str( + _config.transient.data_dir + / _config.reporting.report_dir + / f"{TEMP_PREFIX}.report.jsonl" + ), ], check=True, ) diff --git a/tests/buffs/test_buff_config.py b/tests/buffs/test_buff_config.py index 5ab21c0f..815bd405 100644 --- a/tests/buffs/test_buff_config.py +++ b/tests/buffs/test_buff_config.py @@ -16,11 +16,13 @@ import pytest -import garak -import garak.cli +from garak import cli, _config PREFIX = "test_buff_single" + str(uuid.uuid4()) +_config.load_config() +REPORT_PATH = _config.transient.data_dir / _config.reporting.report_dir + def test_include_original_prompt(): # https://github.com/python/cpython/pull/97015 to ensure Windows compatibility @@ -34,13 +36,15 @@ def test_include_original_prompt(): ) ) tmp.close() - garak.cli.main( + cli.main( f"-m test -p test.Test -b lowercase.Lowercase --config {tmp.name} --report_prefix {PREFIX}".split() ) os.remove(tmp.name) prompts = [] - with open(f"{PREFIX}.report.jsonl", "r", encoding="utf-8") as reportfile: + with open( + REPORT_PATH / f"{PREFIX}.report.jsonl", "r", encoding="utf-8" + ) as reportfile: for line in reportfile: r = json.loads(line) if r["entry_type"] == "attempt" and r["status"] == 1: @@ -68,13 +72,15 @@ def test_exclude_original_prompt(): ) ) tmp.close() - garak.cli.main( + cli.main( f"-m test -p test.Test -b lowercase.Lowercase --config {tmp.name} --report_prefix {PREFIX}".split() ) os.remove(tmp.name) prompts = [] - 
with open(f"{PREFIX}.report.jsonl", "r", encoding="utf-8") as reportfile: + with open( + REPORT_PATH / f"{PREFIX}.report.jsonl", "r", encoding="utf-8" + ) as reportfile: for line in reportfile: r = json.loads(line) if r["entry_type"] == "attempt" and r["status"] == 1: @@ -89,9 +95,9 @@ def cleanup(request): def remove_buff_reports(): files = [ - f"{PREFIX}.report.jsonl", - f"{PREFIX}.report.html", - f"{PREFIX}.hitlog.jsonl", + REPORT_PATH / f"{PREFIX}.report.jsonl", + REPORT_PATH / f"{PREFIX}.report.html", + REPORT_PATH / f"{PREFIX}.hitlog.jsonl", ] for file in files: if os.path.exists(file): diff --git a/tests/test_attempt.py b/tests/test_attempt.py index dc4c50e5..449c5b25 100644 --- a/tests/test_attempt.py +++ b/tests/test_attempt.py @@ -7,15 +7,19 @@ import pytest import garak.attempt -import garak.cli +from garak import cli, _config + +PREFIX = "_garak_test_attempt_sticky_params" def test_attempt_sticky_params(capsys): - garak.cli.main( - "-m test.Blank -g 1 -p atkgen,dan.Dan_6_0 --report_prefix _garak_test_attempt_sticky_params".split() + + cli.main( + f"-m test.Blank -g 1 -p atkgen,dan.Dan_6_0 --report_prefix {PREFIX}".split() ) + report_path = _config.transient.data_dir / _config.reporting.report_dir reportlines = ( - open("_garak_test_attempt_sticky_params.report.jsonl", "r", encoding="utf-8") + open(report_path / f"{PREFIX}.report.jsonl", "r", encoding="utf-8") .read() .split("\n") ) diff --git a/tests/test_config.py b/tests/test_config.py index b431c7c2..b084e777 100644 --- a/tests/test_config.py +++ b/tests/test_config.py @@ -6,10 +6,12 @@ import os import re import shutil +import sys import tempfile import pytest +from pathlib import Path from garak import _config import garak.cli @@ -56,6 +58,7 @@ ) ANSI_ESCAPE = re.compile(r"\x1B(?:[@-Z\\-_]|\[[0-?]*[ -/]*[@-~])") +XDG_VARS = ("XDG_DATA_HOME", "XDG_CONFIG_HOME", "XDG_CACHE_HOME") OPTIONS_SOLO = [ # "verbose", # not sure hot to test argparse action="count" @@ -95,20 +98,99 @@ def 
allow_site_config(request): site_cfg_moved = False try: - shutil.move("garak/garak.site.yaml", SITE_YAML_FILENAME) + shutil.move( + _config.transient.config_dir / "garak.site.yaml", SITE_YAML_FILENAME + ) site_cfg_moved = True except FileNotFoundError: site_cfg_moved = False def restore_site_config(): if site_cfg_moved: - shutil.move(SITE_YAML_FILENAME, "garak/garak.site.yaml") - elif os.path.exists("garak/garak.site.yaml"): - os.remove("garak/garak.site.yaml") + shutil.move( + SITE_YAML_FILENAME, _config.transient.config_dir / "garak.site.yaml" + ) + elif os.path.exists(_config.transient.config_dir / "garak.site.yaml"): + os.remove(_config.transient.config_dir / "garak.site.yaml") request.addfinalizer(restore_site_config) +@pytest.fixture +def override_xdg_env(request): + restore_vars = {} + with tempfile.TemporaryDirectory() as tmpdir: + for env_var in XDG_VARS: + current_val = os.getenv(env_var, None) + if current_val is not None: + restore_vars[env_var] = current_val + os.environ[env_var] = tmpdir + + def restore_xdg_env(): + for env_var in XDG_VARS: + restored = restore_vars.get(env_var) + if restored is not None: + os.environ[env_var] = restored + else: + del os.environ[env_var] + + request.addfinalizer(restore_xdg_env) + + return tmpdir + + +@pytest.fixture +def clear_xdg_env(request): + restore_vars = {} + for env_var in XDG_VARS: + current_val = os.getenv(env_var, None) + if current_val is not None: + restore_vars[env_var] = current_val + del os.environ[env_var] + + def restore_xdg_env(): + for env_var in XDG_VARS: + restored = restore_vars.get(env_var) + if restored is not None: + os.environ[env_var] = restored + else: + try: + del os.environ[env_var] + except KeyError as e: + pass + + request.addfinalizer(restore_xdg_env) + + +# environment variables adjust transient values +def test_xdg_support(override_xdg_env): + test_path = Path(override_xdg_env) + + importlib.reload(_config) + + assert _config.transient.cache_dir == test_path / _config.project_dir + 
assert _config.transient.config_dir == test_path / _config.project_dir + assert _config.transient.data_dir == test_path / _config.project_dir + + +@pytest.mark.usefixtures("clear_xdg_env") +def test_xdg_defaults(): + if "HOME" in os.environ: + test_path = Path(os.environ["HOME"]) + elif sys.platform == "win32" and "USERPROFILE" in os.environ: + # the xdg lib returns values prefixed with "USERPROFILE" on windows + test_path = Path(os.environ["USERPROFILE"]) + + importlib.reload(_config) + + assert _config.transient.cache_dir == test_path / ".cache" / _config.project_dir + assert _config.transient.config_dir == test_path / ".config" / _config.project_dir + assert ( + _config.transient.data_dir + == test_path / ".local" / "share" / _config.project_dir + ) + + # test CLI assertions of each var @pytest.mark.parametrize("option", OPTIONS_SOLO) def test_cli_solo_settings(option): @@ -188,7 +270,9 @@ def test_yaml_param_settings(param): def test_site_yaml_overrides_core_yaml(): importlib.reload(_config) - with open("garak/garak.site.yaml", "w", encoding="utf-8") as f: + with open( + _config.transient.config_dir / "garak.site.yaml", "w", encoding="utf-8" + ) as f: f.write("---\nrun:\n eval_threshold: 0.777\n") f.flush() garak.cli.main(["--list_config"]) @@ -201,7 +285,9 @@ def test_site_yaml_overrides_core_yaml(): def test_run_yaml_overrides_site_yaml(): importlib.reload(_config) - with open("garak/garak.site.yaml", "w", encoding="utf-8") as f: + with open( + _config.transient.config_dir / "garak.site.yaml", "w", encoding="utf-8" + ) as f: file_data = [ "---", "run:", @@ -613,15 +699,46 @@ def test_tag_filter(): assert "probes.lmrc.SexualContent" in found +# when provided an absolute path as `reporting.report_dir` do not used `user_data_dir` +def test_report_dir_full_path(): + importlib.reload(_config) + + with tempfile.TemporaryDirectory() as tmpdir: + + report_path = Path(tmpdir).absolute() + with tempfile.NamedTemporaryFile(buffering=0, delete=False) as tmp: + tmp.write( 
+ "\n".join( + [ + f"---", + f"reporting:", + f" report_dir: {report_path}", + ] + ).encode("utf-8") + ) + tmp.close() + garak.cli.main( + f"-m test.Blank --report_prefix abs_path_test -p test.Blank -d always.Fail --config {tmp.name}".split() + ) + os.remove(tmp.name) + assert os.path.isfile(report_path / "abs_path_test.report.jsonl") + assert os.path.isfile(report_path / "abs_path_test.report.html") + assert os.path.isfile(report_path / "abs_path_test.hitlog.jsonl") + + +# report prefix is used only for filename, report_dir is placed in user_data_dir def test_report_prefix_with_hitlog_no_explode(): importlib.reload(_config) garak.cli.main( "-m test.Blank --report_prefix kjsfhgkjahpsfdg -p test.Blank -d always.Fail".split() ) - assert os.path.isfile("kjsfhgkjahpsfdg.report.jsonl") - assert os.path.isfile("kjsfhgkjahpsfdg.report.html") - assert os.path.isfile("kjsfhgkjahpsfdg.hitlog.jsonl") + report_path = Path(_config.transient.report_filename).parent + assert _config.reporting.report_dir in str(report_path) + assert str(_config.transient.data_dir) in str(report_path) + assert os.path.isfile(report_path / "kjsfhgkjahpsfdg.report.jsonl") + assert os.path.isfile(report_path / "kjsfhgkjahpsfdg.report.html") + assert os.path.isfile(report_path / "kjsfhgkjahpsfdg.hitlog.jsonl") def test_nested():