Merge branch 'main' into feature/hf_formats

leondz · Jul 5, 2024 · 8c4b224 · 8c4b224
2 parents 42a1304 + e5d2458
commit 8c4b224
Show file tree

Hide file tree

Showing 12 changed files with 108 additions and 21 deletions.
diff --git a/garak/buffs/base.py b/garak/buffs/base.py
@@ -33,6 +33,8 @@ class Buff(Configurable):
     bcp47 = None  # set of languages this buff should be constrained to
     active = True
 
+    DEFAULT_PARAMS = {}
+
     def __init__(self, config_root=_config) -> None:
         self._load_config(config_root)
         module = self.__class__.__module__.replace("garak.buffs.", "")

diff --git a/garak/detectors/base.py b/garak/detectors/base.py
@@ -32,6 +32,8 @@ class Detector(Configurable):
     # we focus on LLM output for detectors
     modality: dict = {"out": {"text"}}
 
+    DEFAULT_PARAMS = {}
+
     def _set_description(self):
         if "description" not in dir(self):
             try:

diff --git a/garak/exception.py b/garak/exception.py
@@ -24,3 +24,15 @@ class GarakBackoffTrigger(GarakException):
     """Thrown when backoff should be triggered"""
 
     pass
+
+
+class BadGeneratorException(GarakException):
+    """Generator config/description is not usable"""
+
+    pass
+
+
+class RateLimitHit(Exception):
+    """Raised when a rate limiting response is returned"""
+
+    pass
diff --git a/garak/generators/base.py b/garak/generators/base.py
@@ -11,6 +11,7 @@
 
 from garak import _config
 from garak.configurable import Configurable
+import garak.resources.theme
 
 
 class Generator(Configurable):
@@ -107,11 +108,12 @@ def generate(
         if generations_this_call == 1:
             outputs = self._call_model(prompt, 1)
 
-        if self.supports_multiple_generations:
+        elif self.supports_multiple_generations:
             outputs = self._call_model(prompt, generations_this_call)
 
         else:
             outputs = []
+
             if (
                 hasattr(_config.system, "parallel_requests")
                 and _config.system.parallel_requests
@@ -120,19 +122,25 @@ def generate(
             ):
                 from multiprocessing import Pool
 
-                bar = tqdm.tqdm(total=generations_this_call, leave=False)
-                bar.set_description(self.fullname[:55])
+                multi_generator_bar = tqdm.tqdm(
+                    total=generations_this_call,
+                    leave=False,
+                    colour=f"#{garak.resources.theme.GENERATOR_RGB}",
+                )
+                multi_generator_bar.set_description(self.fullname[:55])
 
                 with Pool(_config.system.parallel_requests) as pool:
                     for result in pool.imap_unordered(
                         self._call_model, [prompt] * generations_this_call
                     ):
                         outputs.append(result)
-                        bar.update(1)
+                        multi_generator_bar.update(1)
 
             else:
                 generation_iterator = tqdm.tqdm(
-                    list(range(generations_this_call)), leave=False
+                    list(range(generations_this_call)),
+                    leave=False,
+                    colour=f"#{garak.resources.theme.GENERATOR_RGB}",
                 )
                 generation_iterator.set_description(self.fullname[:55])
                 for i in generation_iterator:

diff --git a/garak/generators/nvcf.py b/garak/generators/nvcf.py
@@ -12,7 +12,7 @@
 import requests
 
 from garak import _config
-from garak.exception import ModelNameMissingError
+from garak.exception import ModelNameMissingError, BadGeneratorException
 from garak.generators.base import Generator
 
 
@@ -27,6 +27,11 @@ class NvcfChat(Generator):
         "invoke_url_base": "https://api.nvcf.nvidia.com/v2/nvcf/pexec/functions/",
         "extra_nvcf_logging": False,
         "timeout": 60,
+        "version_id": None,  # string
+        "stop_on_404": True,
+        "extra_params": {  # extra params for the payload, e.g. "n":1 or "model":"google/gemma2b"
+            "stream": False
+        },
     }
 
     supports_multiple_generations = False
@@ -47,6 +52,9 @@ def __init__(self, name=None, generations=10, config_root=_config):
 
         self.invoke_url = self.invoke_url_base + self.name
 
+        if self.version_id is not None:
+            self.invoke_url += f"/versions/{self.version_id}"
+
         super().__init__(
             self.name, generations=self.generations, config_root=config_root
         )
@@ -66,6 +74,9 @@ def _build_payload(self, prompt) -> dict:
             "stream": False,
         }
 
+        for k, v in self.extra_params.items():
+            payload[k] = v
+
         return payload
 
     def _extract_text_output(self, response) -> str:
@@ -100,6 +111,7 @@ def _call_model(
             payload["seed"] = self.seed
 
         request_time = time.time()
+        logging.debug("nvcf : payload %s", repr(payload))
         response = session.post(self.invoke_url, headers=self.headers, json=payload)
 
         while response.status_code == 202:
@@ -115,11 +127,16 @@ def _call_model(
 
         if 400 <= response.status_code < 600:
             logging.warning("nvcf : returned error code %s", response.status_code)
-            logging.warning("nvcf : payload %s", repr(payload))
             logging.warning("nvcf : returned error body %s", response.content)
             if response.status_code == 400 and prompt == "":
                 # error messages for refusing a blank prompt are fragile and include multi-level wrapped JSON, so this catch is a little broad
                 return [None]
+            if response.status_code == 404 and self.stop_on_404:
+                msg = "nvcf : got 404, endpoint unavailable, stopping"
+                logging.critical(msg)
+                print("\n\n" + msg)
+                print("nvcf :", response.content)
+                raise BadGeneratorException()
             if response.status_code >= 500:
                 if response.status_code == 500 and json.loads(response.content)[
                     "detail"
@@ -151,6 +168,9 @@ def _build_payload(self, prompt) -> dict:
             "stream": False,
         }
 
+        for k, v in self.extra_params.items():
+            payload[k] = v
+
         return payload
 
     def _extract_text_output(self, response) -> str:

diff --git a/garak/generators/rest.py b/garak/generators/rest.py
@@ -16,16 +16,10 @@
 from jsonpath_ng.exceptions import JsonPathParserError
 
 from garak import _config
-from garak.exception import APIKeyMissingError
+from garak.exception import APIKeyMissingError, RateLimitHit
 from garak.generators.base import Generator
 
 
-class RESTRateLimitError(Exception):
-    """Raised when a rate limiting response is returned"""
-
-    pass
-
-
 class RestGenerator(Generator):
     """Generic API interface for REST models
 
@@ -247,7 +241,7 @@ def _populate_template(
         return output.replace("$INPUT", self.escape_function(text))
 
     # retry with Fibonacci backoff whenever a rate-limit exception is raised
-    @backoff.on_exception(backoff.fibo, RESTRateLimitError, max_value=70)
+    @backoff.on_exception(backoff.fibo, RateLimitHit, max_value=70)
     def _call_model(
         self, prompt: str, generations_this_call: int = 1
     ) -> List[Union[str, None]]:
@@ -274,9 +268,7 @@ def _call_model(
         }
         resp = self.http_function(self.uri, **req_kArgs)
         if resp.status_code in self.ratelimit_codes:
-            raise RESTRateLimitError(
-                f"Rate limited: {resp.status_code} - {resp.reason}"
-            )
+            raise RateLimitHit(f"Rate limited: {resp.status_code} - {resp.reason}")
 
         elif str(resp.status_code)[0] == "3":
             raise NotImplementedError(

diff --git a/garak/harnesses/base.py b/garak/harnesses/base.py
@@ -29,6 +29,8 @@ class Harness(Configurable):
 
     active = True
 
+    DEFAULT_PARAMS = {}
+
     def __init__(self, config_root=_config):
         self._load_config(config_root)
         logging.info("harness init: %s", self)

diff --git a/garak/probes/base.py b/garak/probes/base.py
@@ -49,6 +49,8 @@ class Probe(Configurable):
     # we focus on LLM input for probe
     modality: dict = {"in": {"text"}}
 
+    DEFAULT_PARAMS = {}
+
     def __init__(self, config_root=_config):
         """Sets up a probe. This constructor:
         1. populates self.probename based on the class name,

diff --git a/garak/probes/encoding.py b/garak/probes/encoding.py
@@ -562,7 +562,7 @@ def zalgo(text: bytes) -> bytes:
         from zalgolib.zalgolib import enzalgofy
 
         zalged = enzalgofy(
-            text=text.decode(), intensity=20
+            text=text.decode(), intensity=10
         )  # default value of 50 explodes prompt length
 
         return bytes(zalged, "utf-8")

diff --git a/tests/generators/test_nvcf.py b/tests/generators/test_nvcf.py
@@ -0,0 +1,49 @@
+# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+
+import pytest
+
+from garak import _config
+from garak import _plugins
+import garak.generators.base
+import garak.generators.nvcf
+
+PLUGINS = ("NvcfChat", "NvcfCompletion")
+
+
+@pytest.mark.parametrize("klassname", PLUGINS)
+def test_instantiate(klassname):
+    _config.plugins.generators["nvcf"] = {}
+    _config.plugins.generators["nvcf"][klassname] = {}
+    _config.plugins.generators["nvcf"][klassname]["name"] = "placeholder name"
+    _config.plugins.generators["nvcf"][klassname]["api_key"] = "placeholder key"
+    g = _plugins.load_plugin(f"generators.nvcf.{klassname}")
+    assert isinstance(g, garak.generators.base.Generator)
+
+
+@pytest.mark.parametrize("klassname", PLUGINS)
+def test_version_endpoint(klassname):
+    name = "feedfacedeadbeef"
+    version = "cafebabe"
+    _config.plugins.generators["nvcf"] = {}
+    _config.plugins.generators["nvcf"][klassname] = {}
+    _config.plugins.generators["nvcf"][klassname]["name"] = name
+    _config.plugins.generators["nvcf"][klassname]["api_key"] = "placeholder key"
+    _config.plugins.generators["nvcf"][klassname]["version_id"] = version
+    g = _plugins.load_plugin(f"generators.nvcf.{klassname}")
+    assert g.invoke_url == f"{g.invoke_url_base}{name}/versions/{version}"
+
+
+@pytest.mark.parametrize("klassname", PLUGINS)
+def test_custom_keys(klassname):
+    name = "feedfacedeadbeef"
+    params = {"n": 1, "model": "secret/model_1.8t"}
+    _config.plugins.generators["nvcf"] = {}
+    _config.plugins.generators["nvcf"][klassname] = {}
+    _config.plugins.generators["nvcf"][klassname]["name"] = name
+    _config.plugins.generators["nvcf"][klassname]["api_key"] = "placeholder key"
+    _config.plugins.generators["nvcf"][klassname]["extra_params"] = params
+    g = _plugins.load_plugin(f"generators.nvcf.{klassname}")
+    test_payload = g._build_payload("whatever prompt")
+    for k, v in params.items():
+        assert test_payload[k] == v
diff --git a/tests/generators/test_openai_compatible.py b/tests/generators/test_openai_compatible.py
@@ -9,7 +9,6 @@
 import inspect
 
 from collections.abc import Iterable
-from garak import _plugins
 from garak.generators.openai import OpenAICompatible
 
 

diff --git a/tests/test_config.py b/tests/test_config.py
@@ -434,7 +434,6 @@ def test_cli_generator_options_overrides_yaml_probe_options():
             str(cli_generations_count),
             "--list_config",
         ]  # add list_config as the action so we don't actually run
-        print(args)
         garak.cli.main(args)
         os.remove(generator_yaml_file.name)
     # check it was loaded