Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Feature/openai llm #2

Merged
merged 12 commits into from
Nov 17, 2023
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -160,3 +160,5 @@ cython_debug/
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
.idea/

# Custom data folder for vectordb and sigma rules
sigmaiq/llm/data/*
3 changes: 3 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,9 @@ encountered. Please report any issues [here](https://github.com/AttackIQ/SigmAIQ
Feature requests are also always welcome! pySigma tools/utils are currently not in the pre-release version,
and will be added in future releases.

# LLM Support
For LLM usage, see the [LLM README](sigmaiq/llm/README.md)

# Installation & Usage

## Installation
Expand Down
Empty file added examples/__init__.py
Empty file.
49 changes: 49 additions & 0 deletions examples/custom_field_mappings.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
# %% This example shows how to use the SigmAIQ pySigma wrapper to provide custom field mappings for a backend.
# %% Custom field mappings rename specific Sigma field names to your own field names during rule translation.

# %% Import pprint for pretty printing, and copy for copying rules
from copy import copy
from pprint import pprint

# %% Import SigmAIQ
from sigmaiq import SigmAIQBackend, SigmAIQPipeline

# %% A basic Sigma Rule in YAML str to convert to a query.
# %% SigmAIQ also accepts a rule in JSON/Dict format, SigmaRule objects, and SigmaCollection objects
# %% NOTE: the nesting below is significant; `product`/`category` belong to `logsource`, and
# %% `selection1`/`condition` belong to `detection`.
sigma_rule = """
title: whoami Command
description: Detects a basic whoami commandline execution
logsource:
    product: windows
    category: process_creation
detection:
    selection1:
        - CommandLine|contains: 'whoami.exe'
    condition: selection1
"""

# %% Create a SigmAIQ backend and translate the rule to a Splunk query (no custom mappings yet)
sigmaiq_backend = SigmAIQBackend(backend="splunk").create_backend()
query = sigmaiq_backend.translate(copy(sigma_rule))  # Returns List of queries

print("\nSplunk Query: ", end="\n\n")
pprint(query[0])
print("\n-------------------")

# %% Create custom field mappings
# %% This will map the CommandLine field to a custom field named "CustomCommandLine"
custom_field_mappings = {"CommandLine": "CustomCommandLine"}
my_custom_pipeline = SigmAIQPipeline.from_fieldmap(custom_field_mappings, priority=0).create_pipeline()

# %% Create a SigmAIQ backend and translate the rule to a Splunk query with our custom field mappings
sigmaiq_backend = SigmAIQBackend(
    backend="splunk",
    processing_pipeline=my_custom_pipeline,
).create_backend()

query = sigmaiq_backend.translate(copy(sigma_rule))  # Returns List of queries

print("\nSplunk Query with Custom Fieldmappings: ", end="\n\n")
pprint(query[0])
print("\n-------------------")

36 changes: 36 additions & 0 deletions examples/llm_basic_usage.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
# %% This example will demonstrate how to use SigmAIQ to perform the following:
# %% 1. Download the latest Sigma Rule package release
# %% 2. Create embeddings of the Sigma Rules in the package
# %% 3. Create and save a VectorDB of the Sigma Rule embeddings
# %% 4. Use a similarity search on the VectorDB to find Sigma Rules similar to a provided query
from pprint import pprint

# %% NOTE, this example uses OpenAI for embeddings. Ensure you have an OpenAI API key set in your environment variable
# %% OPENAI_API_KEY

# %% Also ensure you have installed the correct requirements with:
# `pip install -r requirements/common.txt -r requirements/llm.txt`


# %% Import the SigmAIQ LLM wrapper
from sigmaiq.llm.base import SigmaLLM

# %% Create a SigmaLLM object with default settings. See the class docstring for more information
sigma_llm = SigmaLLM()

# %% The `create_sigma_vectordb()` method will automatically do all the work for you :) (only run this once)
sigma_llm.create_sigma_vectordb(save=True)  # Save locally to disk

# %% Run a similarity search on the vectordb for encoded powershell commands and print top 3 results
query = "Encoded powershell commands"
matching_rules = sigma_llm.simple_search(query, k=3)
for matching_rule in matching_rules:
    print(matching_rule.page_content, end="\n\n-------------------\n\n")

# %% You can also load an existing vector store from disk (recommended)
sigma_llm.load_sigma_vectordb()

# %% Run a second similarity search against the loaded vector store and print the top 3 results
query = "certutil"
matching_rules = sigma_llm.simple_search(query, k=3)
for matching_rule in matching_rules:
    print(matching_rule.page_content, end="\n\n-------------------\n\n")
58 changes: 58 additions & 0 deletions examples/llm_rule_translation_and_creation.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
# %% This example will demonstrate how to create a Sigma langchain agent chatbot, which can perform various tasks like
# %% automatically translating a rule for you, and creating new rules from a user's input.

# %% Import required SigmAIQ classes and functions
from sigmaiq.llm.toolkits.base import create_sigma_agent
from sigmaiq.llm.base import SigmaLLM

# %% Ensure we have our Sigma vector store setup with our base LLM class
sigma_llm = SigmaLLM()

# %% Prefer the vector store already saved on disk; build (and save) a new one only if loading fails.
# %% The load error is printed rather than hidden so the reason for the rebuild is visible.
try:
    sigma_llm.load_sigma_vectordb()
except Exception as e:
    print(e)
    print("Creating new Sigma VectorDB")
    sigma_llm.create_sigma_vectordb(save=True)

# %% Create a Sigma Agent Executor, and pass it our Sigma VectorDB
sigma_agent_executor = create_sigma_agent(sigma_vectorstore=sigma_llm.sigmadb)

# %% RULE TRANSLATION
# %% Have the agent automatically translate a Sigma rule to a Splunk query with the splunk_cim_dm pipeline
# %% NOTE: the nesting inside the rule is significant; `product`/`category` belong to `logsource`, and
# %% `selection1`/`condition` belong to `detection`.

sigma_rule = r"""
title: whoami Command
description: Detects a basic whoami commandline execution
logsource:
    product: windows
    category: process_creation
detection:
    selection1:
        - CommandLine|contains: 'whoami.exe'
    condition: selection1
"""

user_input = (
    "Translate the following Sigma rule to a Splunk query using the 'splunk_cim_dm' pipeline: \n\n"
    + sigma_rule
)

# Each `invoke` call sends a request to the LLM behind the agent.
answer = sigma_agent_executor.invoke({"input": user_input})
print("\nRULE TRANSLATION:", end="\n\n")
print(f"Question:\n {user_input}", end="\n\n")
print("Answer: \n")
print(answer.get('output'), end="\n\n")

# %% RULE CREATION
# %% The agent will take the user input, look up similar Sigma Rules in the Sigma vector store, then create a brand
# %% new rule based on the context of the user's input and the similar Sigma Rules.

user_input = (
    "Create a Windows process creation Sigma Rule for certutil downloading a file "
    "from definitely-not-malware.com, then translate it to a Microsoft 365 Defender query."
)

answer = sigma_agent_executor.invoke({"input": user_input})
print("\nRULE CREATION:", end="\n\n")
print(f"Question:\n {user_input}", end="\n\n")
print("Answer: \n")
print(answer.get('output'), end="\n\n")


77 changes: 77 additions & 0 deletions examples/translate_sigma_rules.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
# %% This example shows how to use the SigmAIQ pySigma wrapper to easily translate Sigma rules to queries,
# %% without having to worry about installing and configuring the correct backends, pipelines and other details.


# %% Import pprint for pretty printing, and copy for copying rules
from copy import copy
from pprint import pprint

# %% Import SigmAIQ
from sigmaiq import SigmAIQBackend

# %% A basic Sigma Rule in YAML str to convert to a query.
# %% SigmAIQ also accepts a rule in JSON/Dict format, SigmaRule objects, and SigmaCollection objects
# %% NOTE: the nesting below is significant; `product`/`category` belong to `logsource`, and
# %% `selection1`/`condition` belong to `detection`.
sigma_rule = """
title: whoami Command
description: Detects a basic whoami commandline execution
logsource:
    product: windows
    category: process_creation
detection:
    selection1:
        - CommandLine|contains: 'whoami.exe'
    condition: selection1
"""

# %% BACKENDS
# %% Show the available supported backends
print("Supported Backends:", end="\n\n")
pprint(SigmAIQBackend.display_available_backends())
print("\n-------------------")

# %% Create a SigmAIQ backend and translate the rule to a Microsoft 365 Defender query
# %% SigmAIQ will automatically select the best pipeline for the backend
sigmaiq_backend = SigmAIQBackend(backend="microsoft365defender").create_backend()
query = sigmaiq_backend.translate(copy(sigma_rule))  # Returns List of queries

print("\nM365Defender Query: ", end="\n\n")
pprint(query[0])
print("\n-------------------")

# %% PIPELINES
# %% Show the available pipelines with each backend
print("Available Pipelines:", end="\n\n")
pprint(SigmAIQBackend.display_all_associated_pipelines())
print("\n-------------------")

# %% Create a SigmAIQ backend and translate the rule to a Splunk search with the CIM pipeline
sigmaiq_backend = SigmAIQBackend(backend="splunk", processing_pipeline="splunk_cim_dm").create_backend()
query = sigmaiq_backend.translate(copy(sigma_rule))

print("\nSplunk CIM Query: ", end="\n\n")
pprint(query[0])
print("\n-------------------")

# %% OUTPUT FORMATS
# %% Show the available output formats with each backend
print("\nAvailable Output Formats:", end="\n\n")
pprint(SigmAIQBackend.display_backends_and_outputs())
print("\n-------------------")

# %% Change the output_format to an Enterprise Security Correlation Search stanza.
# %% This reuses the Splunk CIM backend created above; only its output format changes.
sigmaiq_backend.set_output_format("stanza")
query = sigmaiq_backend.translate(copy(sigma_rule))

print("\nSplunk CIM Query, Stanza Output: ", end="\n\n")
pprint(query[0])
print("\n-------------------")


# %% You can also translate a Sigma rule to all supported backend, pipeline, and output format combinations at once.
# %% Any combination that is not supported will not be included in the results
# %% This is useful for testing and comparing the output of different backends and pipelines
queries = SigmAIQBackend.create_all_and_translate(copy(sigma_rule))

print("\n All Translations: ", end="\n\n")
pprint(queries)
14 changes: 9 additions & 5 deletions pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "SigmAIQ"
version = "0.2.4"
version = "0.3.0"
description = "Wrapper and tools for pySigma and Sigma rules"
authors = ["Stephen Lincoln <stephen.lincoln@attackiq.com>", "AttackIQ <support@attackiq.com>"]
readme = "README.md"
Expand All @@ -20,7 +20,7 @@ packages = [
]

[tool.poetry.dependencies]
python = "^3.8"
python = ">=3.8.1, <=3.11.6"
pysigma = "0.9.11"
certifi = "^2023.07.22"
pysigma-backend-carbonblack = "0.1.4"
Expand All @@ -39,16 +39,20 @@ pysigma-pipeline-sysmon = "1.0.2"
pysigma-pipeline-windows = "1.1.0"
importlib-resources = "^5.13.0"




[tool.poetry.dev-dependencies]
pytest = "^7.4.0"
pytest-cov = "^4.1.0"
black = "^23.7.0"
ruff = "^0.0.286"

# Dependency group for the LLM features (sigmaiq/llm).
# The group is marked optional, so Poetry only installs it when explicitly requested,
# e.g. `poetry install --with llm`.
[tool.poetry.group.llm]
optional = true

# NOTE: Poetry's caret operator on a 0.0.x version (langchain below) allows only that exact
# patch release (>=0.0.335,<0.0.336), so it behaves like a hard pin.
[tool.poetry.group.llm.dependencies]
langchain = "^0.0.335"
openai = "^1.2.4"
tiktoken = "^0.5.1"
faiss-cpu = "^1.7.4"

[build-system]
requires = ["poetry-core"]
Expand Down
26 changes: 1 addition & 25 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1,25 +1 @@
certifi==2023.7.22 ; python_version >= "3.8" and python_version < "4.0"
charset-normalizer==3.2.0 ; python_version >= "3.8" and python_version < "4.0"
idna==3.4 ; python_version >= "3.8" and python_version < "4.0"
importlib-resources==5.13.0 ; python_version >= "3.8" and python_version < "4.0"
packaging==22.0 ; python_version >= "3.8" and python_version < "4.0"
pyparsing==3.1.1 ; python_version >= "3.8" and python_version < "4.0"
pysigma-backend-carbonblack==0.1.4 ; python_version >= "3.8" and python_version < "4.0"
pysigma-backend-cortexxdr==0.1.1 ; python_version >= "3.8" and python_version < "4.0"
pysigma-backend-elasticsearch==1.0.5 ; python_version >= "3.8" and python_version < "4.0"
pysigma-backend-insightidr==0.2.1 ; python_version >= "3.8" and python_version < "4.0"
pysigma-backend-loki==0.9.1 ; python_version >= "3.8" and python_version < "4.0"
pysigma-backend-microsoft365defender==0.2.1 ; python_version >= "3.8" and python_version < "4.0"
pysigma-backend-opensearch==1.0.0 ; python_version >= "3.8" and python_version < "4.0"
pysigma-backend-qradar-aql==0.2.3 ; python_version >= "3.8" and python_version < "4.0"
pysigma-backend-sentinelone==0.1.2 ; python_version >= "3.8" and python_version < "4.0"
pysigma-backend-splunk==1.0.2 ; python_version >= "3.8" and python_version < "4.0"
pysigma-backend-stix==0.1.8 ; python_version >= "3.8" and python_version < "4.0"
pysigma-pipeline-crowdstrike==1.0.0 ; python_version >= "3.8" and python_version < "4.0"
pysigma-pipeline-sysmon==1.0.2 ; python_version >= "3.8" and python_version < "4.0"
pysigma-pipeline-windows==1.1.0 ; python_version >= "3.8" and python_version < "4.0"
pysigma==0.9.11 ; python_version >= "3.8" and python_version < "4.0"
pyyaml==6.0.1 ; python_version >= "3.8" and python_version < "4.0"
requests==2.31.0 ; python_version >= "3.8" and python_version < "4.0"
urllib3==2.0.4 ; python_version >= "3.8" and python_version < "4.0"
zipp==3.16.2 ; python_version >= "3.8" and python_version < "3.10"
-r requirements/common.txt
25 changes: 25 additions & 0 deletions requirements/common.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
certifi==2023.7.22 ; python_full_version >= "3.8.1" and python_full_version <= "3.11.6"
charset-normalizer==3.3.2 ; python_full_version >= "3.8.1" and python_full_version <= "3.11.6"
idna==3.4 ; python_full_version >= "3.8.1" and python_full_version <= "3.11.6"
importlib-resources==5.13.0 ; python_full_version >= "3.8.1" and python_full_version <= "3.11.6"
packaging==22.0 ; python_full_version >= "3.8.1" and python_full_version <= "3.11.6"
pyparsing==3.1.1 ; python_full_version >= "3.8.1" and python_full_version <= "3.11.6"
pysigma-backend-carbonblack==0.1.4 ; python_full_version >= "3.8.1" and python_full_version <= "3.11.6"
pysigma-backend-cortexxdr==0.1.1 ; python_full_version >= "3.8.1" and python_full_version <= "3.11.6"
pysigma-backend-elasticsearch==1.0.5 ; python_full_version >= "3.8.1" and python_full_version <= "3.11.6"
pysigma-backend-insightidr==0.2.1 ; python_full_version >= "3.8.1" and python_full_version <= "3.11.6"
pysigma-backend-loki==0.9.1 ; python_full_version >= "3.8.1" and python_full_version <= "3.11.6"
pysigma-backend-microsoft365defender==0.2.1 ; python_full_version >= "3.8.1" and python_full_version <= "3.11.6"
pysigma-backend-opensearch==1.0.0 ; python_full_version >= "3.8.1" and python_full_version <= "3.11.6"
pysigma-backend-qradar-aql==0.2.3 ; python_full_version >= "3.8.1" and python_full_version <= "3.11.6"
pysigma-backend-sentinelone==0.1.2 ; python_full_version >= "3.8.1" and python_full_version <= "3.11.6"
pysigma-backend-splunk==1.0.2 ; python_full_version >= "3.8.1" and python_full_version <= "3.11.6"
pysigma-backend-stix==0.1.8 ; python_full_version >= "3.8.1" and python_full_version <= "3.11.6"
pysigma-pipeline-crowdstrike==1.0.0 ; python_full_version >= "3.8.1" and python_full_version <= "3.11.6"
pysigma-pipeline-sysmon==1.0.2 ; python_full_version >= "3.8.1" and python_full_version <= "3.11.6"
pysigma-pipeline-windows==1.1.0 ; python_full_version >= "3.8.1" and python_full_version <= "3.11.6"
pysigma==0.9.11 ; python_full_version >= "3.8.1" and python_full_version <= "3.11.6"
pyyaml==6.0.1 ; python_full_version >= "3.8.1" and python_full_version <= "3.11.6"
requests==2.31.0 ; python_full_version >= "3.8.1" and python_full_version <= "3.11.6"
urllib3==2.1.0 ; python_full_version >= "3.8.1" and python_full_version <= "3.11.6"
zipp==3.17.0 ; python_full_version >= "3.8.1" and python_version < "3.10"
60 changes: 60 additions & 0 deletions requirements/llm.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
aiohttp==3.8.6 ; python_full_version >= "3.8.1" and python_full_version <= "3.11.6"
aiosignal==1.3.1 ; python_full_version >= "3.8.1" and python_full_version <= "3.11.6"
annotated-types==0.6.0 ; python_full_version >= "3.8.1" and python_full_version <= "3.11.6"
anyio==3.7.1 ; python_full_version >= "3.8.1" and python_full_version <= "3.11.6"
async-timeout==4.0.3 ; python_full_version >= "3.8.1" and python_full_version <= "3.11.6"
attrs==23.1.0 ; python_full_version >= "3.8.1" and python_full_version <= "3.11.6"
backoff==2.2.1 ; python_full_version >= "3.8.1" and python_full_version <= "3.11.6"
beautifulsoup4==4.12.2 ; python_full_version >= "3.8.1" and python_full_version <= "3.11.6"
certifi==2023.7.22 ; python_full_version >= "3.8.1" and python_full_version <= "3.11.6"
chardet==5.2.0 ; python_full_version >= "3.8.1" and python_full_version <= "3.11.6"
charset-normalizer==3.3.2 ; python_full_version >= "3.8.1" and python_full_version <= "3.11.6"
click==8.1.7 ; python_full_version >= "3.8.1" and python_full_version <= "3.11.6"
colorama==0.4.6 ; python_full_version >= "3.8.1" and python_full_version <= "3.11.6" and platform_system == "Windows"
dataclasses-json==0.6.2 ; python_full_version >= "3.8.1" and python_full_version <= "3.11.6"
distro==1.8.0 ; python_full_version >= "3.8.1" and python_full_version <= "3.11.6"
emoji==2.8.0 ; python_full_version >= "3.8.1" and python_full_version <= "3.11.6"
exceptiongroup==1.1.3 ; python_full_version >= "3.8.1" and python_version < "3.11"
faiss-cpu==1.7.4 ; python_full_version >= "3.8.1" and python_full_version <= "3.11.6"
filetype==1.2.0 ; python_full_version >= "3.8.1" and python_full_version <= "3.11.6"
frozenlist==1.4.0 ; python_full_version >= "3.8.1" and python_full_version <= "3.11.6"
greenlet==3.0.1 ; python_full_version >= "3.8.1" and python_full_version <= "3.11.6" and (platform_machine == "win32" or platform_machine == "WIN32" or platform_machine == "AMD64" or platform_machine == "amd64" or platform_machine == "x86_64" or platform_machine == "ppc64le" or platform_machine == "aarch64")
h11==0.14.0 ; python_full_version >= "3.8.1" and python_full_version <= "3.11.6"
httpcore==1.0.2 ; python_full_version >= "3.8.1" and python_full_version <= "3.11.6"
httpx==0.25.1 ; python_full_version >= "3.8.1" and python_full_version <= "3.11.6"
idna==3.4 ; python_full_version >= "3.8.1" and python_full_version <= "3.11.6"
joblib==1.3.2 ; python_full_version >= "3.8.1" and python_full_version <= "3.11.6"
jsonpatch==1.33 ; python_full_version >= "3.8.1" and python_full_version <= "3.11.6"
jsonpointer==2.4 ; python_full_version >= "3.8.1" and python_full_version <= "3.11.6"
langchain==0.0.335 ; python_full_version >= "3.8.1" and python_full_version <= "3.11.6"
langdetect==1.0.9 ; python_full_version >= "3.8.1" and python_full_version <= "3.11.6"
langsmith==0.0.64 ; python_full_version >= "3.8.1" and python_full_version <= "3.11.6"
lxml==4.9.3 ; python_full_version >= "3.8.1" and python_full_version <= "3.11.6"
marshmallow==3.20.1 ; python_full_version >= "3.8.1" and python_full_version <= "3.11.6"
multidict==6.0.4 ; python_full_version >= "3.8.1" and python_full_version <= "3.11.6"
mypy-extensions==1.0.0 ; python_full_version >= "3.8.1" and python_full_version <= "3.11.6"
nltk==3.8.1 ; python_full_version >= "3.8.1" and python_full_version <= "3.11.6"
numpy==1.24.4 ; python_full_version >= "3.8.1" and python_full_version <= "3.11.6"
openai==1.2.4 ; python_full_version >= "3.8.1" and python_full_version <= "3.11.6"
packaging==22.0 ; python_full_version >= "3.8.1" and python_full_version <= "3.11.6"
pydantic-core==2.14.1 ; python_full_version >= "3.8.1" and python_full_version <= "3.11.6"
pydantic==2.5.0 ; python_full_version >= "3.8.1" and python_full_version <= "3.11.6"
python-iso639==2023.6.15 ; python_full_version >= "3.8.1" and python_full_version <= "3.11.6"
python-magic==0.4.27 ; python_full_version >= "3.8.1" and python_full_version <= "3.11.6"
pyyaml==6.0.1 ; python_full_version >= "3.8.1" and python_full_version <= "3.11.6"
rapidfuzz==3.5.2 ; python_full_version >= "3.8.1" and python_full_version <= "3.11.6"
regex==2023.10.3 ; python_full_version >= "3.8.1" and python_full_version <= "3.11.6"
requests==2.31.0 ; python_full_version >= "3.8.1" and python_full_version <= "3.11.6"
six==1.16.0 ; python_full_version >= "3.8.1" and python_full_version <= "3.11.6"
sniffio==1.3.0 ; python_full_version >= "3.8.1" and python_full_version <= "3.11.6"
soupsieve==2.5 ; python_full_version >= "3.8.1" and python_full_version <= "3.11.6"
sqlalchemy==2.0.23 ; python_full_version >= "3.8.1" and python_full_version <= "3.11.6"
tabulate==0.9.0 ; python_full_version >= "3.8.1" and python_full_version <= "3.11.6"
tenacity==8.2.3 ; python_full_version >= "3.8.1" and python_full_version <= "3.11.6"
tiktoken==0.5.1 ; python_full_version >= "3.8.1" and python_full_version <= "3.11.6"
tqdm==4.66.1 ; python_full_version >= "3.8.1" and python_full_version <= "3.11.6"
typing-extensions==4.8.0 ; python_full_version >= "3.8.1" and python_full_version <= "3.11.6"
typing-inspect==0.9.0 ; python_full_version >= "3.8.1" and python_full_version <= "3.11.6"
unstructured==0.10.30 ; python_full_version >= "3.8.1" and python_full_version <= "3.11.6"
urllib3==2.1.0 ; python_full_version >= "3.8.1" and python_full_version <= "3.11.6"
yarl==1.9.2 ; python_full_version >= "3.8.1" and python_full_version <= "3.11.6"
Loading