Skip to content

Commit

Permalink
Add support for multiple paths per fileset (#3195)
Browse files Browse the repository at this point in the history
We generally need more than one path per OS, because the logs location
is not always the same. For example, depending on the linux distribution
and how you installed it, MySQL can have its error logs in a number of
default "paths". The solution is to configure them all, which means that
Filebeat might try to access nonexistent folders.

This also improves the python prototype to accept multiple modules and
to accept namespaced parameters. E.g.:

./filebeat.py --modules=nginx,syslog -M nginx.access.paths=...
  • Loading branch information
tsg authored and ruflin committed Dec 15, 2016
1 parent 2f207bd commit cc3c257
Show file tree
Hide file tree
Showing 12 changed files with 101 additions and 59 deletions.
67 changes: 37 additions & 30 deletions filebeat/filebeat.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,27 +13,23 @@
def main():
parser = argparse.ArgumentParser(
description="PROTOTYPE: start filebeat with a module configuration")
parser.add_argument("--module", default="",
parser.add_argument("--modules", default="",
help="From branch")
parser.add_argument("--nginx", action="store_true",
help="Shortcut for --module nginx")
parser.add_argument("--es", default="http://localhost:9200",
help="Elasticsearch URL")
parser.add_argument("-E", nargs="*", type=str, default=None,
parser.add_argument("-M", nargs="*", type=str, default=None,
help="Variables overrides. e.g. path=/test")

args = parser.parse_args()
print args

if args.nginx:
args.module = "nginx"

if args.module == "":
print("You need to specify a module")
modules = args.modules.split(",")
if len(modules) == 0:
print("You need to specify at least a module")
sys.exit(1)

load_dashboards(args)
load_datasets(args, args.module)
load_datasets(args, modules)


def load_dashboards(args):
Expand All @@ -43,22 +39,23 @@ def load_dashboards(args):
subprocess.Popen(cmd).wait()


def load_datasets(args, module):
path = os.path.join("module", module)
if not os.path.isdir(path):
print("Module {} not found".format(module))
sys.exit(1)
print("Found module {} in {}".format(module, path))
def load_datasets(args, modules):
prospectors = ""
for module in modules:
path = os.path.join("module", module)
if not os.path.isdir(path):
print("Module {} not found".format(module))
sys.exit(1)
print("Found module {} in {}".format(module, path))

filesets = [name for name in os.listdir(path) if
os.path.isfile(os.path.join(path, name, "manifest.yml"))]
filesets = [name for name in os.listdir(path) if
os.path.isfile(os.path.join(path, name, "manifest.yml"))]

print("Found filesets: {}".format(filesets))
print("Found filesets: {}".format(filesets))

prospectors = ""
for fileset in filesets:
prospectors += load_fileset(args, module, fileset,
os.path.join(path, fileset))
for fileset in filesets:
prospectors += load_fileset(args, module, fileset,
os.path.join(path, fileset))

run_filebeat(args, prospectors)

Expand All @@ -67,7 +64,7 @@ def load_datasets(args, module):

def load_fileset(args, module, fileset, path):
manifest = yaml.load(file(os.path.join(path, "manifest.yml"), "r"))
var = evaluate_vars(args, manifest["vars"])
var = evaluate_vars(args, manifest["vars"], module, fileset)
var["beat"] = dict(module=module, fileset=fileset, path=path, args=args)
print("Evaluated variables: {}".format(var))

Expand All @@ -77,25 +74,35 @@ def load_fileset(args, module, fileset, path):
return var["beat"]["prospectors"]


def evaluate_vars(args, var_in):
def evaluate_vars(args, var_in, module, fileset):
var = {
"builtin": get_builtin_vars()
}
for name, vals in var_in.items():
var[name] = vals["default"]

if sys.platform == "darwin" and "os.darwin" in vals:
var[name] = vals["os.darwin"]
elif sys.platform == "windows" and "os.windows" in vals:
var[name] = vals["os.windows"]

var[name] = Template(var[name]).render(var)
if isinstance(var[name], basestring):
var[name] = Template(var[name]).render(var)
elif isinstance(var[name], list):
# only supports array of strings atm
var[name] = [Template(x).render(var) for x in var[name]]

# overrides
if args.E is not None:
for pair in args.E:
if args.M is not None:
for pair in args.M:
key, val = pair.partition("=")[::2]
var[key] = val
if key.startswith("{}.{}.".format(module, fileset)):
key = key[len("{}.{}.".format(module, fileset)):]

# this is a hack in the prototype only, because
# here we don't know the type of each variable type.
if key == "paths":
val = val.split(",")
var[key] = val

return var

Expand Down
5 changes: 4 additions & 1 deletion filebeat/module/mysql/error/config/error.yml
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
- input_type: log
paths:
- {{path}}
{%- for path in paths %}
- {{path}}
{%- endfor %}
exclude_files: [".gz$"]
fields:
source_type: mysql-error
pipeline_id: {{beat.pipeline_id}}
12 changes: 8 additions & 4 deletions filebeat/module/mysql/error/manifest.yml
Original file line number Diff line number Diff line change
@@ -1,10 +1,14 @@
module_version: 1.0

vars:
path:
default: /var/log/mysql/error.log
os.darwin: /usr/local/var/mysql/{{builtin.hostname}}.{{builtin.domain}}.err
os.windows: "c:/programdata/MySQL/MySQL Server*/error.log"
paths:
default:
- /var/log/mysql/error.log*
- /var/log/mysqld.log*
os.darwin:
- /usr/local/var/mysql/{{builtin.hostname}}.{{builtin.domain}}.err*
os.windows:
- "c:/programdata/MySQL/MySQL Server*/error.log*"

ingest_pipeline: ingest/pipeline.json
prospectors:
Expand Down
5 changes: 4 additions & 1 deletion filebeat/module/mysql/slowlog/config/slowlog.yml
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
- input_type: log
paths:
- {{path}}
{%- for path in paths %}
- {{path}}
{%- endfor %}
exclude_files: [".gz$"]
multiline:
pattern: "^# User@Host: "
negate: true
Expand Down
12 changes: 8 additions & 4 deletions filebeat/module/mysql/slowlog/manifest.yml
Original file line number Diff line number Diff line change
@@ -1,10 +1,14 @@
module_version: 1.0

vars:
path:
default: /var/log/mysql/mysql-slow.log
os.darwin: /usr/local/var/mysql/{{builtin.hostname}}-slow.log
os.windows: "c:/programdata/MySQL/MySQL Server*/mysql-slow.log"
paths:
default:
- /var/log/mysql/mysql-slow.log*
- /var/lib/mysql/{{builtin.hostname}}-slow.log
os.darwin:
- /usr/local/var/mysql/{{builtin.hostname}}-slow.log*
os.windows:
- "c:/programdata/MySQL/MySQL Server*/mysql-slow.log*"

ingest_pipeline: ingest/pipeline.json
prospectors:
Expand Down
5 changes: 4 additions & 1 deletion filebeat/module/nginx/access/config/nginx-access.yml
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
- input_type: log
paths:
- {{path}}/access*.log
{%- for path in paths %}
- {{path}}
{%- endfor %}
exclude_files: [".gz$"]
fields:
source_type: nginx-access
pipeline_id: {{beat.pipeline_id}}
11 changes: 7 additions & 4 deletions filebeat/module/nginx/access/manifest.yml
Original file line number Diff line number Diff line change
@@ -1,10 +1,13 @@
module_version: 1.0

vars:
path:
default: /var/log/nginx
os.darwin: /usr/local/var/log/nginx
os.windows: c:/programfiles/nginx/logs
paths:
default:
- /var/log/nginx/access.log*
os.darwin:
- /usr/local/var/log/nginx/access.log*
os.windows:
- c:/programfiles/nginx/logs/access.log*
pipeline:
# options: with_plugins, no_plugins, json_with_plugins, json_no_plugins
default: with_plugins
Expand Down
5 changes: 4 additions & 1 deletion filebeat/module/nginx/error/config/nginx-error.yml
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
- input_type: log
paths:
- {{path}}/error.log
{%- for path in paths %}
- {{path}}
{%- endfor %}
exclude_files: [".gz$"]
fields:
source_type: nginx-error
pipeline_id: {{beat.pipeline_id}}
Expand Down
12 changes: 7 additions & 5 deletions filebeat/module/nginx/error/ingest/pipeline.json
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,7 @@
"patterns": [
"%{DATA:nginx.error.time} \\[%{DATA:nginx.error.level}\\] %{NUMBER:nginx.error.pid}#%{NUMBER:nginx.error.tid}: (\\*%{NUMBER:nginx.error.connection_id} )?%{GREEDYDATA:nginx.error.message}"
],
"on_failure" : [{
"set" : {
"field" : "error",
"value" : "The error pattern didn't match on this event."
}
"ignore_missing": true
}
},{
"remove":{
Expand All @@ -31,5 +27,11 @@
"remove": {
"field": "nginx.error.time"
}
}],
"on_failure" : [{
"set" : {
"field" : "error",
"value" : "{{ _ingest.on_failure_message }}"
}
}]
}
11 changes: 7 additions & 4 deletions filebeat/module/nginx/error/manifest.yml
Original file line number Diff line number Diff line change
@@ -1,10 +1,13 @@
module_version: 1.0

vars:
path:
default: /var/log/nginx
os.darwin: /usr/local/var/log/nginx
os.windows: c:/programfiles/nginx/logs
paths:
default:
- /var/log/nginx/error.log*
os.darwin:
- /usr/local/var/log/nginx/error.log*
os.windows:
- c:/programfiles/nginx/logs/error.log*

ingest_pipeline: ingest/pipeline.json
prospectors:
Expand Down
5 changes: 4 additions & 1 deletion filebeat/module/syslog/system/config/system.yml
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
- input_type: log
paths:
- {{path}}
{%- for path in paths %}
- {{path}}
{%- endfor %}
exclude_files: [".gz$"]
multiline:
pattern: "^\\s"
match: after
Expand Down
10 changes: 7 additions & 3 deletions filebeat/module/syslog/system/manifest.yml
Original file line number Diff line number Diff line change
@@ -1,9 +1,13 @@
module_version: 1.0

vars:
path:
default: /var/log/messages
os.darwin: /var/log/system.log
paths:
default:
- /var/log/messages*
- /var/log/syslog*
os.darwin:
- /var/log/system.log*
os.windows: []

ingest_pipeline: ingest/pipeline.json
prospectors:
Expand Down

0 comments on commit cc3c257

Please sign in to comment.