Skip to content

Commit

Permalink
Extract device type from user agent info (#69322)
Browse files Browse the repository at this point in the history
  • Loading branch information
shahzad31 authored Mar 29, 2021
1 parent 06d5f94 commit f7efa3e
Show file tree
Hide file tree
Showing 16 changed files with 1,043 additions and 16 deletions.
5 changes: 3 additions & 2 deletions docs/reference/ingest/common-log-format-example.asciidoc
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ the processors as follows:

[options="header"]
|====
| Processor type | Field | Additional options | Description
| Processor type | Field | Additional options | Description

| <<date-processor,**Date**>>
| `@timestamp`
Expand Down Expand Up @@ -247,7 +247,8 @@ The API returns:
},
"name": "Chrome",
"device": {
"name": "Mac"
"name": "Mac",
"type": "Desktop"
},
"version": "52.0.2743.116"
}
Expand Down
3 changes: 2 additions & 1 deletion docs/reference/ingest/processors/user-agent.asciidoc
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,8 @@ Which returns
"full": "Mac OS X 10.10.5"
},
"device" : {
"name" : "Mac"
"name" : "Mac",
"type" : "Desktop"
},
}
}
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,179 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0 and the Server Side Public License, v 1; you may not use this file except
* in compliance with, at your election, the Elastic License 2.0 or the Server
* Side Public License, v 1.
*/

package org.elasticsearch.ingest.useragent;

import org.elasticsearch.ElasticsearchParseException;
import org.elasticsearch.common.xcontent.LoggingDeprecationHandler;
import org.elasticsearch.common.xcontent.NamedXContentRegistry;
import org.elasticsearch.common.xcontent.XContentFactory;
import org.elasticsearch.common.xcontent.XContentParser;
import org.elasticsearch.common.xcontent.XContentType;

import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import static org.elasticsearch.ingest.useragent.UserAgentParser.readParserConfigurations;
import static org.elasticsearch.ingest.useragent.UserAgentParser.VersionedName;

/**
 * Derives a coarse device type ("Robot", "Tablet", "Phone", "Desktop" or "Other") for a user agent.
 *
 * <p>The rules are read from a YAML document containing four pattern lists
 * ({@code os_parsers}, {@code browser_parsers}, {@code device_parsers} and
 * {@code agent_string_parsers}). Each entry pairs a {@code regex} with a {@code replacement}
 * that names the device type reported when the regex matches.
 *
 * <p>Until {@link #init(InputStream)} has completed successfully no patterns are loaded and
 * both {@code findDeviceType} overloads return {@code null}.
 */
public class DeviceTypeParser {

    private static final String OS_PARSERS = "os_parsers";
    private static final String BROWSER_PARSER = "browser_parsers";
    private static final String DEVICE_PARSER = "device_parsers";
    private static final String AGENT_STRING_PARSER = "agent_string_parsers";

    private static final String ROBOT = "Robot";
    private static final String TABLET = "Tablet";
    private static final String DESKTOP = "Desktop";
    private static final String PHONE = "Phone";
    private static final String OTHER = "Other";

    /** Every pattern list that must be present in a valid device type regex file. */
    private static final List<String> PATTERN_LIST_KEYS = List.of(OS_PARSERS, BROWSER_PARSER, DEVICE_PARSER, AGENT_STRING_PARSER);

    /** Order in which conflicting matches are resolved: the first type found in this list wins. */
    private static final List<String> TYPE_PRIORITY = List.of(ROBOT, TABLET, PHONE, DESKTOP);

    private final Map<String, List<DeviceTypeSubPattern>> deviceTypePatterns = new HashMap<>();

    /**
     * Loads the device type patterns from a YAML stream.
     *
     * <p>The patterns are parsed into a temporary map and only published once the whole
     * document has been read and validated, so a failed call leaves this parser unchanged.
     *
     * @param regexStream YAML document holding the four pattern lists. The parser created on
     *                    top of it is closed before this method returns.
     *                    NOTE(review): closing the parser presumably closes this stream as well
     *                    (Jackson's default auto-close behaviour) - confirm that no caller
     *                    expects to keep reading from it.
     * @throws IOException if the stream cannot be read or parsed
     * @throws ElasticsearchParseException if any of the four pattern lists is missing
     */
    public void init(InputStream regexStream) throws IOException {
        Map<String, List<DeviceTypeSubPattern>> parsedPatterns = new HashMap<>();

        // EMPTY is safe here because we don't use namedObject
        try (XContentParser yamlParser = XContentFactory.xContent(XContentType.YAML)
                .createParser(NamedXContentRegistry.EMPTY, LoggingDeprecationHandler.INSTANCE, regexStream)) {

            XContentParser.Token token = yamlParser.nextToken();

            if (token == XContentParser.Token.START_OBJECT) {
                for (token = yamlParser.nextToken(); token != null; token = yamlParser.nextToken()) {
                    String currentName = yamlParser.currentName();
                    if (token == XContentParser.Token.FIELD_NAME && PATTERN_LIST_KEYS.contains(currentName)) {
                        List<Map<String, String>> parserConfigurations = readParserConfigurations(yamlParser);
                        List<DeviceTypeSubPattern> subPatterns = new ArrayList<>(parserConfigurations.size());
                        for (Map<String, String> configuration : parserConfigurations) {
                            subPatterns.add(new DeviceTypeSubPattern(Pattern.compile(configuration.get("regex")),
                                configuration.get("replacement")));
                        }
                        parsedPatterns.put(currentName, subPatterns);
                    }
                }
            }
        }

        // Validate what THIS call parsed, not whatever an earlier call may have left behind.
        if (parsedPatterns.size() != PATTERN_LIST_KEYS.size()) {
            throw new ElasticsearchParseException("not a valid regular expression file");
        }

        deviceTypePatterns.putAll(parsedPatterns);
    }

    /**
     * Determines the device type, giving the raw agent string the first chance to decide.
     *
     * @param agentString the raw user agent string, may be {@code null}
     * @param userAgent   the parsed browser, may be {@code null}
     * @param os          the parsed operating system, may be {@code null}
     * @param device      the parsed device, may be {@code null}
     * @return the replacement of the first {@code agent_string_parsers} rule matching
     *         {@code agentString}; otherwise the result of
     *         {@link #findDeviceType(VersionedName, VersionedName, VersionedName)};
     *         {@code null} if no patterns have been loaded
     */
    public String findDeviceType(String agentString, VersionedName userAgent, VersionedName os, VersionedName device) {
        if (deviceTypePatterns.isEmpty()) {
            return null;
        }
        if (agentString != null) {
            String deviceType = findMatch(deviceTypePatterns.get(AGENT_STRING_PARSER), agentString);
            if (deviceType != null) {
                return deviceType;
            }
        }
        return findDeviceType(userAgent, os, device);
    }

    /**
     * Determines the device type from the already parsed browser, operating system and device names.
     *
     * <p>Each name is matched against its own pattern list. When the lists disagree the result is
     * chosen by priority: Robot, then Tablet, then Phone, then Desktop.
     *
     * @param userAgent the parsed browser, may be {@code null}
     * @param os        the parsed operating system, may be {@code null}
     * @param device    the parsed device, may be {@code null}
     * @return the winning device type, {@code "Other"} when none of the four known types was
     *         matched, or {@code null} if no patterns have been loaded
     */
    public String findDeviceType(VersionedName userAgent, VersionedName os, VersionedName device) {
        if (deviceTypePatterns.isEmpty()) {
            return null;
        }

        List<String> extractedDeviceTypes = new ArrayList<>(3);
        addIfMatched(extractedDeviceTypes, OS_PARSERS, os);
        addIfMatched(extractedDeviceTypes, BROWSER_PARSER, userAgent);
        addIfMatched(extractedDeviceTypes, DEVICE_PARSER, device);

        for (String candidate : TYPE_PRIORITY) {
            if (extractedDeviceTypes.contains(candidate)) {
                return candidate;
            }
        }
        return OTHER;
    }

    /** Matches {@code versionedName.name} against the pattern list stored under {@code patternKey} and records any hit. */
    private void addIfMatched(List<String> collected, String patternKey, VersionedName versionedName) {
        if (versionedName == null || versionedName.name == null) {
            return;
        }
        String deviceType = findMatch(deviceTypePatterns.get(patternKey), versionedName.name);
        if (deviceType != null) {
            collected.add(deviceType);
        }
    }

    /**
     * Returns the result of the first pattern that yields a non-null name for {@code matchString},
     * or {@code null} when no pattern does (or when there is no pattern list at all).
     */
    private String findMatch(List<DeviceTypeSubPattern> possiblePatterns, String matchString) {
        if (possiblePatterns == null) {
            return null;
        }
        for (DeviceTypeSubPattern pattern : possiblePatterns) {
            String name = pattern.match(matchString);
            if (name != null) {
                return name;
            }
        }
        return null;
    }

    /** A single rule: a regular expression plus the device type name it maps to. */
    static final class DeviceTypeSubPattern {
        private final Pattern pattern;
        private final String nameReplacement;

        DeviceTypeSubPattern(Pattern pattern, String nameReplacement) {
            this.pattern = pattern;
            this.nameReplacement = nameReplacement;
        }

        /**
         * Applies this rule to {@code matchString}.
         *
         * @param matchString the text to search; the pattern may match anywhere inside it
         * @return {@code null} if the pattern does not match or the rule has no replacement;
         *         otherwise the replacement, with its first {@code $1} substituted by capture
         *         group 1 when that group exists and took part in the match
         */
        public String match(String matchString) {
            Matcher matcher = pattern.matcher(matchString);
            if (matcher.find() == false) {
                return null;
            }
            if (nameReplacement == null) {
                return null;
            }

            boolean hasFirstGroup = matcher.groupCount() >= 1 && matcher.group(1) != null;
            if (nameReplacement.contains("$1") && hasFirstGroup) {
                // quoteReplacement keeps '$' and '\' in the captured text from being read as group references
                return nameReplacement.replaceFirst("\\$1", Matcher.quoteReplacement(matcher.group(1)));
            }
            return nameReplacement;
        }
    }

}
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,8 @@ static Map<String, UserAgentParser> createUserAgentParsers(Path userAgentConfigD
Map<String, UserAgentParser> userAgentParsers = new HashMap<>();

UserAgentParser defaultParser = new UserAgentParser(DEFAULT_PARSER_NAME,
IngestUserAgentPlugin.class.getResourceAsStream("/regexes.yml"), cache);
IngestUserAgentPlugin.class.getResourceAsStream("/regexes.yml"),
IngestUserAgentPlugin.class.getResourceAsStream("/device_type_regexes.yml"), cache);
userAgentParsers.put(DEFAULT_PARSER_NAME, defaultParser);

if (Files.exists(userAgentConfigDirectory) && Files.isDirectory(userAgentConfigDirectory)) {
Expand All @@ -66,8 +67,9 @@ static Map<String, UserAgentParser> createUserAgentParsers(Path userAgentConfigD
Iterable<Path> iterable = regexFiles::iterator;
for (Path path : iterable) {
String parserName = path.getFileName().toString();
try (InputStream regexStream = Files.newInputStream(path, StandardOpenOption.READ)) {
userAgentParsers.put(parserName, new UserAgentParser(parserName, regexStream, cache));
try (InputStream regexStream = Files.newInputStream(path, StandardOpenOption.READ);
InputStream deviceTypeRegexStream = IngestUserAgentPlugin.class.getResourceAsStream("/device_type_regexes.yml")) {
userAgentParsers.put(parserName, new UserAgentParser(parserName, regexStream, deviceTypeRegexStream, cache));
}
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,17 +26,21 @@
final class UserAgentParser {

private final UserAgentCache cache;
private final DeviceTypeParser deviceTypeParser = new DeviceTypeParser();
private final List<UserAgentSubpattern> uaPatterns = new ArrayList<>();
private final List<UserAgentSubpattern> osPatterns = new ArrayList<>();
private final List<UserAgentSubpattern> devicePatterns = new ArrayList<>();
private final String name;

UserAgentParser(String name, InputStream regexStream, UserAgentCache cache) {
UserAgentParser(String name, InputStream regexStream, InputStream deviceTypeRegexStream, UserAgentCache cache) {
this.name = name;
this.cache = cache;

try {
init(regexStream);
if (deviceTypeRegexStream != null) {
deviceTypeParser.init(deviceTypeRegexStream);
}
} catch (IOException e) {
throw new ElasticsearchParseException("error parsing regular expression file", e);
}
Expand Down Expand Up @@ -96,8 +100,8 @@ private Pattern compilePattern(String regex, String regex_flag) {
}
}

private List<Map<String, String>> readParserConfigurations(XContentParser yamlParser) throws IOException {
List <Map<String, String>> patternList = new ArrayList<>();
static List<Map<String, String>> readParserConfigurations(XContentParser yamlParser) throws IOException {
List<Map<String, String>> patternList = new ArrayList<>();

XContentParser.Token token = yamlParser.nextToken();
if (token != XContentParser.Token.START_ARRAY) {
Expand Down Expand Up @@ -156,9 +160,8 @@ public Details parse(String agentString) {
VersionedName userAgent = findMatch(uaPatterns, agentString);
VersionedName operatingSystem = findMatch(osPatterns, agentString);
VersionedName device = findMatch(devicePatterns, agentString);

details = new Details(userAgent, operatingSystem, device);

String deviceType = deviceTypeParser.findDeviceType(agentString, userAgent, operatingSystem, device);
details = new Details(userAgent, operatingSystem, device, deviceType);
cache.put(name, agentString, details);
}

Expand All @@ -182,11 +185,13 @@ static final class Details {
public final VersionedName userAgent;
public final VersionedName operatingSystem;
public final VersionedName device;
public final String deviceType;

Details(VersionedName userAgent, VersionedName operatingSystem, VersionedName device) {
Details(VersionedName userAgent, VersionedName operatingSystem, VersionedName device, String deviceType) {
this.userAgent = userAgent;
this.operatingSystem = operatingSystem;
this.device = device;
this.deviceType = deviceType;
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -125,8 +125,14 @@ public IngestDocument execute(IngestDocument ingestDocument) {
Map<String, String> deviceDetails = new HashMap<>(1);
if (uaClient.device != null && uaClient.device.name != null) {
deviceDetails.put("name", uaClient.device.name);
deviceDetails.put("type", uaClient.deviceType);
} else {
deviceDetails.put("name", "Other");
if (uaClient.deviceType != null) {
deviceDetails.put("type", uaClient.deviceType);
} else {
deviceDetails.put("type", "Other");
}
}
uaDetails.put("device", deviceDetails);
break;
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
# Apache License, Version 2.0
# ===========================
#
# Copyright 2009 Google Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

## Custom parsers added to support device type detection

os_parsers:
# Robot
- regex: 'Bot|bot|spider|Spider|Crawler|crawler|AppEngine-Google'
replacement: 'Robot'
# Desktop OS, Most Common
- regex: '^(Windows$|Windows NT$|Mac OS X|Linux$|Chrome OS|Fedora$|Ubuntu$)'
replacement: 'Desktop'
# Phone OS
- regex: '^(Android$|iOS|Windows Phone|Firefox OS|BlackBerry OS|KaiOS|Sailfish$|Maemo)'
replacement: 'Phone'
# Desktop OS, Not Common
- regex: '^(Windows XP|Windows 7|Windows 10|FreeBSD|OpenBSD|Arch Linux|Solaris|NetBSD|SUSE|SunOS|BeOS\/Haiku)'
replacement: 'Desktop'
- regex: 'Tablet|BlackBerry Tablet OS|iPad|FireOS|Crosswalk'
replacement: 'Tablet'

browser_parsers:
# Robot
- regex: 'Bot|bot|spider|Spider|Crawler|crawler|AppEngine-Google'
replacement: 'Robot'
# Desktop Browsers
- regex: '^(Chrome$|Chromium$|Edge$|Firefox$|IE$|Maxthon$|Opera$|Safari$|SeaMonkey$|Vivaldi$|Yandex Browser$)'
replacement: 'Desktop'
# Phone Browsers, Most Common
- regex: '^(Chrome Mobile$|Chrome Mobile iOS|Firefox Mobile|Firefox iOS|Edge Mobile|Android|Facebook|Instagram|IE Mobile)'
replacement: 'Phone'
# Phone Browsers, Not Common
- regex: '^(BlackBerry WebKit|OktaMobile|Sailfish Browser|Amazon Silk|Pinterest|Flipboard)'
replacement: 'Phone'
- regex: 'Tablet|BlackBerry Tablet OS|iPad|FireOS|Crosswalk'
replacement: 'Tablet'

device_parsers:
- regex: 'Tablet|BlackBerry Tablet OS|iPad|FireOS|Crosswalk|Kindle'
replacement: 'Tablet'
# Samsung tablets
- regex: 'SM-T\d+|SM-P\d+|GT-P\d+'
replacement: 'Tablet'
# other tablets
- regex: 'Asus Nexus \d+|Lenovo TB'
replacement: 'Tablet'

agent_string_parsers:
- regex: 'Synthetic|Scanner|Crawler|Site24x7|PagePeeker|SpeedCurve|RuxitSynthetic|Google Web Preview|Synthetic|SiteChecker|Parser'
replacement: 'Robot'
- regex: 'Tablet'
replacement: 'Tablet'

Loading

0 comments on commit f7efa3e

Please sign in to comment.