diff --git a/docs/reference/ingest/common-log-format-example.asciidoc b/docs/reference/ingest/common-log-format-example.asciidoc index 0dffcc96ecf24..4f9e9be41632e 100644 --- a/docs/reference/ingest/common-log-format-example.asciidoc +++ b/docs/reference/ingest/common-log-format-example.asciidoc @@ -53,7 +53,7 @@ the processors as follows: [options="header"] |==== -| Processor type | Field | Additional options | Description +| Processor type | Field | Additional options | Description | <> | `@timestamp` @@ -247,7 +247,8 @@ The API returns: }, "name": "Chrome", "device": { - "name": "Mac" + "name": "Mac", + "type": "Desktop" }, "version": "52.0.2743.116" } diff --git a/docs/reference/ingest/processors/user-agent.asciidoc b/docs/reference/ingest/processors/user-agent.asciidoc index b2a45b6395331..690bf3a98cc7d 100644 --- a/docs/reference/ingest/processors/user-agent.asciidoc +++ b/docs/reference/ingest/processors/user-agent.asciidoc @@ -69,7 +69,8 @@ Which returns "full": "Mac OS X 10.10.5" }, "device" : { - "name" : "Mac" + "name" : "Mac", + "type" : "Desktop" }, } } diff --git a/modules/ingest-user-agent/src/main/java/org/elasticsearch/ingest/useragent/DeviceTypeParser.java b/modules/ingest-user-agent/src/main/java/org/elasticsearch/ingest/useragent/DeviceTypeParser.java new file mode 100644 index 0000000000000..0108e428ba451 --- /dev/null +++ b/modules/ingest-user-agent/src/main/java/org/elasticsearch/ingest/useragent/DeviceTypeParser.java @@ -0,0 +1,179 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0 and the Server Side Public License, v 1; you may not use this file except + * in compliance with, at your election, the Elastic License 2.0 or the Server + * Side Public License, v 1. + */ + +package org.elasticsearch.ingest.useragent; + +import org.elasticsearch.ElasticsearchParseException; +import org.elasticsearch.common.xcontent.LoggingDeprecationHandler; +import org.elasticsearch.common.xcontent.NamedXContentRegistry; +import org.elasticsearch.common.xcontent.XContentFactory; +import org.elasticsearch.common.xcontent.XContentParser; +import org.elasticsearch.common.xcontent.XContentType; + +import java.io.IOException; +import java.io.InputStream; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +import static org.elasticsearch.ingest.useragent.UserAgentParser.readParserConfigurations; +import static org.elasticsearch.ingest.useragent.UserAgentParser.VersionedName; + +public class DeviceTypeParser { + + private static final String OS_PARSERS = "os_parsers"; + private static final String BROWSER_PARSER = "browser_parsers"; + private static final String DEVICE_PARSER = "device_parsers"; + private static final String AGENT_STRING_PARSER = "agent_string_parsers"; + private static final String robot = "Robot", tablet = "Tablet", desktop = "Desktop", phone = "Phone"; + + private final List patternListKeys = List.of(OS_PARSERS, BROWSER_PARSER, DEVICE_PARSER, AGENT_STRING_PARSER); + + private final HashMap> deviceTypePatterns = new HashMap<>(); + + public void init(InputStream regexStream) throws IOException { + // EMPTY is safe here because we don't use namedObject + XContentParser yamlParser = XContentFactory.xContent(XContentType.YAML).createParser(NamedXContentRegistry.EMPTY, + LoggingDeprecationHandler.INSTANCE, regexStream); + + XContentParser.Token token = yamlParser.nextToken(); + + if (token == XContentParser.Token.START_OBJECT) { + token = yamlParser.nextToken(); + + for (; token != null; token = yamlParser.nextToken()) { + String currentName = yamlParser.currentName(); + if (token == XContentParser.Token.FIELD_NAME && patternListKeys.contains(currentName)) { + List> parserConfigurations = readParserConfigurations(yamlParser); + ArrayList subPatterns = new ArrayList<>(); + for (Map map : parserConfigurations) { + subPatterns.add(new DeviceTypeSubPattern(Pattern.compile((map.get("regex"))), + map.get("replacement"))); + } + deviceTypePatterns.put(currentName, subPatterns); + } + } + } + + if (patternListKeys.size() != deviceTypePatterns.size()) { + throw new ElasticsearchParseException("not a valid regular expression file"); + } + } + + public String findDeviceType(String agentString, VersionedName userAgent, VersionedName os, VersionedName device) { + if (deviceTypePatterns.isEmpty()) { + return null; + } + if (agentString != null) { + String deviceType = findMatch(deviceTypePatterns.get(AGENT_STRING_PARSER), agentString); + if (deviceType != null) { + return deviceType; + } + } + return findDeviceType(userAgent, os, device); + } + + public String findDeviceType(VersionedName userAgent, VersionedName os, VersionedName device) { + + if (deviceTypePatterns.isEmpty()) { + return null; + } + + ArrayList extractedDeviceTypes = new ArrayList<>(); + + for (String patternKey : patternListKeys) { + String deviceType = null; + switch (patternKey) { + case OS_PARSERS: + if (os != null && os.name != null) { + deviceType = findMatch(deviceTypePatterns.get(patternKey), os.name); + } + break; + case BROWSER_PARSER: + if (userAgent != null && userAgent.name != null) { + deviceType = findMatch(deviceTypePatterns.get(patternKey), userAgent.name); + } + break; + case DEVICE_PARSER: + if (device != null && device.name != null) { + deviceType = findMatch(deviceTypePatterns.get(patternKey), device.name); + } + break; + default: + break; + } + + if (deviceType != null) { + extractedDeviceTypes.add(deviceType); + } + } + + + if (extractedDeviceTypes.contains(robot)) { + return robot; + } + if (extractedDeviceTypes.contains(tablet)) { + return tablet; + } + if (extractedDeviceTypes.contains(phone)) { + return phone; + } + if (extractedDeviceTypes.contains(desktop)) { + return desktop; + } + + return "Other"; + } + + private String findMatch(List possiblePatterns, String matchString) { + String name; + for (DeviceTypeSubPattern pattern : possiblePatterns) { + name = pattern.match(matchString); + if (name != null) { + return name; + } + } + return null; + } + + static final class DeviceTypeSubPattern { + private final Pattern pattern; + private final String nameReplacement; + + DeviceTypeSubPattern(Pattern pattern, String nameReplacement) { + this.pattern = pattern; + this.nameReplacement = nameReplacement; + } + + public String match(String matchString) { + String name = null; + + Matcher matcher = pattern.matcher(matchString); + + if (matcher.find() == false) { + return null; + } + + int groupCount = matcher.groupCount(); + + if (nameReplacement != null) { + if (nameReplacement.contains("$1") && groupCount >= 1 && matcher.group(1) != null) { + name = nameReplacement.replaceFirst("\\$1", Matcher.quoteReplacement(matcher.group(1))); + } else { + name = nameReplacement; + } + } + + return name; + } + } + +} diff --git a/modules/ingest-user-agent/src/main/java/org/elasticsearch/ingest/useragent/IngestUserAgentPlugin.java b/modules/ingest-user-agent/src/main/java/org/elasticsearch/ingest/useragent/IngestUserAgentPlugin.java index c49b57442436f..dfaed02a2323e 100644 --- a/modules/ingest-user-agent/src/main/java/org/elasticsearch/ingest/useragent/IngestUserAgentPlugin.java +++ b/modules/ingest-user-agent/src/main/java/org/elasticsearch/ingest/useragent/IngestUserAgentPlugin.java @@ -55,7 +55,8 @@ static Map createUserAgentParsers(Path userAgentConfigD Map userAgentParsers = new HashMap<>(); UserAgentParser defaultParser = new UserAgentParser(DEFAULT_PARSER_NAME, - IngestUserAgentPlugin.class.getResourceAsStream("/regexes.yml"), cache); + IngestUserAgentPlugin.class.getResourceAsStream("/regexes.yml"), + IngestUserAgentPlugin.class.getResourceAsStream("/device_type_regexes.yml"), cache); userAgentParsers.put(DEFAULT_PARSER_NAME, defaultParser); if (Files.exists(userAgentConfigDirectory) && Files.isDirectory(userAgentConfigDirectory)) { @@ -66,8 +67,9 @@ static Map createUserAgentParsers(Path userAgentConfigD Iterable iterable = regexFiles::iterator; for (Path path : iterable) { String parserName = path.getFileName().toString(); - try (InputStream regexStream = Files.newInputStream(path, StandardOpenOption.READ)) { - userAgentParsers.put(parserName, new UserAgentParser(parserName, regexStream, cache)); + try (InputStream regexStream = Files.newInputStream(path, StandardOpenOption.READ); + InputStream deviceTypeRegexStream = IngestUserAgentPlugin.class.getResourceAsStream("/device_type_regexes.yml")) { + userAgentParsers.put(parserName, new UserAgentParser(parserName, regexStream, deviceTypeRegexStream, cache)); } } } diff --git a/modules/ingest-user-agent/src/main/java/org/elasticsearch/ingest/useragent/UserAgentParser.java b/modules/ingest-user-agent/src/main/java/org/elasticsearch/ingest/useragent/UserAgentParser.java index 9ef2c51c3d422..56cd668d4ecf9 100644 --- a/modules/ingest-user-agent/src/main/java/org/elasticsearch/ingest/useragent/UserAgentParser.java +++ b/modules/ingest-user-agent/src/main/java/org/elasticsearch/ingest/useragent/UserAgentParser.java @@ -26,17 +26,21 @@ final class UserAgentParser { private final UserAgentCache cache; + private final DeviceTypeParser deviceTypeParser = new DeviceTypeParser(); private final List uaPatterns = new ArrayList<>(); private final List osPatterns = new ArrayList<>(); private final List devicePatterns = new ArrayList<>(); private final String name; - UserAgentParser(String name, InputStream regexStream, UserAgentCache cache) { + UserAgentParser(String name, InputStream regexStream, InputStream deviceTypeRegexStream, UserAgentCache cache) { this.name = name; this.cache = cache; try { init(regexStream); + if (deviceTypeRegexStream != null) { + deviceTypeParser.init(deviceTypeRegexStream); + } } catch (IOException e) { throw new ElasticsearchParseException("error parsing regular expression file", e); } @@ -96,8 +100,8 @@ private Pattern compilePattern(String regex, String regex_flag) { } } - private List> readParserConfigurations(XContentParser yamlParser) throws IOException { - List > patternList = new ArrayList<>(); + static List> readParserConfigurations(XContentParser yamlParser) throws IOException { + List> patternList = new ArrayList<>(); XContentParser.Token token = yamlParser.nextToken(); if (token != XContentParser.Token.START_ARRAY) { @@ -156,9 +160,8 @@ public Details parse(String agentString) { VersionedName userAgent = findMatch(uaPatterns, agentString); VersionedName operatingSystem = findMatch(osPatterns, agentString); VersionedName device = findMatch(devicePatterns, agentString); - - details = new Details(userAgent, operatingSystem, device); - + String deviceType = deviceTypeParser.findDeviceType(agentString, userAgent, operatingSystem, device); + details = new Details(userAgent, operatingSystem, device, deviceType); cache.put(name, agentString, details); } @@ -182,11 +185,13 @@ static final class Details { public final VersionedName userAgent; public final VersionedName operatingSystem; public final VersionedName device; + public final String deviceType; - Details(VersionedName userAgent, VersionedName operatingSystem, VersionedName device) { + Details(VersionedName userAgent, VersionedName operatingSystem, VersionedName device, String deviceType) { this.userAgent = userAgent; this.operatingSystem = operatingSystem; this.device = device; + this.deviceType = deviceType; } } diff --git a/modules/ingest-user-agent/src/main/java/org/elasticsearch/ingest/useragent/UserAgentProcessor.java b/modules/ingest-user-agent/src/main/java/org/elasticsearch/ingest/useragent/UserAgentProcessor.java index 8c24e9f96bd3c..4f679f68aca2c 100644 --- a/modules/ingest-user-agent/src/main/java/org/elasticsearch/ingest/useragent/UserAgentProcessor.java +++ b/modules/ingest-user-agent/src/main/java/org/elasticsearch/ingest/useragent/UserAgentProcessor.java @@ -125,8 +125,14 @@ public IngestDocument execute(IngestDocument ingestDocument) { Map deviceDetails = new HashMap<>(1); if (uaClient.device != null && uaClient.device.name != null) { deviceDetails.put("name", uaClient.device.name); + deviceDetails.put("type", uaClient.deviceType); } else { deviceDetails.put("name", "Other"); + if (uaClient.deviceType != null) { + deviceDetails.put("type", uaClient.deviceType); + } else { + deviceDetails.put("type", "Other"); + } } uaDetails.put("device", deviceDetails); break; diff --git a/modules/ingest-user-agent/src/main/resources/device_type_regexes.yml b/modules/ingest-user-agent/src/main/resources/device_type_regexes.yml new file mode 100644 index 0000000000000..88a860a9d9d83 --- /dev/null +++ b/modules/ingest-user-agent/src/main/resources/device_type_regexes.yml @@ -0,0 +1,67 @@ +# Apache License, Version 2.0 +# =========================== +# +# Copyright 2009 Google Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +## Custom parser being added to support device types + +os_parsers: + # Robot + - regex: 'Bot|bot|spider|Spider|Crawler|crawler|AppEngine-Google' + replacement: 'Robot' + # Desktop OS, Most Common + - regex: '^(Windows$|Windows NT$|Mac OS X|Linux$|Chrome OS|Fedora$|Ubuntu$)' + replacement: 'Desktop' + # Phone OS + - regex: '^(Android$|iOS|Windows Phone|Firefox OS|BlackBerry OS|KaiOS|Sailfish$|Maemo)' + replacement: 'Phone' + # Desktop OS, Not Common + - regex: '^(Windows XP|Windows 7|Windows 10|FreeBSD|OpenBSD|Arch Linux|Solaris|NetBSD|SUSE|SunOS|BeOS\/Haiku)' + replacement: 'Desktop' + - regex: 'Tablet|BlackBerry Tablet OS|iPad|FireOS|Crosswalk' + replacement: 'Tablet' + +browser_parsers: + # Robot + - regex: 'Bot|bot|spider|Spider|Crawler|crawler|AppEngine-Google' + replacement: 'Robot' + # Desktop Browsers + - regex: '^(Chrome$|Chromium$|Edge$|Firefox$|IE$|Maxthon$|Opera$|Safari$|SeaMonkey$|Vivaldi$|Yandex Browser$)' + replacement: 'Desktop' + # Phone Browsers, Most Common + - regex: '^(Chrome Mobile$|Chrome Mobile iOS|Firefox Mobile|Firefox iOS|Edge Mobile|Android|Facebook|Instagram|IE Mobile)' + replacement: 'Phone' + # Phone Browsers, Not Common + - regex: '^(BlackBerry WebKit|OktaMobile|Sailfish Browser|Amazon Silk|Pinterest|Flipboard)' + replacement: 'Phone' + - regex: 'Tablet|BlackBerry Tablet OS|iPad|FireOS|Crosswalk' + replacement: 'Tablet' + +device_parsers: + - regex: 'Tablet|BlackBerry Tablet OS|iPad|FireOS|Crosswalk|Kindle' + replacement: 'Tablet' + # Samsung tablets + - regex: 'SM-T\d+|SM-P\d+|GT-P\d+' + replacement: 'Tablet' + # other tablets + - regex: 'Asus Nexus \d+|Lenovo TB' + replacement: 'Tablet' + +agent_string_parsers: + - regex: 'Synthetic|Scanner|Crawler|Site24x7|PagePeeker|SpeedCurve|RuxitSynthetic|Google Web Preview|Synthetic|SiteChecker|Parser' + replacement: 'Robot' + - regex: 'Tablet' + replacement: 'Tablet' + diff --git a/modules/ingest-user-agent/src/test/java/org/elasticsearch/ingest/useragent/DeviceTypeParserTests.java b/modules/ingest-user-agent/src/test/java/org/elasticsearch/ingest/useragent/DeviceTypeParserTests.java new file mode 100644 index 0000000000000..dcde87fbb258e --- /dev/null +++ b/modules/ingest-user-agent/src/test/java/org/elasticsearch/ingest/useragent/DeviceTypeParserTests.java @@ -0,0 +1,217 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0 and the Server Side Public License, v 1; you may not use this file except + * in compliance with, at your election, the Elastic License 2.0 or the Server + * Side Public License, v 1. + */ + +package org.elasticsearch.ingest.useragent; +import org.elasticsearch.common.xcontent.LoggingDeprecationHandler; +import org.elasticsearch.common.xcontent.NamedXContentRegistry; +import org.elasticsearch.common.xcontent.XContentFactory; +import org.elasticsearch.common.xcontent.XContentParser; +import org.elasticsearch.common.xcontent.XContentType; +import org.elasticsearch.test.ESTestCase; + +import org.junit.BeforeClass; + +import java.io.IOException; +import java.io.InputStream; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + + +import static org.elasticsearch.ingest.useragent.UserAgentParser.VersionedName; + + +import static org.elasticsearch.ingest.useragent.UserAgentParser.readParserConfigurations; +import static org.hamcrest.Matchers.is; + +public class DeviceTypeParserTests extends ESTestCase { + + private static DeviceTypeParser deviceTypeParser; + + private ArrayList> readTestDevices(InputStream regexStream, String keyName) throws IOException { + XContentParser yamlParser = XContentFactory.xContent(XContentType.YAML).createParser(NamedXContentRegistry.EMPTY, + LoggingDeprecationHandler.INSTANCE, regexStream); + + XContentParser.Token token = yamlParser.nextToken(); + + ArrayList> testDevices = new ArrayList<>(); + + if (token == XContentParser.Token.START_OBJECT) { + token = yamlParser.nextToken(); + + for (; token != null; token = yamlParser.nextToken()) { + String currentName = yamlParser.currentName(); + if (token == XContentParser.Token.FIELD_NAME && currentName.equals(keyName)) { + List> parserConfigurations = readParserConfigurations(yamlParser); + + for (Map map : parserConfigurations) { + HashMap testDevice = new HashMap<>(); + + testDevice.put("type", map.get("type")); + testDevice.put("os", map.get("os")); + testDevice.put("browser", map.get("browser")); + testDevice.put("device", map.get("device")); + testDevices.add(testDevice); + + } + } + } + } + + return testDevices; + } + + private static VersionedName getVersionName(String name){ + return new VersionedName(name, null, null, null, null); + } + + @BeforeClass + public static void setupDeviceParser() throws IOException { + InputStream deviceTypeRegexStream = UserAgentProcessor.class.getResourceAsStream("/device_type_regexes.yml"); + + assertNotNull(deviceTypeRegexStream); + assertNotNull(deviceTypeRegexStream); + + deviceTypeParser = new DeviceTypeParser(); + deviceTypeParser.init(deviceTypeRegexStream); + } + + @SuppressWarnings("unchecked") + public void testMacDesktop() throws Exception { + VersionedName os = getVersionName("Mac OS X"); + + VersionedName userAgent = getVersionName("Chrome"); + + String deviceType = deviceTypeParser.findDeviceType(userAgent, os, null); + + assertThat(deviceType, is("Desktop")); + } + + @SuppressWarnings("unchecked") + public void testAndroidMobile() throws Exception { + + VersionedName os = getVersionName("iOS"); + + VersionedName userAgent = getVersionName("Safari"); + + String deviceType = deviceTypeParser.findDeviceType(userAgent, os, null); + + assertThat(deviceType, is("Phone")); + } + + @SuppressWarnings("unchecked") + public void testIPadTablet() throws Exception { + + VersionedName os = getVersionName("iOS"); + + VersionedName userAgent = getVersionName("Safari"); + + VersionedName device = getVersionName("iPad"); + + String deviceType = deviceTypeParser.findDeviceType(userAgent, os, device); + + assertThat(deviceType, is("Tablet")); + } + + @SuppressWarnings("unchecked") + public void testWindowDesktop() throws Exception { + + VersionedName os = getVersionName("Mac OS X"); + + VersionedName userAgent = getVersionName("Chrome"); + + String deviceType = deviceTypeParser.findDeviceType(userAgent, os, null); + + assertThat(deviceType, is("Desktop")); + } + + @SuppressWarnings("unchecked") + public void testRobotAgentString() throws Exception { + + String deviceType = deviceTypeParser.findDeviceType( + "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:63.0.247) Gecko/20100101 Firefox/63.0.247 Site24x7", null, null, null); + + assertThat(deviceType, is("Robot")); + } + + @SuppressWarnings("unchecked") + public void testRobotDevices() throws Exception { + + InputStream testRobotDevices = IngestUserAgentPlugin.class.getResourceAsStream("/test-robot-devices.yml"); + + ArrayList> testDevices = readTestDevices(testRobotDevices, "robot_devices"); + + for (HashMap testDevice : testDevices) { + VersionedName os = getVersionName(testDevice.get("os")); + + VersionedName userAgent = getVersionName(testDevice.get("browser")); + + String deviceType = deviceTypeParser.findDeviceType(userAgent, os, null); + + assertThat(deviceType, is("Robot")); + } + } + + @SuppressWarnings("unchecked") + public void testDesktopDevices() throws Exception { + + InputStream testDesktopDevices = IngestUserAgentPlugin.class.getResourceAsStream("/test-desktop-devices.yml"); + + ArrayList> testDevices = readTestDevices(testDesktopDevices, "desktop_devices"); + + for (HashMap testDevice : testDevices) { + VersionedName os = getVersionName(testDevice.get("os")); + + VersionedName userAgent = getVersionName(testDevice.get("browser")); + + String deviceType = deviceTypeParser.findDeviceType(userAgent, os, null); + + assertThat(deviceType, is("Desktop")); + } + } + + @SuppressWarnings("unchecked") + public void testMobileDevices() throws Exception { + + InputStream testMobileDevices = IngestUserAgentPlugin.class.getResourceAsStream("/test-mobile-devices.yml"); + + ArrayList> testDevices = readTestDevices(testMobileDevices, "mobile_devices"); + + for (HashMap testDevice : testDevices) { + VersionedName os = getVersionName(testDevice.get("os")); + + VersionedName userAgent = getVersionName(testDevice.get("browser")); + + String deviceType = deviceTypeParser.findDeviceType(userAgent, os, null); + + assertThat(deviceType, is("Phone")); + } + } + + @SuppressWarnings("unchecked") + public void testTabletDevices() throws Exception { + + InputStream testTabletDevices = IngestUserAgentPlugin.class.getResourceAsStream("/test-tablet-devices.yml"); + + ArrayList> testDevices = readTestDevices(testTabletDevices, "tablet_devices"); + + for (HashMap testDevice : testDevices) { + VersionedName os = getVersionName(testDevice.get("os")); + + VersionedName userAgent = getVersionName(testDevice.get("browser")); + + VersionedName device = getVersionName(testDevice.get("device")); + + String deviceType = deviceTypeParser.findDeviceType(userAgent, os, device); + + assertThat(deviceType, is("Tablet")); + } + } + +} diff --git a/modules/ingest-user-agent/src/test/java/org/elasticsearch/ingest/useragent/UserAgentProcessorTests.java b/modules/ingest-user-agent/src/test/java/org/elasticsearch/ingest/useragent/UserAgentProcessorTests.java index a236eec45acfc..e2f28bfeff751 100644 --- a/modules/ingest-user-agent/src/test/java/org/elasticsearch/ingest/useragent/UserAgentProcessorTests.java +++ b/modules/ingest-user-agent/src/test/java/org/elasticsearch/ingest/useragent/UserAgentProcessorTests.java @@ -32,9 +32,12 @@ public class UserAgentProcessorTests extends ESTestCase { @BeforeClass public static void setupProcessor() throws IOException { InputStream regexStream = UserAgentProcessor.class.getResourceAsStream("/regexes.yml"); + InputStream deviceTypeRegexStream = UserAgentProcessor.class.getResourceAsStream("/device_type_regexes.yml"); + assertNotNull(regexStream); + assertNotNull(deviceTypeRegexStream); - UserAgentParser parser = new UserAgentParser(randomAlphaOfLength(10), regexStream, new UserAgentCache(1000)); + UserAgentParser parser = new UserAgentParser(randomAlphaOfLength(10), regexStream, deviceTypeRegexStream, new UserAgentCache(1000)); processor = new UserAgentProcessor(randomAlphaOfLength(10), null, "source_field", "target_field", parser, EnumSet.allOf(UserAgentProcessor.Property.class), false); @@ -101,6 +104,34 @@ public void testCommonBrowser() throws Exception { assertThat(target.get("os"), is(os)); Map device = new HashMap<>(); device.put("name", "Mac"); + device.put("type", "Desktop"); + assertThat(target.get("device"), is(device)); + } + + @SuppressWarnings("unchecked") + public void testWindowsOS() throws Exception { + Map document = new HashMap<>(); + document.put("source_field", + "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.141 Safari/537.36"); + IngestDocument ingestDocument = RandomDocumentPicks.randomIngestDocument(random(), document); + + processor.execute(ingestDocument); + Map data = ingestDocument.getSourceAndMetadata(); + + assertThat(data, hasKey("target_field")); + Map target = (Map) data.get("target_field"); + + assertThat(target.get("name"), is("Chrome")); + assertThat(target.get("version"), is("87.0.4280.141")); + + Map os = new HashMap<>(); + os.put("name", "Windows"); + os.put("version", "10"); + os.put("full", "Windows 10"); + assertThat(target.get("os"), is(os)); + Map device = new HashMap<>(); + device.put("name", "Other"); + device.put("type", "Desktop"); assertThat(target.get("device"), is(device)); } @@ -129,6 +160,7 @@ public void testUncommonDevice() throws Exception { Map device = new HashMap<>(); device.put("name", "Motorola Xoom"); + device.put("type", "Phone"); assertThat(target.get("device"), is(device)); } @@ -152,6 +184,37 @@ public void testSpider() throws Exception { Map device = new HashMap<>(); device.put("name", "Spider"); + device.put("type", "Robot"); + assertThat(target.get("device"), is(device)); + } + + @SuppressWarnings("unchecked") + public void testTablet() throws Exception { + Map document = new HashMap<>(); + document.put("source_field", + "Mozilla/5.0 (iPad; CPU OS 12_2 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) " + + "Version/12.1 Mobile/15E148 Safari/604.1"); + IngestDocument ingestDocument = RandomDocumentPicks.randomIngestDocument(random(), document); + + processor.execute(ingestDocument); + Map data = ingestDocument.getSourceAndMetadata(); + + assertThat(data, hasKey("target_field")); + Map target = (Map) data.get("target_field"); + + assertThat(target.get("name"), is("Mobile Safari")); + + assertThat(target.get("version"), is("12.1")); + + Map os = new HashMap<>(); + os.put("name", "iOS"); + os.put("version", "12.2"); + os.put("full", "iOS 12.2"); + assertThat(target.get("os"), is(os)); + + Map device = new HashMap<>(); + device.put("name", "iPad"); + device.put("type", "Tablet"); assertThat(target.get("device"), is(device)); } @@ -177,6 +240,7 @@ public void testUnknown() throws Exception { assertNull(target.get("os")); Map device = new HashMap<>(); device.put("name", "Other"); + device.put("type", "Other"); assertThat(target.get("device"), is(device)); } } diff --git a/modules/ingest-user-agent/src/test/resources/test-desktop-devices.yml b/modules/ingest-user-agent/src/test/resources/test-desktop-devices.yml new file mode 100644 index 0000000000000..a23d44b984fff --- /dev/null +++ b/modules/ingest-user-agent/src/test/resources/test-desktop-devices.yml @@ -0,0 +1,177 @@ +# Apache License, Version 2.0 +# =========================== +# +# Copyright 2009 Google Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +desktop_devices: + - type: Desktop + os: Windows + browser: Chrome + - type: Desktop + os: Mac OS X + browser: Chrome + - type: Desktop + os: Linux + browser: Cypress + - type: Desktop + os: Ubuntu + browser: Firefox + - type: Desktop + os: Chrome OS + browser: Chrome + - type: Desktop + os: Fedora + browser: Firefox + - type: Desktop + os: FreeBSD + browser: Chrome + - type: Desktop + os: OpenBSD + browser: Firefox + - type: Desktop + os: Arch Linux + browser: Firefox + - type: Desktop + os: Solaris + browser: Firefox + - type: Desktop + os: NetBSD + browser: Firefox + - type: Desktop + os: SUSE + browser: Epiphany + - type: Desktop + browser: Chrome + os: Mac OS X + - type: Desktop + browser: Firefox + os: Windows NT + - type: Desktop + browser: Edge + os: Windows NT + - type: Desktop + browser: HeadlessChrome + os: Linux + - type: Desktop + browser: Safari + os: Mac OS X + - type: Desktop + browser: Electron + os: Linux + - type: Desktop + browser: Opera + os: Linux + - type: Desktop + browser: Samsung Internet + os: Linux + - type: Desktop + browser: Chromium + os: Ubuntu + - type: Desktop + browser: Yandex Browser + os: Windows NT + - type: Desktop + browser: Whale + os: Windows NT + - type: Desktop + browser: Sogou Explorer + os: Windows NT + - type: Desktop + browser: QQ Browser + os: Windows NT + - type: Desktop + browser: IE + os: Windows NT + - type: Desktop + browser: Yeti + os: Windows NT + - type: Desktop + browser: Apple Mail + os: Mac OS X + - type: Desktop + browser: Coc Coc + os: Windows NT + - type: Desktop + browser: Maxthon + os: Windows NT + - type: Desktop + browser: Waterfox + os: Linux + - type: Desktop + browser: Iron + os: Mac OS X + - type: Desktop + browser: UC Browser + os: Windows NT + - type: Desktop + browser: Pale Moon + os: Linux + - type: Desktop + browser: WordPress + os: Linux + - type: Desktop + browser: Vivaldi + os: Windows NT + - type: Desktop + browser: Dragon + os: Windows NT + - type: Desktop + browser: SeaMonkey + os: Windows NT + - type: Desktop + browser: Sleipnir + os: Windows NT + - type: Desktop + browser: Thunderbird + os: Linux + - type: Desktop + browser: Epiphany + os: Linux + - type: Desktop + browser: Datanyze + os: Linux + - type: Desktop + browser: Basilisk + os: Windows NT + - type: Desktop + browser: Swiftfox + os: Linux + - type: Desktop + browser: Netscape + os: SunOS + - type: Desktop + browser: Puffin + os: Linux + - type: Desktop + browser: Seznam prohlížeč + os: Windows NT + - type: Desktop + browser: iCab + os: Mac OS X + - type: Desktop + browser: Opera Neon + os: Windows NT + - type: Desktop + browser: Mail.ru Chromium Browser + os: Windows NT + - type: Desktop + browser: Otter + os: BeOS/Haiku + - type: Desktop + browser: Iceweasel + os: Linux + - type: Desktop + browser: Chrome Mobile WebView + os: Linux diff --git a/modules/ingest-user-agent/src/test/resources/test-mobile-devices.yml b/modules/ingest-user-agent/src/test/resources/test-mobile-devices.yml new file mode 100644 index 0000000000000..340251ba06b16 --- /dev/null +++ b/modules/ingest-user-agent/src/test/resources/test-mobile-devices.yml @@ -0,0 +1,124 @@ +# Apache License, Version 2.0 +# =========================== +# +# Copyright 2009 Google Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +mobile_devices: + - type: Phone + os: Android + browser: Chrome + - type: Phone + os: iOS + browser: Firefox +# - type: Phone +# os: Windows Phone +# browser: Edge + - type: Phone + os: KaiOS + browser: Firefox + - type: Phone + os: Sailfish + browser: SailfishBrowser + - type: Phone + os: Maemo + browser: Fennec + - type: Phone + os: BlackBerry OS + browser: Mobile Safari + - type: Phone + browser: Chrome Mobile + os: Android + - type: Phone + browser: Mobile Safari + os: iOS + - type: Phone + browser: Chrome Mobile WebView + os: Android + - type: Phone + browser: Firefox Mobile + os: Android + - type: Phone + browser: Chrome Mobile iOS + os: iOS + - type: Phone + browser: Facebook + os: Android + - type: Phone + browser: Mobile Safari UI/WKWebView + os: iOS + - type: Phone + browser: Firefox iOS + os: iOS + - type: Phone + browser: Opera Mobile + os: Android + - type: Phone + browser: MiuiBrowser + os: Android + - type: Phone + browser: Edge Mobile + os: Android + - type: Phone + browser: Android + os: Android + - type: Phone + browser: LINE + os: iOS + - type: Phone + browser: QQ Browser Mobile + os: Android + - type: Phone + browser: Flipboard + os: Android + - type: Phone + browser: Instagram + os: iOS + - type: Phone + browser: Pinterest + os: iOS + - type: Phone + browser: OktaMobile + os: iOS + - type: Phone + browser: Twitter + os: Android + - type: Phone + browser: Mint Browser + os: Android + - type: Phone + browser: Snapchat + os: iOS + - type: Phone + browser: IE Mobile + os: Windows Phone + - type: Phone + browser: Sailfish Browser + os: Linux + - type: Phone + browser: MobileIron + os: iOS + - type: Phone + browser: charlotte + os: Android + - type: Phone + browser: BlackBerry WebKit + os: BlackBerry + - type: Phone + browser: YandexSearch + os: Android + - type: Phone + browser: Salesforce + os: iOS + diff --git a/modules/ingest-user-agent/src/test/resources/test-other-devices.yml b/modules/ingest-user-agent/src/test/resources/test-other-devices.yml new file mode 100644 index 0000000000000..98595d961552b --- /dev/null +++ b/modules/ingest-user-agent/src/test/resources/test-other-devices.yml @@ -0,0 +1,22 @@ +# Apache License, Version 2.0 +# =========================== +# +# Copyright 2009 Google Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +robot_devices: + - type: Desktop + os: Tizen + browser: AppleWebKit + diff --git a/modules/ingest-user-agent/src/test/resources/test-robot-devices.yml b/modules/ingest-user-agent/src/test/resources/test-robot-devices.yml new file mode 100644 index 0000000000000..e3f2d0cedc10f --- /dev/null +++ b/modules/ingest-user-agent/src/test/resources/test-robot-devices.yml @@ -0,0 +1,123 @@ +# Apache License, Version 2.0 +# =========================== +# +# Copyright 2009 Google Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +robot_devices: + - type: Robot + os: CentOS + browser: AdsBot-Naver + - type: Robot + browser: iSec_Bot + os: Cloud + - type: Robot + browser: moatbot + os: Cloud + - type: Robot + browser: Baiduspider-render + os: Cloud + - type: Robot + browser: AhrefsBot + os: Cloud + - type: Robot + browser: Applebot + os: Cloud + - type: Robot + browser: Seekport Crawler + os: Cloud + - type: Robot + browser: Linespider + os: Cloud + - type: Robot + browser: pingbot + os: Cloud + - type: Robot + browser: YisouSpider + os: Cloud + - type: Robot + browser: HubSpot Crawler + os: Cloud + - type: Robot + browser: AdsBot + os: Cloud + - type: Robot + browser: net/bot + os: Cloud + - type: Robot + browser: Investment Crawler + os: Cloud + - type: Robot + browser: Bytespider + os: Cloud + - type: Robot + browser: IBM-Crawler + os: Cloud + - type: Robot + browser: BublupBot + os: Cloud + - type: Robot + browser: AppEngine-Google + os: Google Cloud + - type: Robot + browser: YandexBot + os: Cloud + - type: Robot + browser: Slackbot-LinkExpanding + os: Cloud + - type: Robot + browser: WebPageTest.org bot + os: Cloud + - type: Robot + browser: Baiduspider-image + os: Cloud + - type: Robot + browser: Pinterestbot + os: Cloud + - type: Robot + browser: YandexAccessibilityBot + os: Cloud + - type: Robot + browser: FacebookBot + os: Cloud + - type: Robot + browser: BLEXBot + os: Cloud + - type: Robot + browser: SuperBot + os: Cloud + - type: Robot + browser: Googlebot-News + os: Cloud + - type: Robot + browser: SMTBot + os: Cloud + - type: Robot + browser: GooglePlusBot + os: Cloud + - type: Robot + browser: niocBot + os: Cloud + - type: Robot + browser: SpiderWeb + os: Cloud + - type: Robot + browser: facebot + os: Cloud + - type: Robot + browser: MJ12bot + os: Cloud + - type: Robot + browser: ethical-bugbot + os: Linux diff --git a/modules/ingest-user-agent/src/test/resources/test-tablet-devices.yml b/modules/ingest-user-agent/src/test/resources/test-tablet-devices.yml new file mode 100644 index 0000000000000..0f67c361d7c0b --- /dev/null +++ b/modules/ingest-user-agent/src/test/resources/test-tablet-devices.yml @@ -0,0 +1,39 @@ +# Apache License, Version 2.0 +# =========================== +# +# Copyright 2009 Google Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +tablet_devices: + - type: Tablet + os: BlackBerry Tablet OS + browser: Edg + - type: Tablet + browser: Amazon Silk + os: FireOS + - type: Tablet + browser: Crosswalk + os: Android + - type: Tablet + browser: Chrome Mobile WebView + os: Android + device: Samsung SM-T590 + - type: Tablet + browser: Amazon Silk + os: Linux + device: Kindle + - type: Tablet + browser: Chrome + os: Android + device: Samsung SM-T307U diff --git a/modules/ingest-user-agent/src/yamlRestTest/resources/rest-api-spec/test/ingest-useragent/20_useragent_processor.yml b/modules/ingest-user-agent/src/yamlRestTest/resources/rest-api-spec/test/ingest-useragent/20_useragent_processor.yml index 4daef7da0ce0a..a5e8e0b2490f9 100644 --- a/modules/ingest-user-agent/src/yamlRestTest/resources/rest-api-spec/test/ingest-useragent/20_useragent_processor.yml +++ b/modules/ingest-user-agent/src/yamlRestTest/resources/rest-api-spec/test/ingest-useragent/20_useragent_processor.yml @@ -32,7 +32,7 @@ - match: { _source.user_agent.original: "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/33.0.1750.149 Safari/537.36" } - match: { _source.user_agent.os: {"name":"Mac OS X", "version":"10.9.2", "full":"Mac OS X 10.9.2"} } - match: { _source.user_agent.version: "33.0.1750.149" } - - match: { _source.user_agent.device: {"name": "Mac" }} + - match: { _source.user_agent.device: {"name": "Mac", type: "Desktop" }} --- "Test user agent processor with parameters": diff --git a/modules/ingest-user-agent/src/yamlRestTest/resources/rest-api-spec/test/ingest-useragent/30_custom_regex.yml b/modules/ingest-user-agent/src/yamlRestTest/resources/rest-api-spec/test/ingest-useragent/30_custom_regex.yml index 763bea0ee4da0..cc7a461a338a1 100644 --- a/modules/ingest-user-agent/src/yamlRestTest/resources/rest-api-spec/test/ingest-useragent/30_custom_regex.yml +++ b/modules/ingest-user-agent/src/yamlRestTest/resources/rest-api-spec/test/ingest-useragent/30_custom_regex.yml @@ -30,6 +30,6 @@ id: 1 - match: { _source.field1: "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/33.0.1750.149 Safari/537.36" } - match: { _source.user_agent.name: "Test" } - - match: { _source.user_agent.device: {"name": "Other" }} + - match: { _source.user_agent.device: {"name": "Other", "type": "Other" }} - is_false: _source.user_agent.os - is_false: _source.user_agent.version