Improvements in Drill query runner and minor refactoring

1. The Drill query runner now inherits from `BaseHTTPQueryRunner`, because the two share a lot of common code. 2. The `BaseHTTPQueryRunner.get_response` method now accepts an `http_method` argument (the original implementation could only send `GET` HTTP requests). 3. Added `order` to the `BaseHTTPQueryRunner` configuration schema to fix the order of UI elements generated from the schema. 4. Eliminated the duplicate `_guess_type` method in the `GoogleSpreadsheet`, `Results` and `Drill` query runners by moving `guess_type` to `redash.query_runner`. 5. Removed the tests for `_guess_type` in the `GoogleSpreadsheet`, `Results` and `Drill` query runners, merged them into a single test case and moved it to `tests.query_runner.test_utils`. 6. Various minor changes (code style, imports, etc.).
getredash · Dec 14, 2018 · 040fc79 · 040fc79
1 parent a62d54f
commit 040fc79
Show file tree

Hide file tree

Showing 10 changed files with 108 additions and 264 deletions.
diff --git a/redash/query_runner/__init__.py b/redash/query_runner/__init__.py
@@ -1,4 +1,6 @@
 import logging
+
+from dateutil import parser
 import requests
 
 from redash import settings
@@ -20,7 +22,8 @@
     'SUPPORTED_COLUMN_TYPES',
     'register',
     'get_query_runner',
-    'import_query_runners'
+    'import_query_runners',
+    'guess_type'
 ]
 
 # Valid types of columns returned in results:
@@ -169,7 +172,8 @@ def configuration_schema(cls):
                     'title': cls.password_title,
                 },
             },
-            'secret': ['password']
+            'secret': ['password'],
+            'order': ['url', 'username', 'password']
         }
 
         if cls.requires_url or cls.requires_authentication:
@@ -192,7 +196,7 @@ def get_auth(self):
         else:
             return None
 
-    def get_response(self, url, auth=None, **kwargs):
+    def get_response(self, url, auth=None, http_method='get', **kwargs):
         # Get authentication values if not given
         if auth is None:
             auth = self.get_auth()
@@ -202,7 +206,7 @@ def get_response(self, url, auth=None, **kwargs):
         error = None
         response = None
         try:
-            response = requests.get(url, auth=auth, **kwargs)
+            response = requests.request(http_method, url, auth=auth, **kwargs)
             # Raise a requests HTTP exception with the appropriate reason
             # for 4xx and 5xx response status codes which is later caught
             # and passed back.
@@ -265,3 +269,31 @@ def get_configuration_schema_for_query_runner_type(query_runner_type):
 def import_query_runners(query_runner_imports):
     for runner_import in query_runner_imports:
         __import__(runner_import)
+
+
+def guess_type(string_value):
+    """Guess the column type (one of the TYPE_* constants) of a single value.
+
+    Empty strings and None are reported as TYPE_STRING. Native int/long and
+    float values are classified by their Python type: probing them with
+    int() would succeed for a float such as 3.14 (truncating it) and report
+    it as TYPE_INTEGER. Any other value is treated as text and tried, in
+    order, as an integer, a float, a 'true'/'false' literal and a date;
+    TYPE_STRING is the fallback when nothing matches.
+    """
+    if string_value == '' or string_value is None:
+        return TYPE_STRING
+
+    # Values that are already numeric must not go through the int() probe
+    # below, otherwise floats are misreported as integers.
+    if isinstance(string_value, (int, long)):
+        return TYPE_INTEGER
+
+    if isinstance(string_value, float):
+        return TYPE_FLOAT
+
+    # TypeError is caught alongside ValueError/OverflowError so that values
+    # that are neither text nor numbers fall through instead of raising.
+    try:
+        int(string_value)
+        return TYPE_INTEGER
+    except (TypeError, ValueError, OverflowError):
+        pass
+
+    try:
+        float(string_value)
+        return TYPE_FLOAT
+    except (TypeError, ValueError, OverflowError):
+        pass
+
+    if unicode(string_value).lower() in ('true', 'false'):
+        return TYPE_BOOLEAN
+
+    try:
+        parser.parse(string_value)
+        return TYPE_DATETIME
+    except (TypeError, ValueError, OverflowError):
+        pass
+
+    return TYPE_STRING
diff --git a/redash/query_runner/drill.py b/redash/query_runner/drill.py
@@ -1,46 +1,19 @@
 import os
 import logging
-import requests
 import re
 
 from dateutil import parser
 
-from redash.query_runner import BaseQueryRunner, register
-from redash.query_runner import TYPE_STRING, TYPE_DATETIME, TYPE_INTEGER, TYPE_FLOAT, TYPE_BOOLEAN
+from redash.query_runner import (
+    BaseHTTPQueryRunner, register,
+    TYPE_DATETIME, TYPE_INTEGER, TYPE_FLOAT, TYPE_BOOLEAN,
+    guess_type
+)
 from redash.utils import json_dumps, json_loads
 
 logger = logging.getLogger(__name__)
 
 
-# Drill returns request result as strings, so we have to guess the actual column type
-def guess_type(string_value):
-    if string_value == '' or string_value is None:
-        return TYPE_STRING
-
-    try:
-        int(string_value)
-        return TYPE_INTEGER
-    except (ValueError, OverflowError):
-        pass
-
-    try:
-        float(string_value)
-        return TYPE_FLOAT
-    except (ValueError, OverflowError):
-        pass
-
-    if unicode(string_value).lower() in ('true', 'false'):
-        return TYPE_BOOLEAN
-
-    try:
-        parser.parse(string_value)
-        return TYPE_DATETIME
-    except (ValueError, OverflowError):
-        pass
-
-    return TYPE_STRING
-
-
 # Convert Drill string value to actual type
 def convert_type(string_value, actual_type):
     if string_value is None or string_value == '':
@@ -86,107 +59,39 @@ def parse_response(data):
     return {'columns': columns, 'rows': rows}
 
 
-class Drill(BaseQueryRunner):
+class Drill(BaseHTTPQueryRunner):
     noop_query = 'select version from sys.version'
+    response_error = "Drill API returned unexpected status code"
+    requires_authentication = False
+    requires_url = True
+    url_title = 'Drill URL'
+    username_title = 'Username'
+    password_title = 'Password'
 
     @classmethod
     def name(cls):
         return 'Apache Drill'
 
-    @classmethod
-    def type(cls):
-        return 'drill'
-
-    @classmethod
-    def enabled(cls):
-        return True
-
     @classmethod
     def configuration_schema(cls):
-        schema = {
-            'type': 'object',
-            'properties': {
-                'username': {
-                    'type': 'string',
-                    'title': 'Username',
-                },
-                'password': {
-                    'type': 'string',
-                    'title': 'Password',
-                },
-                'url': {
-                    'type': 'string',
-                    'title': 'Drill URL',
-                },
-                # Since Drill itself can act as aggregator of various datasources,
-                # it can contain quite a lot of schemas in `INFORMATION_SCHEMA`
-                # We added this to improve user experience and let users focus only on desired schemas.
-                'allowed_schemas': {
-                    'type': 'string',
-                    'title': 'List of schemas to use in schema browser (comma separated)'
-                }
-            },
-            'order': ['url', 'username', 'password', 'allowed_schemas'],
-            'required': ['url'],
-            'secret': ['password']
+        schema = super(Drill, cls).configuration_schema()
+        # Since Drill itself can act as aggregator of various datasources,
+        # it can contain quite a lot of schemas in `INFORMATION_SCHEMA`
+        # We added this to improve user experience and let users focus only on desired schemas.
+        schema['properties']['allowed_schemas'] = {
+            'type': 'string',
+            'title': 'List of schemas to use in schema browser (comma separated)'
         }
+        schema['order'] += ['allowed_schemas']
         return schema
 
-    def get_auth(self):
-        username = self.configuration.get('username')
-        password = self.configuration.get('password')
-        if username and password:
-            return (username, password)
-        else:
-            return None
-
-    def get_response(self, url, auth=None, **kwargs):
-        # Get authentication values if not given
-        if auth is None:
-            auth = self.get_auth()
-
-        # Then call requests to get the response from the given endpoint
-        # URL optionally, with the additional requests parameters.
-        error = None
-        response = None
-        try:
-            response = requests.post(url, auth=auth, **kwargs)
-            # Raise a requests HTTP exception with the appropriate reason
-            # for 4xx and 5xx response status codes which is later caught
-            # and passed back.
-            response.raise_for_status()
-
-            # Any other responses (e.g. 2xx and 3xx):
-            if response.status_code != 200:
-                error = '{} ({}).'.format(
-                    'Drill returned unexpected status code',
-                    response.status_code,
-                )
-
-        except requests.HTTPError as exc:
-            logger.exception(exc)
-            error = (
-                'Failed to execute query. '
-                'Return Code: {} Reason: {}'.format(
-                    response.status_code,
-                    response.text
-                )
-            )
-        except requests.RequestException as exc:
-            # Catch all other requests exceptions and return the error.
-            logger.exception(exc)
-            error = str(exc)
-
-        # Return response and error.
-        return response, error
-
     def run_query(self, query, user):
         drill_url = os.path.join(self.configuration['url'], 'query.json')
 
         try:
             payload = {'queryType': 'SQL', 'query': query}
 
-            response, error = self.get_response(drill_url, json=payload)
+            response, error = self.get_response(drill_url, http_method='post', json=payload)
             if error is not None:
                 return None, error
 
@@ -236,4 +141,3 @@ def get_schema(self, get_stats=False):
 
 
 register(Drill)
-
diff --git a/redash/query_runner/google_spreadsheets.py b/redash/query_runner/google_spreadsheets.py
@@ -48,29 +48,6 @@ def _get_columns_and_column_names(row):
     return columns, column_names
 
 
-def _guess_type(value):
-    if value == '':
-        return TYPE_STRING
-    try:
-        val = int(value)
-        return TYPE_INTEGER
-    except ValueError:
-        pass
-    try:
-        val = float(value)
-        return TYPE_FLOAT
-    except ValueError:
-        pass
-    if unicode(value).lower() in ('true', 'false'):
-        return TYPE_BOOLEAN
-    try:
-        val = parser.parse(value)
-        return TYPE_DATETIME
-    except (ValueError, OverflowError):
-        pass
-    return TYPE_STRING
-
-
 def _value_eval_list(row_values, col_types):
     value_list = []
     raw_values = zip(col_types, row_values)
@@ -120,7 +97,7 @@ def parse_worksheet(worksheet):
 
     if len(worksheet) > 1:
         for j, value in enumerate(worksheet[HEADER_INDEX + 1]):
-            columns[j]['type'] = _guess_type(value)
+            columns[j]['type'] = guess_type(value)
 
     column_types = [c['type'] for c in columns]
     rows = [dict(zip(column_names, _value_eval_list(row, column_types))) for row in worksheet[HEADER_INDEX + 1:]]

diff --git a/redash/query_runner/query_results.py b/redash/query_runner/query_results.py
@@ -8,9 +8,7 @@
 
 from redash import models
 from redash.permissions import has_access, not_view_only
-from redash.query_runner import (TYPE_BOOLEAN, TYPE_DATETIME, TYPE_FLOAT,
-                                 TYPE_INTEGER, TYPE_STRING, BaseQueryRunner,
-                                 register)
+from redash.query_runner import guess_type, TYPE_STRING, BaseQueryRunner, register
 from redash.utils import json_dumps, json_loads
 
 logger = logging.getLogger(__name__)
@@ -24,28 +22,6 @@ class CreateTableError(Exception):
     pass
 
 
-def _guess_type(value):
-    if value == '' or value is None:
-        return TYPE_STRING
-
-    if isinstance(value, numbers.Integral):
-        return TYPE_INTEGER
-
-    if isinstance(value, float):
-        return TYPE_FLOAT
-
-    if text_type(value).lower() in ('true', 'false'):
-        return TYPE_BOOLEAN
-
-    try:
-        parser.parse(value)
-        return TYPE_DATETIME
-    except (ValueError, OverflowError):
-        pass
-
-    return TYPE_STRING
-
-
 def extract_query_ids(query):
     queries = re.findall(r'(?:join|from)\s+query_(\d+)', query, re.IGNORECASE)
     return [int(q) for q in queries]
@@ -164,7 +140,7 @@ def run_query(self, query, user):
 
                 for i, row in enumerate(cursor):
                     for j, col in enumerate(row):
-                        guess = _guess_type(col)
+                        guess = guess_type(col)
 
                         if columns[j]['type'] is None:
                             columns[j]['type'] = guess

diff --git a/redash/settings/__init__.py b/redash/settings/__init__.py
@@ -189,7 +189,7 @@ def all_settings():
     'redash.query_runner.db2',
     'redash.query_runner.druid',
     'redash.query_runner.kylin',
-    'redash.query_runner.drill'
+    'redash.query_runner.drill',
 ]
 
 enabled_query_runners = array_from_string(os.environ.get("REDASH_ENABLED_QUERY_RUNNERS", ",".join(default_query_runners)))

diff --git a/tests/query_runner/test_drill.py b/tests/query_runner/test_drill.py
@@ -2,37 +2,8 @@
 import datetime
 from unittest import TestCase
 
-from mock import MagicMock
-
 from redash.query_runner import TYPE_DATETIME, TYPE_FLOAT, TYPE_INTEGER, TYPE_BOOLEAN, TYPE_STRING
-from redash.query_runner.drill import guess_type, convert_type, parse_response, Drill
-
-
-class TestGuessType(TestCase):
-    def test_handles_unicode(self):
-        self.assertEqual(guess_type(u'Текст'), TYPE_STRING)
-
-    def test_detects_booleans(self):
-        self.assertEqual(guess_type('true'), TYPE_BOOLEAN)
-        self.assertEqual(guess_type('True'), TYPE_BOOLEAN)
-        self.assertEqual(guess_type('TRUE'), TYPE_BOOLEAN)
-        self.assertEqual(guess_type('false'), TYPE_BOOLEAN)
-        self.assertEqual(guess_type('False'), TYPE_BOOLEAN)
-        self.assertEqual(guess_type('FALSE'), TYPE_BOOLEAN)
-
-    def test_detects_strings(self):
-        self.assertEqual(guess_type(None), TYPE_STRING)
-        self.assertEqual(guess_type(''), TYPE_STRING)
-        self.assertEqual(guess_type('redash'), TYPE_STRING)
-
-    def test_detects_integer(self):
-        self.assertEqual(guess_type('42'), TYPE_INTEGER)
-
-    def test_detects_float(self):
-        self.assertEqual(guess_type('3.14'), TYPE_FLOAT)
-
-    def test_detects_date(self):
-        self.assertEqual(guess_type('2018-10-31'), TYPE_DATETIME)
+from redash.query_runner.drill import convert_type, parse_response
 
 
 class TestConvertType(TestCase):
@@ -118,4 +89,3 @@ def test_parse_regular_response(self):
         self.assertEqual(row_1['date'], datetime.datetime(2018, 2, 1, 0, 0))
         self.assertEqual(row_1['count'], 20)
         self.assertAlmostEqual(row_1['avg'], 6.28, 2)
-