From b6bd46e59ef6ece4e1677765277158e2f916074c Mon Sep 17 00:00:00 2001 From: Arik Fraimovich Date: Tue, 28 Oct 2014 07:38:11 +0200 Subject: [PATCH 01/13] New query runners implementation --- redash/__init__.py | 5 +- redash/data/__init__.py | 0 redash/data/query_runner.py | 34 --- redash/data/query_runner_graphite.py | 46 ---- redash/data/query_runner_mongodb.py | 242 ------------------ redash/data/query_runner_script.py | 51 ---- redash/query_runner/__init__.py | 83 ++++++ .../big_query.py} | 119 +++++---- redash/query_runner/graphite.py | 68 +++++ redash/query_runner/mongodb.py | 175 +++++++++++++ .../mysql.py} | 80 +++--- .../query_runner_pg.py => query_runner/pg.py} | 110 ++++---- redash/query_runner/script.py | 56 ++++ .../url.py} | 23 +- redash/settings.py | 11 + redash/tasks.py | 12 +- 16 files changed, 597 insertions(+), 518 deletions(-) delete mode 100644 redash/data/__init__.py delete mode 100644 redash/data/query_runner.py delete mode 100644 redash/data/query_runner_graphite.py delete mode 100644 redash/data/query_runner_mongodb.py delete mode 100644 redash/data/query_runner_script.py create mode 100644 redash/query_runner/__init__.py rename redash/{data/query_runner_bigquery.py => query_runner/big_query.py} (51%) create mode 100644 redash/query_runner/graphite.py create mode 100644 redash/query_runner/mongodb.py rename redash/{data/query_runner_mysql.py => query_runner/mysql.py} (55%) rename redash/{data/query_runner_pg.py => query_runner/pg.py} (50%) create mode 100644 redash/query_runner/script.py rename redash/{data/query_runner_url.py => query_runner/url.py} (74%) diff --git a/redash/__init__.py b/redash/__init__.py index f766fe87d8..8dc7da6599 100644 --- a/redash/__init__.py +++ b/redash/__init__.py @@ -4,6 +4,7 @@ from statsd import StatsClient from redash import settings +from redash.query_runner import import_query_runners __version__ = '0.5.0' @@ -31,4 +32,6 @@ def create_redis_connection(): setup_logging() redis_connection = create_redis_connection() -statsd_client = StatsClient(host=settings.STATSD_HOST, port=settings.STATSD_PORT, prefix=settings.STATSD_PREFIX) \ No newline at end of file +statsd_client = StatsClient(host=settings.STATSD_HOST, port=settings.STATSD_PORT, prefix=settings.STATSD_PREFIX) + +import_query_runners(settings.QUERY_RUNNERS) diff --git a/redash/data/__init__.py b/redash/data/__init__.py deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/redash/data/query_runner.py b/redash/data/query_runner.py deleted file mode 100644 index d58d0bed39..0000000000 --- a/redash/data/query_runner.py +++ /dev/null @@ -1,34 +0,0 @@ -import json - - -def get_query_runner(connection_type, connection_string): - if connection_type == 'mysql': - from redash.data import query_runner_mysql - runner = query_runner_mysql.mysql(connection_string) - elif connection_type == 'graphite': - from redash.data import query_runner_graphite - connection_params = json.loads(connection_string) - if connection_params['auth']: - connection_params['auth'] = tuple(connection_params['auth']) - else: - connection_params['auth'] = None - runner = query_runner_graphite.graphite(connection_params) - elif connection_type == 'bigquery': - from redash.data import query_runner_bigquery - connection_params = json.loads(connection_string) - runner = query_runner_bigquery.bigquery(connection_params) - elif connection_type == 'script': - from redash.data import query_runner_script - runner = query_runner_script.script(connection_string) - elif connection_type == 'url': - from redash.data 
import query_runner_url - runner = query_runner_url.url(connection_string) - elif connection_type == "mongo": - from redash.data import query_runner_mongodb - connection_params = json.loads(connection_string) - runner = query_runner_mongodb.mongodb(connection_params) - else: - from redash.data import query_runner_pg - runner = query_runner_pg.pg(connection_string) - - return runner diff --git a/redash/data/query_runner_graphite.py b/redash/data/query_runner_graphite.py deleted file mode 100644 index dc664e0109..0000000000 --- a/redash/data/query_runner_graphite.py +++ /dev/null @@ -1,46 +0,0 @@ -""" -QueryRunner for Graphite. -""" -import json -import datetime -import requests -from redash.utils import JSONEncoder - - -def graphite(connection_params): - def transform_result(response): - columns = [{'name': 'Time::x'}, {'name': 'value::y'}, {'name': 'name::series'}] - rows = [] - - for series in response.json(): - for values in series['datapoints']: - timestamp = datetime.datetime.fromtimestamp(int(values[1])) - rows.append({'Time::x': timestamp, 'name::series': series['target'], 'value::y': values[0]}) - - data = {'columns': columns, 'rows': rows} - return json.dumps(data, cls=JSONEncoder) - - def query_runner(query): - base_url = "%s/render?format=json&" % connection_params['url'] - url = "%s%s" % (base_url, "&".join(query.split("\n"))) - error = None - data = None - - try: - response = requests.get(url, auth=connection_params['auth'], - verify=connection_params['verify']) - - if response.status_code == 200: - data = transform_result(response) - else: - error = "Failed getting results (%d)" % response.status_code - - except Exception, ex: - data = None - error = ex.message - - return data, error - - query_runner.annotate_query = False - - return query_runner \ No newline at end of file diff --git a/redash/data/query_runner_mongodb.py b/redash/data/query_runner_mongodb.py deleted file mode 100644 index 235a277eea..0000000000 --- a/redash/data/query_runner_mongodb.py +++ /dev/null @@ -1,242 +0,0 @@ -import datetime -import logging -import json -import sys -import re -import time -from redash.utils import JSONEncoder - -try: - import pymongo - from bson.objectid import ObjectId - from bson.son import SON -except ImportError: - print "Missing dependencies. Please install pymongo." - print "You can use pip: pip install pymongo" - raise - -TYPES_MAP = { - ObjectId : "string", - str : "string", - unicode : "string", - int : "integer", - long : "integer", - float : "float", - bool : "boolean", - datetime.datetime: "datetime", -} - -date_regex = re.compile("ISODate\(\"(.*)\"\)", re.IGNORECASE) - -# Simple query example: -# -# { -# "collection" : "my_collection", -# "query" : { -# "date" : { -# "$gt" : "ISODate(\"2015-01-15 11:41\")", -# }, -# "type" : 1 -# }, -# "fields" : { -# "_id" : 1, -# "name" : 2 -# }, -# "sort" : [ -# { -# "name" : "date", -# "direction" : -1 -# } -# ] -# -# } -# -# -# Aggregation -# =========== -# Uses a syntax similar to the one used in PyMongo, however to support the -# correct order of sorting, it uses a regular list for the "$sort" operation -# that converts into a SON (sorted dictionary) object before execution. 
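To make the SON conversion described above concrete, here is a minimal sketch (illustrative values, assuming pymongo's bson package is installed) of how a "$sort" list is turned into a sorted dictionary before the pipeline runs:

```python
from bson.son import SON

# A "$sort" step as it appears in the aggregation query JSON below.
sort_step = [
    {"name": "count", "direction": -1},
    {"name": "_id", "direction": -1},
]

# Plain dicts don't preserve key order in Python 2, so the runner builds a
# SON (ordered dict) to keep the sort keys in the order the user wrote them.
son_sort = SON([(item["name"], item["direction"]) for item in sort_step])
# son_sort == SON([('count', -1), ('_id', -1)]); it replaces the list in
# the "$sort" step before the aggregate call.
```

Note that the rewritten runner introduced later in this patch only carries over `find()`-style queries; the aggregation branch is not ported.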
-# -# Aggregation query example: -# -# { -# "collection" : "things", -# "aggregate" : [ -# { -# "$unwind" : "$tags" -# }, -# { -# "$group" : { -# { -# "_id" : "$tags", -# "count" : { "$sum" : 1 } -# } -# } -# }, -# { -# "$sort" : [ -# { -# "name" : "count", -# "direction" : -1 -# }, -# { -# "name" : "_id", -# "direction" : -1 -# } -# ] -# } -# ] -# } -# -# -def mongodb(connection_string): - def _get_column_by_name(columns, column_name): - for c in columns: - if "name" in c and c["name"] == column_name: - return c - - return None - - def _convert_date(q, field_name): - m = date_regex.findall(q[field_name]) - if len(m) > 0: - if q[field_name].find(":") == -1: - q[field_name] = datetime.datetime.fromtimestamp(time.mktime(time.strptime(m[0], "%Y-%m-%d"))) - else: - q[field_name] = datetime.datetime.fromtimestamp(time.mktime(time.strptime(m[0], "%Y-%m-%d %H:%M"))) - - def query_runner(query): - if not "dbName" in connection_string or not connection_string["dbName"]: - return None, "dbName is missing from connection string JSON or is empty" - - db_name = connection_string["dbName"] - - if not "connectionString" in connection_string or not connection_string["connectionString"]: - return None, "connectionString is missing from connection string JSON or is empty" - - is_replica_set = True if "replicaSetName" in connection_string and connection_string["replicaSetName"] else False - - if is_replica_set: - if not connection_string["replicaSetName"]: - return None, "replicaSetName is set in the connection string JSON but is empty" - - db_connection = pymongo.MongoReplicaSetClient(connection_string["connectionString"], replicaSet=connection_string["replicaSetName"]) - else: - db_connection = pymongo.MongoClient(connection_string["connectionString"]) - - if db_name not in db_connection.database_names(): - return None, "Unknown database name '%s'" % db_name - - db = db_connection[db_name] - - logging.debug("mongodb connection string: %s", connection_string) - logging.debug("mongodb got query: %s", query) - - try: - query_data = json.loads(query) - except: - return None, "Invalid query format. The query is not a valid JSON." 
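A note on the date handling above, which the rewritten runner keeps verbatim: `ISODate("...")` is not valid JSON, so it travels as a string and is post-processed. A self-contained sketch of that conversion (hypothetical helper name, same logic as `_convert_date`):

```python
import re
import time
import datetime

date_regex = re.compile("ISODate\(\"(.*)\"\)", re.IGNORECASE)

def convert_isodate(value):
    # Pull the date text out of ISODate("..."); a ":" in the value means a
    # time component is present, which selects the longer format string.
    m = date_regex.findall(value)
    if not m:
        return value
    fmt = "%Y-%m-%d %H:%M" if ":" in value else "%Y-%m-%d"
    return datetime.datetime.fromtimestamp(time.mktime(time.strptime(m[0], fmt)))

print convert_isodate('ISODate("2015-01-15 11:41")')
# -> datetime.datetime(2015, 1, 15, 11, 41)
```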
- - if "query" in query_data and "aggregate" in query_data: - return None, "'query' and 'aggregate' sections cannot be used at the same time" - - collection = None - if not "collection" in query_data: - return None, "'collection' must be set" - else: - collection = query_data["collection"] - - q = None - if "query" in query_data: - q = query_data["query"] - for k in q: - if q[k] and type(q[k]) in [str, unicode]: - logging.debug(q[k]) - _convert_date(q, k) - elif q[k] and type(q[k]) is dict: - for k2 in q[k]: - if type(q[k][k2]) in [str, unicode]: - _convert_date(q[k], k2) - - f = None - - aggregate = None - if "aggregate" in query_data: - aggregate = query_data["aggregate"] - for step in aggregate: - if "$sort" in step: - sort_list = [] - for sort_item in step["$sort"]: - sort_list.append((sort_item["name"], sort_item["direction"])) - - step["$sort"] = SON(sort_list) - - if aggregate: - pass - else: - s = None - if "sort" in query_data and query_data["sort"]: - s = [] - for field in query_data["sort"]: - s.append((field["name"], field["direction"])) - - if "fields" in query_data: - f = query_data["fields"] - - columns = [] - rows = [] - - error = None - json_data = None - - cursor = None - if q or (not q and not aggregate): - if s: - cursor = db[collection].find(q, f).sort(s) - else: - cursor = db[collection].find(q, f) - - if "skip" in query_data: - cursor = cursor.skip(query_data["skip"]) - - if "limit" in query_data: - cursor = cursor.limit(query_data["limit"]) - - elif aggregate: - r = db[collection].aggregate(aggregate) - cursor = r["result"] - - for r in cursor: - for k in r: - if _get_column_by_name(columns, k) is None: - columns.append({ - "name": k, - "friendly_name": k, - "type": TYPES_MAP[type(r[k])] if type(r[k]) in TYPES_MAP else None - }) - - # Convert ObjectId to string - if type(r[k]) == ObjectId: - r[k] = str(r[k]) - - rows.append(r) - - if f: - ordered_columns = [] - for k in sorted(f, key=f.get): - ordered_columns.append(_get_column_by_name(columns, k)) - - columns = ordered_columns - - data = { - "columns": columns, - "rows": rows - } - error = None - json_data = json.dumps(data, cls=JSONEncoder) - - return json_data, error - - query_runner.annotate_query = False - return query_runner diff --git a/redash/data/query_runner_script.py b/redash/data/query_runner_script.py deleted file mode 100644 index 8959003166..0000000000 --- a/redash/data/query_runner_script.py +++ /dev/null @@ -1,51 +0,0 @@ -import json -import logging -import sys -import os -import subprocess - -# We use subprocess.check_output because we are lazy. -# If someone will really want to run this on Python < 2.7 they can easily update the code to run -# Popen, check the retcodes and other things and read the standard output to a variable. -if not "check_output" in subprocess.__dict__: - print "ERROR: This runner uses subprocess.check_output function which exists in Python 2.7" - -def script(connection_string): - - def query_runner(query): - try: - json_data = None - error = None - - if connection_string is None: - return None, "script execution path is not set. 
Please reconfigure the data source" - - # Poor man's protection against running scripts from output the scripts directory - if connection_string.find("../") > -1: - return None, "Scripts can only be run from the configured scripts directory" - - query = query.strip() - - script = os.path.join(connection_string, query) - if not os.path.exists(script): - return None, "Script '%s' not found in script directory" % query - - output = subprocess.check_output(script, shell=False) - if output != None: - output = output.strip() - if output != "": - return output, None - - error = "Error reading output" - except subprocess.CalledProcessError as e: - return None, str(e) - except KeyboardInterrupt: - error = "Query cancelled by user." - json_data = None - except Exception as e: - raise sys.exc_info()[1], None, sys.exc_info()[2] - - return json_data, error - - query_runner.annotate_query = False - return query_runner diff --git a/redash/query_runner/__init__.py b/redash/query_runner/__init__.py new file mode 100644 index 0000000000..cfd7f69df8 --- /dev/null +++ b/redash/query_runner/__init__.py @@ -0,0 +1,83 @@ +import logging +import json + +logger = logging.getLogger(__name__) + +__all__ = [ + 'ConfigurationError', + 'BaseQueryRunner', + 'TYPE_DATETIME', + 'TYPE_BOOLEAN', + 'TYPE_INTEGER', + 'TYPE_STRING', + 'TYPE_DATE', + 'TYPE_FLOAT', + 'register', + 'get_query_runner', + 'import_query_runners' +] + +# Valid types of columns returned in results: +TYPE_INTEGER = 'integer' +TYPE_FLOAT = 'float' +TYPE_BOOLEAN = 'boolean' +TYPE_STRING = 'string' +TYPE_DATETIME = 'datetime' +TYPE_DATE = 'date' + + +class ConfigurationError(RuntimeError): + pass + + +class BaseQueryRunner(object): + def __init__(self, configuration_json): + try: + self.configuration = json.loads(configuration_json) + except ValueError: + raise ConfigurationError("Invalid configuration syntax") + + @classmethod + def name(cls): + raise NotImplementedError() + + @classmethod + def enabled(cls): + return True + + @classmethod + def annotate_query(cls): + return True + + @classmethod + def configuration_fields(cls): + return [] + + def run_query(self, query): + raise NotImplementedError() + + +query_runners = {} + + +def register(query_runner_type, query_runner_class): + global query_runners + if query_runner_class.enabled(): + logger.info("Registering %s query runner.", query_runner_type) + query_runners[query_runner_type] = query_runner_class + else: + logger.warning("%s query runner not enabled; not registering", query_runner_type) + + +def get_query_runner(query_runner_type, configuration_json): + global query_runners + query_runner_class = query_runners.get(query_runner_type, None) + if query_runner_class is None: + return None + + return query_runner_class(configuration_json) + + +def import_query_runners(query_runner_imports): + for runner_import in query_runner_imports: + __import__(runner_import) \ No newline at end of file diff --git a/redash/data/query_runner_bigquery.py b/redash/query_runner/big_query.py similarity index 51% rename from redash/data/query_runner_bigquery.py rename to redash/query_runner/big_query.py index b894febb53..8abbd72d40 100644 --- a/redash/data/query_runner_bigquery.py +++ b/redash/query_runner/big_query.py @@ -1,29 +1,24 @@ import datetime -import httplib2 import json +import httplib2 import logging import sys import time -try: - import apiclient.errors - from apiclient.discovery import build - from apiclient.errors import HttpError - from oauth2client.client import SignedJwtAssertionCredentials -except 
ImportError: - print "Missing dependencies. Please install google-api-python-client and oauth2client." - print "You can use pip: pip install google-api-python-client oauth2client" - +from redash.query_runner import * from redash.utils import JSONEncoder +logger = logging.getLogger(__name__) + types_map = { - 'INTEGER': 'integer', - 'FLOAT': 'float', - 'BOOLEAN': 'boolean', - 'STRING': 'string', - 'TIMESTAMP': 'datetime', + 'INTEGER': TYPE_INTEGER, + 'FLOAT': TYPE_FLOAT, + 'BOOLEAN': TYPE_BOOLEAN, + 'STRING': TYPE_STRING, + 'TIMESTAMP': TYPE_DATETIME, } + def transform_row(row, fields): column_index = 0 row_data = {} @@ -49,37 +44,70 @@ def transform_row(row, fields): return row_data -def bigquery(connection_string): - def load_key(filename): - f = file(filename, "rb") - try: - return f.read() - finally: - f.close() - def get_bigquery_service(): - scope = [ - "https://www.googleapis.com/auth/bigquery", - ] +def _import(): + try: + import apiclient.errors + from apiclient.discovery import build + from apiclient.errors import HttpError + from oauth2client.client import SignedJwtAssertionCredentials + + return True + except ImportError: + logger.warning("Missing dependencies. Please install google-api-python-client and oauth2client.") + logger.warning("You can use pip: pip install google-api-python-client oauth2client") + + return False + + +def _load_key(filename): + f = file(filename, "rb") + try: + return f.read() + finally: + f.close() - credentials = SignedJwtAssertionCredentials(connection_string["serviceAccount"], - load_key(connection_string["privateKey"]), scope=scope) - http = httplib2.Http() - http = credentials.authorize(http) - return build("bigquery", "v2", http=http) +def _get_bigquery_service(service_account, private_key): + scope = [ + "https://www.googleapis.com/auth/bigquery", + ] - def get_query_results(jobs, project_id, job_id, start_index): - query_reply = jobs.getQueryResults(projectId=project_id, jobId=job_id, startIndex=start_index).execute() - logging.debug('query_reply %s', query_reply) - if not query_reply['jobComplete']: - time.sleep(10) - return get_query_results(jobs, project_id, job_id, start_index) + credentials = SignedJwtAssertionCredentials(service_account, private_key, scope=scope) + http = httplib2.Http() + http = credentials.authorize(http) - return query_reply + return build("bigquery", "v2", http=http) - def query_runner(query): - bigquery_service = get_bigquery_service() + +def _get_query_results(jobs, project_id, job_id, start_index): + query_reply = jobs.getQueryResults(projectId=project_id, jobId=job_id, startIndex=start_index).execute() + logging.debug('query_reply %s', query_reply) + if not query_reply['jobComplete']: + time.sleep(10) + return _get_query_results(jobs, project_id, job_id, start_index) + + return query_reply + + +class BigQuery(BaseQueryRunner): + @classmethod + def enabled(cls): + return _import() + + @classmethod + def configuration_fields(cls): + return "serviceAccount", "privateKey", "projectId" + + def __init__(self, configuration_json): + super(BigQuery, self).__init__(configuration_json) + _import() + + self.private_key = _load_key(self.configuration["privateKey"]) + + def run_query(self, query): + bigquery_service = _get_bigquery_service(self.configuration["serviceAccount"], + self.private_key) jobs = bigquery_service.jobs() job_data = { @@ -90,17 +118,17 @@ def query_runner(query): } } - logging.debug("bigquery got query: %s", query) + logger.debug("BigQuery got query: %s", query) - project_id = 
connection_string["projectId"] + project_id = self.configuration["projectId"] try: insert_response = jobs.insert(projectId=project_id, body=job_data).execute() current_row = 0 - query_reply = get_query_results(jobs, project_id=project_id, + query_reply = _get_query_results(jobs, project_id=project_id, job_id=insert_response['jobReference']['jobId'], start_index=current_row) - logging.debug("bigquery replied: %s", query_reply) + logger.debug("bigquery replied: %s", query_reply) rows = [] @@ -134,5 +162,4 @@ def query_runner(query): return json_data, error - - return query_runner +register("bigquery", BigQuery) \ No newline at end of file diff --git a/redash/query_runner/graphite.py b/redash/query_runner/graphite.py new file mode 100644 index 0000000000..9bc15fd369 --- /dev/null +++ b/redash/query_runner/graphite.py @@ -0,0 +1,68 @@ +import json +import datetime +import requests +import logging +from redash.query_runner import * +from redash.utils import JSONEncoder + +logger = logging.getLogger(__name__) + + +def _transform_result(response): + columns = ({'name': 'Time::x', 'type': TYPE_DATETIME}, + {'name': 'value::y', 'type': TYPE_FLOAT}, + {'name': 'name::series', 'type': TYPE_STRING}) + + rows = [] + + for series in response.json(): + for values in series['datapoints']: + timestamp = datetime.datetime.fromtimestamp(int(values[1])) + rows.append({'Time::x': timestamp, 'name::series': series['target'], 'value::y': values[0]}) + + data = {'columns': columns, 'rows': rows} + return json.dumps(data, cls=JSONEncoder) + + +class Graphite(BaseQueryRunner): + @classmethod + def configuration_fields(cls): + return "url", "username", "password", "verify" + + @classmethod + def annotate_query(cls): + return False + + def __init__(self, configuration_json): + super(Graphite, self).__init__(configuration_json) + + if "url" not in self.configuration: + raise ConfigurationError("Missing url") + + if "username" in self.configuration and self.configuration["username"]: + self.auth = (self.configuration["username"], self.configuration["password"]) + else: + self.auth = None + + self.verify = self.configuration["verify"] == "true" + self.base_url = "%s/render?format=json&" % self.configuration['url'] + + def run_query(self, query): + url = "%s%s" % (self.base_url, "&".join(query.split("\n"))) + error = None + data = None + + try: + response = requests.get(url, auth=self.auth, verify=self.verify) + + if response.status_code == 200: + data = _transform_result(response) + else: + error = "Failed getting results (%d)" % response.status_code + except Exception, ex: + data = None + error = ex.message + + return data, error + +register("graphite", Graphite) \ No newline at end of file diff --git a/redash/query_runner/mongodb.py b/redash/query_runner/mongodb.py new file mode 100644 index 0000000000..d36e10b7b5 --- /dev/null +++ b/redash/query_runner/mongodb.py @@ -0,0 +1,175 @@ +import json +import datetime +import logging +import re +import time + +from redash.utils import JSONEncoder +from redash.query_runner import * + +logger = logging.getLogger(__name__) + + +def _import(): + try: + import pymongo + from bson.objectid import ObjectId + return True + + except ImportError: + logger.warning("Missing dependencies. 
Please install pymongo.") + logger.warning("You can use pip: pip install pymongo") + + return False + + +TYPES_MAP = { + str: TYPE_STRING, + unicode: TYPE_STRING, + int: TYPE_INTEGER, + long: TYPE_INTEGER, + float: TYPE_FLOAT, + bool: TYPE_BOOLEAN, + datetime.datetime: TYPE_DATETIME, +} + +date_regex = re.compile("ISODate\(\"(.*)\"\)", re.IGNORECASE) + + +def _get_column_by_name(columns, column_name): + for c in columns: + if "name" in c and c["name"] == column_name: + return c + + return None + + +def _convert_date(q, field_name): + m = date_regex.findall(q[field_name]) + if len(m) > 0: + if q[field_name].find(":") == -1: + q[field_name] = datetime.datetime.fromtimestamp(time.mktime(time.strptime(m[0], "%Y-%m-%d"))) + else: + q[field_name] = datetime.datetime.fromtimestamp(time.mktime(time.strptime(m[0], "%Y-%m-%d %H:%M"))) + + +class MongoDB(BaseQueryRunner): + @classmethod + def configuration_fields(cls): + return "connectionString", "dbName", "replicaSetName" + + @classmethod + def enabled(cls): + return _import() + + @classmethod + def annotate_query(cls): + return False + + def __init__(self, configuration_json): + _import() + super(MongoDB, self).__init__(configuration_json) + + if "dbName" not in self.configuration or not connection_string["dbName"]: + raise ConfigurationError("dbName is missing from connection string") + + self.db_name = self.configuration["dbName"] + + if "connectionString" not in self.configuration or not self.configuration["connectionString"]: + raise ConfigurationError("connectionString is missing from connection string") + + self.is_replica_set = True if "replicaSetName" in self.configuration and self.configuration["replicaSetName"] else False + + if self.is_replica_set and not self.configuration["replicaSetName"]: + raise ConfigurationError("replicaSetName is set in the connection string JSON but is empty") + + + def run_query(self, query): + if self.is_replica_set: + db_connection = pymongo.MongoReplicaSetClient(self.configuration["connectionString"], replicaSet=self.configuration["replicaSetName"]) + else: + db_connection = pymongo.MongoClient(self.configuration["connectionString"]) + + if self.db_name not in db_connection.database_names(): + return None, "Unknown database name '%s'" % self.db_name + + db = db_connection[self.db_name ] + + logger.debug("mongodb connection string: %s", self.configuration['connectionString']) + logger.debug("mongodb got query: %s", query) + + try: + query_data = json.loads(query) + except ValueError: + return None, "Invalid query format. The query is not a valid JSON." 
+ + if "collection" not in query_data: + return None, "'collection' must have a value to run a query" + else: + collection = query_data["collection"] + + q = None + if "query" in query_data: + q = query_data["query"] + for k in q: + if q[k] and type(q[k]) in [str, unicode]: + logging.debug(q[k]) + _convert_date(q, k) + elif q[k] and type(q[k]) is dict: + for k2 in q[k]: + if type(q[k][k2]) in [str, unicode]: + _convert_date(q[k], k2) + + f = None + if "fields" in query_data: + f = query_data["fields"] + + s = None + if "sort" in query_data and query_data["sort"]: + s = [] + for field_name in query_data["sort"]: + s.append((field_name, query_data["sort"][field_name])) + + columns = [] + rows = [] + + error = None + json_data = None + + if s: + cursor = db[collection].find(q, f).sort(s) + else: + cursor = db[collection].find(q, f) + + for r in cursor: + for k in r: + if _get_column_by_name(columns, k) is None: + columns.append({ + "name": k, + "friendly_name": k, + "type": TYPES_MAP.get(type(r[k]), TYPE_STRING) + }) + + # Convert ObjectId to string + if type(r[k]) == ObjectId: + r[k] = str(r[k]) + + rows.append(r) + + if f: + ordered_columns = [] + for k in sorted(f, key=f.get): + ordered_columns.append(_get_column_by_name(columns, k)) + + columns = ordered_columns + + data = { + "columns": columns, + "rows": rows + } + error = None + json_data = json.dumps(data, cls=JSONEncoder) + + return json_data, error + +register("mongo", MongoDB) \ No newline at end of file diff --git a/redash/data/query_runner_mysql.py b/redash/query_runner/mysql.py similarity index 55% rename from redash/data/query_runner_mysql.py rename to redash/query_runner/mysql.py index a59a023b3b..78269e79c6 100644 --- a/redash/data/query_runner_mysql.py +++ b/redash/query_runner/mysql.py @@ -1,64 +1,82 @@ -""" -QueryRunner is the function that the workers use, to execute queries. This is the Redshift -(PostgreSQL in fact) version, but easily we can write another to support additional databases -(MySQL and others). - -Because the worker just pass the query, this can be used with any data store that has some sort of -query language (for example: HiveQL). 
-""" -import logging -import json -import MySQLdb import sys +import json +import logging + from redash.utils import JSONEncoder +from redash.query_runner import * + +logger = logging.getLogger(__name__) + + +class Mysql(BaseQueryRunner): + @classmethod + def configuration_fields(cls): + return "host", "user", "passwd" "db" + + @classmethod + def enabled(cls): + try: + import MySQLdb + except ImportError: + return False + + return True + + def __init__(self, configuration_json): + super(Mysql, self).__init__(configuration_json) -def mysql(connection_string): - if connection_string.endswith(';'): - connection_string = connection_string[0:-1] - - def query_runner(query): - connections_params = [entry.split('=')[1] for entry in connection_string.split(';')] - connection = MySQLdb.connect(*connections_params, charset="utf8", use_unicode=True) + if 'db' not in configuration_json.keys(): + raise ConfigurationError("Missing database name") + + self.configuration.update({ + "charset": "utf8", + "use_unicode": True + }) + + def run_query(self, query): + import MySQLdb + + connection = MySQLdb.connect(**self.configuration) cursor = connection.cursor() - logging.debug("mysql got query: %s", query) - + logger.debug("MySQL running query: %s", query) + try: cursor.execute(query) - + data = cursor.fetchall() - + cursor_desc = cursor.description - if (cursor_desc != None): + if cursor_desc is not None: num_fields = len(cursor_desc) column_names = [i[0] for i in cursor.description] - + rows = [dict(zip(column_names, row)) for row in data] + # TODO: add types support columns = [{'name': col_name, 'friendly_name': col_name, 'type': None} for col_name in column_names] - + data = {'columns': columns, 'rows': rows} json_data = json.dumps(data, cls=JSONEncoder) error = None else: json_data = None error = "No data was returned." - + cursor.close() except MySQLdb.Error, e: json_data = None error = e.args[1] except KeyboardInterrupt: error = "Query cancelled by user." - json_data = None + json_data = None except Exception as e: raise sys.exc_info()[1], None, sys.exc_info()[2] finally: connection.close() - + return json_data, error - - - return query_runner + +register("mysql", Mysql) \ No newline at end of file diff --git a/redash/data/query_runner_pg.py b/redash/query_runner/pg.py similarity index 50% rename from redash/data/query_runner_pg.py rename to redash/query_runner/pg.py index 4f6edc7737..31ecb62bf1 100644 --- a/redash/data/query_runner_pg.py +++ b/redash/query_runner/pg.py @@ -1,65 +1,75 @@ -""" -QueryRunner is the function that the workers use, to execute queries. This is the PostgreSQL -version, but easily we can write another to support additional databases (MySQL and others). - -Because the worker just pass the query, this can be used with any data store that has some sort of -query language (for example: HiveQL). 
-""" import json -import sys -import select import logging import psycopg2 +import select +import sys +from redash.query_runner import * from redash.utils import JSONEncoder +logger = logging.getLogger(__name__) + types_map = { - 20: 'integer', - 21: 'integer', - 23: 'integer', - 700: 'float', - 1700: 'float', - 701: 'float', - 16: 'boolean', - 1082: 'date', - 1114: 'datetime', - 1184: 'datetime', - 1014: 'string', - 1015: 'string', - 1008: 'string', - 1009: 'string', - 2951: 'string' + 20: TYPE_INTEGER, + 21: TYPE_INTEGER, + 23: TYPE_INTEGER, + 700: TYPE_FLOAT, + 1700: TYPE_FLOAT, + 701: TYPE_FLOAT, + 16: TYPE_BOOLEAN, + 1082: TYPE_DATE, + 1114: TYPE_DATETIME, + 1184: TYPE_DATETIME, + 1014: TYPE_STRING, + 1015: TYPE_STRING, + 1008: TYPE_STRING, + 1009: TYPE_STRING, + 2951: TYPE_STRING } -def pg(connection_string): - def column_friendly_name(column_name): - return column_name - - def wait(conn): - while 1: - try: - state = conn.poll() - if state == psycopg2.extensions.POLL_OK: - break - elif state == psycopg2.extensions.POLL_WRITE: - select.select([], [conn.fileno()], []) - elif state == psycopg2.extensions.POLL_READ: - select.select([conn.fileno()], [], []) - else: - raise psycopg2.OperationalError("poll() returned %s" % state) - except select.error: - raise psycopg2.OperationalError("select.error received") - - def query_runner(query): - connection = psycopg2.connect(connection_string, async=True) - wait(connection) +def _wait(conn): + while 1: + try: + state = conn.poll() + if state == psycopg2.extensions.POLL_OK: + break + elif state == psycopg2.extensions.POLL_WRITE: + select.select([], [conn.fileno()], []) + elif state == psycopg2.extensions.POLL_READ: + select.select([conn.fileno()], [], []) + else: + raise psycopg2.OperationalError("poll() returned %s" % state) + except select.error: + raise psycopg2.OperationalError("select.error received") + + +class PostgreSQL(BaseQueryRunner): + @classmethod + def configuration_fields(cls): + return 'user', 'password', 'host', 'port', 'dbname' + + def __init__(self, configuration_json): + super(PostgreSQL, self).__init__(configuration_json) + + if 'dbname' not in self.configuration: + raise ConfigurationError("Missing dbname") + + values = [] + for k, v in self.configuration.iteritems(): + values.append("{}={}".format(k, v)) + + self.connection_string = " ".join(values) + + def run_query(self, query): + connection = psycopg2.connect(self.connection_string, async=True) + _wait(connection) cursor = connection.cursor() try: cursor.execute(query) - wait(connection) + _wait(connection) # While set would be more efficient here, it sorts the data which is not what we want, but due to the small # size of the data we can assume it's ok. @@ -71,14 +81,14 @@ def query_runner(query): # TODO: this deduplication needs to be generalized and reused in all query runners. 
column_name = column.name if column_name in column_names: - column_name = column_name + str(duplicates_counter) + column_name += str(duplicates_counter) duplicates_counter += 1 column_names.append(column_name) columns.append({ 'name': column_name, - 'friendly_name': column_friendly_name(column_name), + 'friendly_name': column_name, 'type': types_map.get(column.type_code, None) }) @@ -107,4 +117,4 @@ def query_runner(query): return json_data, error - return query_runner +register("pg", PostgreSQL) \ No newline at end of file diff --git a/redash/query_runner/script.py b/redash/query_runner/script.py new file mode 100644 index 0000000000..0f1402ee8e --- /dev/null +++ b/redash/query_runner/script.py @@ -0,0 +1,56 @@ +import os +import sys +import subprocess + +from redash.query_runner import * + + +class Script(BaseQueryRunner): + @classmethod + def enabled(cls): + return "check_output" in subprocess.__dict__ + + @classmethod + def configuration_fields(cls): + return ("path",) + + @classmethod + def annotate_query(cls): + return False + + def __init__(self, configuration_json): + super(Script, self).__init__(configuration_json) + + # Poor man's protection against running scripts from output the scripts directory + if self.configuration["path"].find("../") > -1: + raise ConfigurationError("Scripts can only be run from the configured scripts directory") + + def run_query(self, query): + try: + json_data = None + error = None + + query = query.strip() + + script = os.path.join(self.configuration["path"], query) + if not os.path.exists(script): + return None, "Script '%s' not found in script directory" % query + + output = subprocess.check_output(script, shell=False) + if output is not None: + output = output.strip() + if output != "": + return output, None + + error = "Error reading output" + except subprocess.CalledProcessError as e: + return None, str(e) + except KeyboardInterrupt: + error = "Query cancelled by user." 
+ json_data = None + except Exception as e: + raise sys.exc_info()[1], None, sys.exc_info()[2] + + return json_data, error + +register("script", Script) \ No newline at end of file diff --git a/redash/data/query_runner_url.py b/redash/query_runner/url.py similarity index 74% rename from redash/data/query_runner_url.py rename to redash/query_runner/url.py index 64f146bd54..bacf121efd 100644 --- a/redash/data/query_runner_url.py +++ b/redash/query_runner/url.py @@ -1,16 +1,22 @@ -import json -import logging import sys -import os import urllib2 -def url(connection_string): +from redash.query_runner import * - def query_runner(query): - base_url = connection_string + +class Url(BaseQueryRunner): + @classmethod + def configuration_fields(cls): + return ("url",) + + @classmethod + def annotate_query(cls): + return False + + def run_query(self, query): + base_url = self.configuration["url"] try: - json_data = None error = None query = query.strip() @@ -41,5 +47,4 @@ def query_runner(query): return json_data, error - query_runner.annotate_query = False - return query_runner +register("url", Url) \ No newline at end of file diff --git a/redash/settings.py b/redash/settings.py index 68f328c150..42f17b0dcd 100644 --- a/redash/settings.py +++ b/redash/settings.py @@ -77,5 +77,16 @@ def parse_boolean(str): CLIENT_SIDE_METRICS = parse_boolean(os.environ.get("REDASH_CLIENT_SIDE_METRICS", "false")) ANALYTICS = os.environ.get("REDASH_ANALYTICS", "") +# Query Runners +QUERY_RUNNERS = [ + 'redash.query_runner.big_query', + 'redash.query_runner.graphite', + 'redash.query_runner.mongodb', + 'redash.query_runner.mysql', + 'redash.query_runner.pg', + 'redash.query_runner.script', + 'redash.query_runner.url', +] + # Features: FEATURE_TABLES_PERMISSIONS = parse_boolean(os.environ.get("REDASH_FEATURE_TABLES_PERMISSIONS", "false")) diff --git a/redash/tasks.py b/redash/tasks.py index 2a75bb1e17..f224b635a9 100644 --- a/redash/tasks.py +++ b/redash/tasks.py @@ -8,7 +8,7 @@ from redash import redis_connection, models, statsd_client, settings from redash.utils import gen_query_hash from redash.worker import celery -from redash.data.query_runner import get_query_runner +from redash.query_runner import get_query_runner logger = get_task_logger(__name__) @@ -151,8 +151,6 @@ def refresh_queries(): outdated_queries_count += 1 statsd_client.gauge('manager.outdated_queries', outdated_queries_count) - # TODO: decide if we still need this - # statsd_client.gauge('manager.queue_size', self.redis_connection.zcard('jobs')) logger.info("Done refreshing queries. Found %d outdated queries." % outdated_queries_count) @@ -237,15 +235,15 @@ def execute_query(self, query, data_source_id): query_hash = gen_query_hash(query) query_runner = get_query_runner(data_source.type, data_source.options) - if getattr(query_runner, 'annotate_query', True): - # TODO: anotate with queu ename + if query_runner.annotate_query(): + # TODO: annotate with queue name annotated_query = "/* Task Id: %s, Query hash: %s */ %s" % \ (self.request.id, query_hash, query) else: annotated_query = query with statsd_client.timer('query_runner.{}.{}.run_time'.format(data_source.type, data_source.name)): - data, error = query_runner(annotated_query) + data, error = query_runner.run_query(annotated_query) run_time = time.time() - start_time logger.info("Query finished... 
data length=%s, error=%s", data and len(data), error) @@ -255,8 +253,6 @@ def execute_query(self, query, data_source_id): # Delete query_hash redis_connection.delete(QueryTask._job_lock_id(query_hash, data_source.id)) - # TODO: it is possible that storing the data will fail, and we will need to retry - # while we already marked the job as done if not error: query_result = models.QueryResult.store_result(data_source.id, query_hash, query, data, run_time, datetime.datetime.utcnow()) else: From af64657260e2d4c4f66f52ccd5b2310a007d9e6a Mon Sep 17 00:00:00 2001 From: Arik Fraimovich Date: Tue, 28 Oct 2014 07:39:45 +0200 Subject: [PATCH 02/13] Migration to update all data source options --- .../2014_10_27_update_data_source_config.py | 54 +++++++++++++++++++ 1 file changed, 54 insertions(+) create mode 100644 migrations/2014_10_27_update_data_source_config.py diff --git a/migrations/2014_10_27_update_data_source_config.py b/migrations/2014_10_27_update_data_source_config.py new file mode 100644 index 0000000000..9ae418f41f --- /dev/null +++ b/migrations/2014_10_27_update_data_source_config.py @@ -0,0 +1,54 @@ +import json + +from redash.models import DataSource + + +def update(data_source): + print "[%s] Old options: %s" % (data_source.name, data_source.options) + if data_source.type == 'pg': + values = data_source.options.split(" ") + configuration = {} + for value in values: + k, v = value.split("=", 1) + configuration[k] = v + data_source.options = json.dumps(configuration) + + elif data_source.type == 'mysql': + values = data_source.options.split(";") + configuration = {} + for value in values: + k, v = value.split("=", 1) + configuration[k] = v + data_source.options = json.dumps(configuration) + + elif data_source.type == 'graphite': + old_config = json.loads(data_source.options) + + configuration = { + "url": old_config["url"] + } + + if "verify" in old_config: + configuration['verify'] = old_config['verify'] + + if "auth" in old_config: + configuration['username'], configuration['password'] = old_config["auth"] + + data_source.options = json.dumps(configuration) + + elif data_source.type == 'url': + data_source.options = json.dumps({"url": data_source.options}) + + elif data_source.type == 'script': + data_source.options = json.dumps({"path": data_source.options}) + + else: + print "[%s] No need to convert type of: %s" % (data_source.name, data_source.type) + + print "[%s] New options: %s" % (data_source.name, data_source.options) + data_source.save() + + +if __name__ == '__main__': + for data_source in DataSource.all(): + update(data_source) \ No newline at end of file From 405834276364256957330f189e31541e81ae1e1e Mon Sep 17 00:00:00 2001 From: Arik Fraimovich Date: Thu, 30 Oct 2014 07:54:00 +0200 Subject: [PATCH 03/13] WIP: configuration object --- redash/query_runner/__init__.py | 43 ++++++++++++++++++++++++ tests/test_query_runner_configuration.py | 12 +++++++ 2 files changed, 55 insertions(+) create mode 100644 tests/test_query_runner_configuration.py diff --git a/redash/query_runner/__init__.py b/redash/query_runner/__init__.py index cfd7f69df8..adcd2cf97c 100644 --- a/redash/query_runner/__init__.py +++ b/redash/query_runner/__init__.py @@ -30,6 +30,49 @@ class ConfigurationError(RuntimeError): pass +def _friendly_name(key): + return " ".join(key.capitalize().split("_")) + + +class ConfigurationField(object): + def __init__(self, key, name=None, mandatory=True, field_type="string"): + if name is None: + name = _friendly_name(key) + + self.key = key + self.name = name + 
self.mandatory = mandatory
+        self.field_type = field_type
+
+    def to_dict(self):
+        return {
+            "key": self.key,
+            "name": self.name,
+            "mandatory": self.mandatory,
+            "field_type": self.field_type
+        }
+
+
+class Configuration(object):
+    def __init__(self, fields):
+        self.fields = {field.key: field for field in fields}
+
+    def parse(self, configuration):
+        parsed = {}
+
+        for key, field in self.fields.iteritems():
+            if field.mandatory and key not in configuration:
+                raise ConfigurationError("Missing mandatory field: {}".format(field.name))
+
+            if key in configuration:
+                parsed[key] = configuration[key]
+
+        return parsed
+
+    def get_input_definition(self):
+        # Iterate over the field objects (dict values); iterating the dict
+        # itself would yield the keys, and str has no to_dict().
+        return [field.to_dict() for field in self.fields.values()]
+
+
 class BaseQueryRunner(object):
     def __init__(self, configuration_json):
         try:
diff --git a/tests/test_query_runner_configuration.py b/tests/test_query_runner_configuration.py
new file mode 100644
index 0000000000..16f2eddf9d
--- /dev/null
+++ b/tests/test_query_runner_configuration.py
@@ -0,0 +1,12 @@
+import unittest
+from redash.query_runner import Configuration, ConfigurationField, ConfigurationError
+
+
+class TestConfigurationParsing(unittest.TestCase):
+    def test_parse_raises_error_when_missing_mandatory_fields(self):
+        configuration = Configuration([ConfigurationField("dbname", mandatory=True)])
+        self.assertRaises(ConfigurationError, configuration.parse, {})
+
+    def test_parse_returns_value_when_correct(self):
+        configuration = Configuration([ConfigurationField("dbname", mandatory=True)])
+        self.assertDictEqual(configuration.parse({"dbname": "test"}), {"dbname": "test"})
\ No newline at end of file

From 20af27677284a76987d5454786b997a31e9c15dc Mon Sep 17 00:00:00 2001
From: Arik Fraimovich
Date: Sun, 9 Nov 2014 22:00:33 +0200
Subject: [PATCH 04/13] Updated configuration spec to include friendly name and
 more
---
 redash/query_runner/__init__.py          | 20 +++++++++++++-------
 redash/query_runner/big_query.py         |  2 +-
 redash/query_runner/graphite.py          |  2 +-
 redash/query_runner/mongodb.py           |  2 +-
 redash/query_runner/mysql.py             |  2 +-
 redash/query_runner/pg.py                | 11 ++++++-----
 redash/query_runner/script.py            |  2 +-
 redash/query_runner/url.py               |  2 +-
 tests/test_query_runner_configuration.py |  4 ++--
 9 files changed, 27 insertions(+), 20 deletions(-)

diff --git a/redash/query_runner/__init__.py b/redash/query_runner/__init__.py
index adcd2cf97c..ac3acc86f3 100644
--- a/redash/query_runner/__init__.py
+++ b/redash/query_runner/__init__.py
@@ -5,6 +5,8 @@
 
 __all__ = [
     'ConfigurationError',
+    'Configuration',
+    'ConfigurationField',
     'BaseQueryRunner',
     'TYPE_DATETIME',
     'TYPE_BOOLEAN',
@@ -35,20 +37,20 @@ def _friendly_name(key):
 
 
 class ConfigurationField(object):
-    def __init__(self, key, name=None, mandatory=True, field_type="string"):
+    def __init__(self, key, name=None, required=False, field_type="string"):
         if name is None:
             name = _friendly_name(key)
 
         self.key = key
         self.name = name
-        self.mandatory = mandatory
+        self.required = required
         self.field_type = field_type
 
     def to_dict(self):
         return {
             "key": self.key,
             "name": self.name,
-            "mandatory": self.mandatory,
+            "mandatory": self.required,
             "field_type": self.field_type
         }
@@ -61,7 +63,7 @@ def parse(self, configuration):
 
         for key, field in self.fields.iteritems():
-            if field.mandatory and key not in configuration:
+            if field.required and key not in configuration:
                 raise ConfigurationError("Missing mandatory field: {}".format(field.name))
 
             if key in configuration:
@@ -76,7 +78,11 @@ class BaseQueryRunner(object):
     def 
__init__(self, configuration_json): try: - self.configuration = json.loads(configuration_json) + configuration_spec = self.configuration_spec() + if not isinstance(configuration_spec, Configuration): + configuration_spec = Configuration([ConfigurationField(k) for k in configuration_spec]) + + self.configuration = configuration_spec.parse(json.loads(configuration_json)) except ValueError: raise ConfigurationError("Invalid configuration syntax") @@ -93,8 +99,8 @@ def annotate_query(cls): return True @classmethod - def configuration_fields(cls): - return [] + def configuration_spec(cls): + return Configuration([]) def run_query(self, query): raise NotImplementedError() diff --git a/redash/query_runner/big_query.py b/redash/query_runner/big_query.py index 8abbd72d40..d864951815 100644 --- a/redash/query_runner/big_query.py +++ b/redash/query_runner/big_query.py @@ -96,7 +96,7 @@ def enabled(cls): return _import() @classmethod - def configuration_fields(cls): + def configuration_spec(cls): return "serviceAccount", "privateKey", "projectId" def __init__(self, configuration_json): diff --git a/redash/query_runner/graphite.py b/redash/query_runner/graphite.py index 9bc15fd369..de45229559 100644 --- a/redash/query_runner/graphite.py +++ b/redash/query_runner/graphite.py @@ -26,7 +26,7 @@ def _transform_result(response): class Graphite(BaseQueryRunner): @classmethod - def configuration_fields(cls): + def configuration_spec(cls): return "url", "username", "password", "verify" @classmethod diff --git a/redash/query_runner/mongodb.py b/redash/query_runner/mongodb.py index d36e10b7b5..aff5c3001c 100644 --- a/redash/query_runner/mongodb.py +++ b/redash/query_runner/mongodb.py @@ -55,7 +55,7 @@ def _convert_date(q, field_name): class MongoDB(BaseQueryRunner): @classmethod - def configuration_fields(cls): + def configuration_spec(cls): return "connectionString", "dbName", "replicaSetName" @classmethod diff --git a/redash/query_runner/mysql.py b/redash/query_runner/mysql.py index 78269e79c6..b8fd35a31f 100644 --- a/redash/query_runner/mysql.py +++ b/redash/query_runner/mysql.py @@ -10,7 +10,7 @@ class Mysql(BaseQueryRunner): @classmethod - def configuration_fields(cls): + def configuration_spec(cls): return "host", "user", "passwd" "db" @classmethod diff --git a/redash/query_runner/pg.py b/redash/query_runner/pg.py index 31ecb62bf1..4e6de5fc89 100644 --- a/redash/query_runner/pg.py +++ b/redash/query_runner/pg.py @@ -46,15 +46,16 @@ def _wait(conn): class PostgreSQL(BaseQueryRunner): @classmethod - def configuration_fields(cls): - return 'user', 'password', 'host', 'port', 'dbname' + def configuration_spec(cls): + return Configuration([ConfigurationField('user'), + ConfigurationField('password'), + ConfigurationField('host'), + ConfigurationField('port'), + ConfigurationField('dbname', name='Database Name', required=True)]) def __init__(self, configuration_json): super(PostgreSQL, self).__init__(configuration_json) - if 'dbname' not in self.configuration: - raise ConfigurationError("Missing dbname") - values = [] for k, v in self.configuration.iteritems(): values.append("{}={}".format(k, v)) diff --git a/redash/query_runner/script.py b/redash/query_runner/script.py index 0f1402ee8e..922f583f91 100644 --- a/redash/query_runner/script.py +++ b/redash/query_runner/script.py @@ -11,7 +11,7 @@ def enabled(cls): return "check_output" in subprocess.__dict__ @classmethod - def configuration_fields(cls): + def configuration_spec(cls): return ("path",) @classmethod diff --git a/redash/query_runner/url.py 
b/redash/query_runner/url.py index bacf121efd..d2643fdd1f 100644 --- a/redash/query_runner/url.py +++ b/redash/query_runner/url.py @@ -6,7 +6,7 @@ class Url(BaseQueryRunner): @classmethod - def configuration_fields(cls): + def configuration_spec(cls): return ("url",) @classmethod diff --git a/tests/test_query_runner_configuration.py b/tests/test_query_runner_configuration.py index 16f2eddf9d..68063c5072 100644 --- a/tests/test_query_runner_configuration.py +++ b/tests/test_query_runner_configuration.py @@ -4,9 +4,9 @@ class TestConfigurationParsing(unittest.TestCase): def test_parse_raises_error_when_missing_mandatory_fields(self): - configuration = Configuration([ConfigurationField("dbname", mandatory=True)]) + configuration = Configuration([ConfigurationField("dbname", required=True)]) self.assertRaises(ConfigurationError, configuration.parse, {}) def test_parse_returns_value_when_correct(self): - configuration = Configuration([ConfigurationField("dbname", mandatory=True)]) + configuration = Configuration([ConfigurationField("dbname", required=True)]) self.assertDictEqual(configuration.parse({"dbname":"test"}), {"dbname":"test"}) \ No newline at end of file From 8a171ba39a01d9b5dcd884c48dc9d6cf1f9ce482 Mon Sep 17 00:00:00 2001 From: Arik Fraimovich Date: Thu, 29 Jan 2015 11:10:28 +0200 Subject: [PATCH 05/13] Use JSON Schema for data source configuration --- .../2014_10_27_update_data_source_config.py | 3 + redash/controllers.py | 25 +++++ redash/query_runner/__init__.py | 103 +++++++----------- redash/query_runner/big_query.py | 23 +++- redash/query_runner/graphite.py | 27 ++++- redash/query_runner/mongodb.py | 33 +++--- redash/query_runner/mysql.py | 28 ++++- redash/query_runner/pg.py | 35 ++++-- redash/query_runner/script.py | 17 ++- redash/query_runner/url.py | 14 ++- requirements.txt | 1 + tests/test_controllers.py | 43 +++++++- tests/test_query_runner_configuration.py | 12 -- 13 files changed, 246 insertions(+), 118 deletions(-) delete mode 100644 tests/test_query_runner_configuration.py diff --git a/migrations/2014_10_27_update_data_source_config.py b/migrations/2014_10_27_update_data_source_config.py index 9ae418f41f..7cabf82f10 100644 --- a/migrations/2014_10_27_update_data_source_config.py +++ b/migrations/2014_10_27_update_data_source_config.py @@ -42,6 +42,9 @@ def update(data_source): elif data_source.type == 'script': data_source.options = json.dumps({"path": data_source.options}) + elif data_source.type == 'mongo': + data_source.type = 'mongodb' + else: print "[%s] No need to convert type of: %s" % (data_source.name, data_source.type) diff --git a/redash/controllers.py b/redash/controllers.py index 57a7ff5c1e..e5d15d4b55 100644 --- a/redash/controllers.py +++ b/redash/controllers.py @@ -23,6 +23,7 @@ from redash.tasks import QueryTask, record_event from redash.cache import headers as cache_headers from redash.permissions import require_permission +from redash.query_runner import query_runners, validate_configuration @app.route('/ping', methods=['GET']) @@ -174,11 +175,35 @@ def post(self): api.add_resource(MetricsAPI, '/api/metrics/v1/send', endpoint='metrics') +class DataSourceTypeListAPI(BaseResource): + @require_permission("admin") + def get(self): + return [q.to_dict() for q in query_runners.values()] + +api.add_resource(DataSourceTypeListAPI, '/api/data_sources/types', endpoint='data_source_types') + + class DataSourceListAPI(BaseResource): def get(self): data_sources = [ds.to_dict() for ds in models.DataSource.all()] return data_sources + @require_permission("admin") 
+ def post(self): + req = request.get_json(True) + required_fields = ('options', 'name', 'type') + for f in required_fields: + if f not in req: + abort(400) + + if not validate_configuration(req['type'], req['options']): + abort(400) + + datasource = models.DataSource.create(name=req['name'], type=req['type'], options=req['options']) + + return datasource.to_dict() + + api.add_resource(DataSourceListAPI, '/api/data_sources', endpoint='data_sources') diff --git a/redash/query_runner/__init__.py b/redash/query_runner/__init__.py index ac3acc86f3..888bbe5360 100644 --- a/redash/query_runner/__init__.py +++ b/redash/query_runner/__init__.py @@ -1,12 +1,13 @@ import logging import json +import jsonschema +from jsonschema import ValidationError + logger = logging.getLogger(__name__) __all__ = [ - 'ConfigurationError', - 'Configuration', - 'ConfigurationField', + 'ValidationError', 'BaseQueryRunner', 'TYPE_DATETIME', 'TYPE_BOOLEAN', @@ -28,67 +29,21 @@ TYPE_DATE = 'date' -class ConfigurationError(RuntimeError): - pass - - def _friendly_name(key): return " ".join(key.capitalize().split("_")) -class ConfigurationField(object): - def __init__(self, key, name=None, required=False, field_type="string"): - if name is None: - name = _friendly_name(key) - - self.key = key - self.name = name - self.required = required - self.field_type = field_type - - def to_dict(self): - return { - "key": self.key, - "name": self.name, - "mandatory": self.required, - "field_type": self.field_type - } - - -class Configuration(object): - def __init__(self, fields): - self.fields = {field.key: field for field in fields} - - def parse(self, configuration): - parsed = {} - - for key, field in self.fields.iteritems(): - if field.required and key not in configuration: - raise ConfigurationError("Missing mandatory field: {}".format(field.name)) - - if key in configuration: - parsed[key] = configuration[key] - - return parsed - - def get_input_definition(self): - return [field.to_dict() for field in self.fields] - - class BaseQueryRunner(object): - def __init__(self, configuration_json): - try: - configuration_spec = self.configuration_spec() - if not isinstance(configuration_spec, Configuration): - configuration_spec = Configuration([ConfigurationField(k) for k in configuration_spec]) - - self.configuration = configuration_spec.parse(json.loads(configuration_json)) - except ValueError: - raise ConfigurationError("Invalid configuration syntax") + def __init__(self, configuration): + jsonschema.validate(configuration, self.configuration_schema()) @classmethod def name(cls): - raise NotImplementedError() + return cls.__name__ + + @classmethod + def type(cls): + return cls.__name__.lower() @classmethod def enabled(cls): @@ -99,23 +54,31 @@ def annotate_query(cls): return True @classmethod - def configuration_spec(cls): - return Configuration([]) + def configuration_schema(cls): + return {} def run_query(self, query): raise NotImplementedError() + @classmethod + def to_dict(cls): + return { + 'name': cls.name(), + 'type': cls.type(), + 'configuration_schema': cls.configuration_schema() + } + query_runners = {} -def register(query_runner_type, query_runner_class): +def register(query_runner_class): global query_runners if query_runner_class.enabled(): - logger.info("Registering %s query runner.", query_runner_type) - query_runners[query_runner_type] = query_runner_class + logger.info("Registering %s (%s) query runner.", query_runner_class.name(), query_runner_class.type()) + query_runners[query_runner_class.type()] = 
query_runner_class else: - logger.warning("%s query runner not enabled; not registering", query_runner_type) + logger.warning("%s query runner not enabled; not registering", query_runner_class.name()) def get_query_runner(query_runner_type, configuration_json): @@ -124,7 +87,21 @@ def get_query_runner(query_runner_type, configuration_json): if query_runner_class is None: return None - return query_runner_class(configuration_json) + return query_runner_class(json.loads(configuration_json)) + + +def validate_configuration(query_runner_type, configuration_json): + global query_runners + query_runner_class = query_runners.get(query_runner_type, None) + if query_runner_class is None: + return False + + try: + jsonschema.validate(json.loads(configuration_json), query_runner_class.configuration_schema()) + except ValidationError: + return False + + return True def import_query_runners(query_runner_imports): diff --git a/redash/query_runner/big_query.py b/redash/query_runner/big_query.py index d864951815..2505ada953 100644 --- a/redash/query_runner/big_query.py +++ b/redash/query_runner/big_query.py @@ -96,8 +96,25 @@ def enabled(cls): return _import() @classmethod - def configuration_spec(cls): - return "serviceAccount", "privateKey", "projectId" + def configuration_schema(cls): + return { + 'type': 'object', + 'properties': { + 'serviceAccount': { + 'type': 'string', + 'title': 'Service Account' + }, + 'projectId': { + 'type': 'string', + 'title': 'Project ID' + }, + 'privateKey': { + 'type': 'string', + 'title': 'Private Key Path' + } + }, + 'required': ['serviceAccount', 'projectId', 'privateKey'] + } def __init__(self, configuration_json): super(BigQuery, self).__init__(configuration_json) @@ -162,4 +179,4 @@ def run_query(self, query): return json_data, error -register("bigquery", BigQuery) \ No newline at end of file +register(BigQuery) \ No newline at end of file diff --git a/redash/query_runner/graphite.py b/redash/query_runner/graphite.py index de45229559..cb17e3c59c 100644 --- a/redash/query_runner/graphite.py +++ b/redash/query_runner/graphite.py @@ -26,8 +26,26 @@ def _transform_result(response): class Graphite(BaseQueryRunner): @classmethod - def configuration_spec(cls): - return "url", "username", "password", "verify" + def configuration_schema(cls): + return { + 'type': 'object', + 'properties': { + 'url': { + 'type': 'string' + }, + 'username': { + 'type': 'string' + }, + 'password': { + 'type': 'string' + }, + 'verify': { + 'type': 'boolean', + 'title': 'Verify SSL certificate' + } + }, + 'required': ['url'] + } @classmethod def annotate_query(cls): @@ -36,9 +54,6 @@ def annotate_query(cls): def __init__(self, configuration_json): super(Graphite, self).__init__(configuration_json) - if "url" not in self.configuration: - raise ConfigurationError("Missing url") - if "username" in self.configuration and self.configuration["username"]: self.auth = (self.configuration["username"], self.configuration["password"]) else: @@ -65,4 +80,4 @@ def run_query(self, query): return data, error -register("graphite", Graphite) \ No newline at end of file +register(Graphite) \ No newline at end of file diff --git a/redash/query_runner/mongodb.py b/redash/query_runner/mongodb.py index aff5c3001c..67e7d8b7e1 100644 --- a/redash/query_runner/mongodb.py +++ b/redash/query_runner/mongodb.py @@ -55,8 +55,25 @@ def _convert_date(q, field_name): class MongoDB(BaseQueryRunner): @classmethod - def configuration_spec(cls): - return "connectionString", "dbName", "replicaSetName" + def configuration_schema(cls): 
diff --git a/redash/query_runner/mongodb.py b/redash/query_runner/mongodb.py
index aff5c3001c..67e7d8b7e1 100644
--- a/redash/query_runner/mongodb.py
+++ b/redash/query_runner/mongodb.py
@@ -55,8 +55,25 @@ def _convert_date(q, field_name):

 class MongoDB(BaseQueryRunner):
     @classmethod
-    def configuration_spec(cls):
-        return "connectionString", "dbName", "replicaSetName"
+    def configuration_schema(cls):
+        return {
+            'type': 'object',
+            'properties': {
+                'connectionString': {
+                    'type': 'string',
+                    'title': 'Connection String'
+                },
+                'dbName': {
+                    'type': 'string',
+                    'title': "Database Name"
+                },
+                'replicaSetName': {
+                    'type': 'string',
+                    'title': 'Replica Set Name'
+                }
+            },
+            'required': ['connectionString']
+        }

     @classmethod
     def enabled(cls):
@@ -70,20 +87,10 @@ def __init__(self, configuration_json):
         _import()
         super(MongoDB, self).__init__(configuration_json)

-        if "dbName" not in self.configuration or not connection_string["dbName"]:
-            raise ConfigurationError("dbName is missing from connection string")
-
         self.db_name = self.configuration["dbName"]

-        if "connectionString" not in self.configuration or not self.configuration["connectionString"]:
-            raise ConfigurationError("connectionString is missing from connection string")
-
         self.is_replica_set = True if "replicaSetName" in self.configuration and self.configuration["replicaSetName"] else False

-        if self.is_replica_set and not self.configuration["replicaSetName"]:
-            raise ConfigurationError("replicaSetName is set in the connection string JSON but is empty")
-
-
     def run_query(self, query):
         if self.is_replica_set:
             db_connection = pymongo.MongoReplicaSetClient(self.configuration["connectionString"], replicaSet=self.configuration["replicaSetName"])
@@ -172,4 +179,4 @@ def run_query(self, query):
     return json_data, error


-register("mongo", MongoDB)
\ No newline at end of file
+register(MongoDB)
\ No newline at end of file
diff --git a/redash/query_runner/mysql.py b/redash/query_runner/mysql.py
index b8fd35a31f..023c9de9e4 100644
--- a/redash/query_runner/mysql.py
+++ b/redash/query_runner/mysql.py
@@ -10,8 +10,27 @@

 class Mysql(BaseQueryRunner):
     @classmethod
-    def configuration_spec(cls):
-        return "host", "user", "passwd" "db"
+    def configuration_schema(cls):
+        return {
+            'type': 'object',
+            'properties': {
+                'host': {
+                    'type': 'string'
+                },
+                'user': {
+                    'type': 'string'
+                },
+                'passwd': {
+                    'type': 'string',
+                    'title': 'Password'
+                },
+                'db': {
+                    'type': 'string',
+                    'title': 'Database name'
+                }
+            },
+            'required': ['db']
+        }

     @classmethod
     def enabled(cls):
@@ -25,9 +44,6 @@ def enabled(cls):
     def __init__(self, configuration_json):
         super(Mysql, self).__init__(configuration_json)

-        if 'db' not in configuration_json.keys():
-            raise ConfigurationError("Missing database name")
-
         self.configuration.update({
             "charset": "utf8",
             "use_unicode": True
@@ -79,4 +95,4 @@ def run_query(self, query):
     return json_data, error


-register("mysql", Mysql)
\ No newline at end of file
+register(Mysql)
\ No newline at end of file
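The Mysql hunk above also quietly retires a real bug: in the deleted configuration_spec(), the missing comma between "passwd" and "db" makes Python concatenate the two adjacent string literals, so the old spec actually advertised a field named "passwddb". A short illustration of the pitfall:

    # Implicit string-literal concatenation: this tuple has three
    # elements, not the four that were intended.
    spec = "host", "user", "passwd" "db"
    assert spec == ("host", "user", "passwddb")

The explicit dictionary keys of the JSON Schema version leave no room for that class of typo.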
diff --git a/redash/query_runner/pg.py b/redash/query_runner/pg.py
index 4e6de5fc89..811d52a2dc 100644
--- a/redash/query_runner/pg.py
+++ b/redash/query_runner/pg.py
@@ -46,12 +46,33 @@ def _wait(conn):

 class PostgreSQL(BaseQueryRunner):
     @classmethod
-    def configuration_spec(cls):
-        return Configuration([ConfigurationField('user'),
-                              ConfigurationField('password'),
-                              ConfigurationField('host'),
-                              ConfigurationField('port'),
-                              ConfigurationField('dbname', name='Database Name', required=True)])
+    def configuration_schema(cls):
+        return {
+            "type": "object",
+            "properties": {
+                "user": {
+                    "type": "string"
+                },
+                "password": {
+                    "type": "string"
+                },
+                "host": {
+                    "type": "string"
+                },
+                "port": {
+                    "type": "number"
+                },
+                "dbname": {
+                    "type": "string",
+                    "title": "Database Name"
+                }
+            },
+            "required": ["dbname"]
+        }
+
+    @classmethod
+    def type(cls):
+        return "pg"

     def __init__(self, configuration_json):
         super(PostgreSQL, self).__init__(configuration_json)
@@ -118,4 +139,4 @@ def run_query(self, query):
     return json_data, error


-register("pg", PostgreSQL)
\ No newline at end of file
+register(PostgreSQL)
\ No newline at end of file
diff --git a/redash/query_runner/script.py b/redash/query_runner/script.py
index 922f583f91..e25e27fdba 100644
--- a/redash/query_runner/script.py
+++ b/redash/query_runner/script.py
@@ -11,8 +11,17 @@ def enabled(cls):
         return "check_output" in subprocess.__dict__

     @classmethod
-    def configuration_spec(cls):
-        return ("path",)
+    def configuration_schema(cls):
+        return {
+            'type': 'object',
+            'properties': {
+                'path': {
+                    'type': 'string',
+                    'title': 'Scripts path'
+                }
+            },
+            'required': ['path']
+        }

     @classmethod
     def annotate_query(cls):
@@ -23,7 +32,7 @@ def __init__(self, configuration_json):

         # Poor man's protection against running scripts from outside the scripts directory
         if self.configuration["path"].find("../") > -1:
-            raise ConfigurationError("Scripts can only be run from the configured scripts directory")
+            raise ValidationError("Scripts can only be run from the configured scripts directory")

     def run_query(self, query):
         try:
@@ -53,4 +62,4 @@ def run_query(self, query):
     return json_data, error


-register("script", Script)
\ No newline at end of file
+register(Script)
\ No newline at end of file
diff --git a/redash/query_runner/url.py b/redash/query_runner/url.py
index d2643fdd1f..102df91959 100644
--- a/redash/query_runner/url.py
+++ b/redash/query_runner/url.py
@@ -6,8 +6,16 @@

 class Url(BaseQueryRunner):
     @classmethod
-    def configuration_spec(cls):
-        return ("url",)
+    def configuration_schema(cls):
+        return {
+            'type': 'object',
+            'properties': {
+                'url': {
+                    'type': 'string',
+                    'title': 'URL base path'
+                }
+            }
+        }

     @classmethod
     def annotate_query(cls):
@@ -47,4 +55,4 @@ def run_query(self, query):
     return json_data, error


-register("url", Url)
\ No newline at end of file
+register(Url)
\ No newline at end of file
diff --git a/requirements.txt b/requirements.txt
index ee94503011..0e56de7aea 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -23,3 +23,4 @@ honcho==0.5.0
 statsd==2.1.2
 gunicorn==18.0
 celery==3.1.11
+jsonschema==2.4.0
diff --git a/tests/test_controllers.py b/tests/test_controllers.py
index 5bc49d4885..b78644b96b 100644
--- a/tests/test_controllers.py
+++ b/tests/test_controllers.py
@@ -472,4 +472,45 @@ def test_logout_when_loggedin(self):
         self.assertTrue(current_user.is_authenticated())
         rv = c.get('/logout')
         self.assertEquals(rv.status_code, 302)
-        self.assertFalse(current_user.is_authenticated())
\ No newline at end of file
+        self.assertFalse(current_user.is_authenticated())
+
+
+class DataSourceTypesTest(BaseTestCase):
+    def test_returns_data_for_admin(self):
+        admin = user_factory.create(groups=['admin', 'default'])
+        with app.test_client() as c, authenticated_user(c, user=admin):
+            rv = c.get("/api/data_sources/types")
+            self.assertEqual(rv.status_code, 200)
+
+    def test_returns_403_for_non_admin(self):
+        with app.test_client() as c, authenticated_user(c):
+            rv = c.get("/api/data_sources/types")
+            self.assertEqual(rv.status_code, 403)
+
+
+class DataSourceTest(BaseTestCase):
+    def test_returns_400_when_missing_fields(self):
+        admin = user_factory.create(groups=['admin', 'default'])
+        with app.test_client() as c, authenticated_user(c, user=admin):
+            rv = c.post("/api/data_sources")
+            self.assertEqual(rv.status_code, 400)
+
+            rv = json_request(c.post, '/api/data_sources', data={'name': 'DS 1'})
+
+            self.assertEqual(rv.status_code, 400)
+
+    def test_returns_400_when_configuration_invalid(self):
+        admin = user_factory.create(groups=['admin', 'default'])
+        with app.test_client() as c, authenticated_user(c, user=admin):
+            rv = json_request(c.post, '/api/data_sources',
+                              data={'name': 'DS 1', 'type': 'pg', 'options': '{}'})
+
+            self.assertEqual(rv.status_code, 400)
+
+    def test_creates_data_source(self):
+        admin = user_factory.create(groups=['admin', 'default'])
+        with app.test_client() as c, authenticated_user(c, user=admin):
+            rv = json_request(c.post, '/api/data_sources',
+                              data={'name': 'DS 1', 'type': 'pg', 'options': '{"dbname": "redash"}'})
+
+            self.assertEqual(rv.status_code, 200)
\ No newline at end of file
diff --git a/tests/test_query_runner_configuration.py b/tests/test_query_runner_configuration.py
deleted file mode 100644
index 68063c5072..0000000000
--- a/tests/test_query_runner_configuration.py
+++ /dev/null
@@ -1,12 +0,0 @@
-import unittest
-from redash.query_runner import Configuration, ConfigurationField, ConfigurationError
-
-
-class TestConfigurationParsing(unittest.TestCase):
-    def test_parse_raises_error_when_missing_mandatory_fields(self):
-        configuration = Configuration([ConfigurationField("dbname", required=True)])
-        self.assertRaises(ConfigurationError, configuration.parse, {})
-
-    def test_parse_returns_value_when_correct(self):
-        configuration = Configuration([ConfigurationField("dbname", required=True)])
-        self.assertDictEqual(configuration.parse({"dbname":"test"}), {"dbname":"test"})
\ No newline at end of file

From 0abce2738157516fa843cede725453a1dcc10c00 Mon Sep 17 00:00:00 2001
From: Arik Fraimovich
Date: Thu, 29 Jan 2015 11:21:59 +0200
Subject: [PATCH 06/13] Set configuration in base ctor

---
 redash/query_runner/__init__.py | 7 +------
 redash/query_runner/graphite.py | 2 +-
 2 files changed, 2 insertions(+), 7 deletions(-)

diff --git a/redash/query_runner/__init__.py b/redash/query_runner/__init__.py
index 888bbe5360..14ca83e46d 100644
--- a/redash/query_runner/__init__.py
+++ b/redash/query_runner/__init__.py
@@ -29,13 +29,10 @@
 TYPE_DATE = 'date'


-def _friendly_name(key):
-    return " ".join(key.capitalize().split("_"))
-
-
 class BaseQueryRunner(object):
     def __init__(self, configuration):
         jsonschema.validate(configuration, self.configuration_schema())
+        self.configuration = configuration

     @classmethod
     def name(cls):
@@ -82,7 +79,6 @@ def register(query_runner_class):


 def get_query_runner(query_runner_type, configuration_json):
-    global query_runners
     query_runner_class = query_runners.get(query_runner_type, None)
     if query_runner_class is None:
         return None
@@ -91,7 +87,6 @@ def get_query_runner(query_runner_type, configuration_json):


 def validate_configuration(query_runner_type, configuration_json):
-    global query_runners
     query_runner_class = query_runners.get(query_runner_type, None)
     if query_runner_class is None:
         return False
diff --git a/redash/query_runner/graphite.py b/redash/query_runner/graphite.py
index cb17e3c59c..d6fc97bfb6 100644
--- a/redash/query_runner/graphite.py
+++ b/redash/query_runner/graphite.py
@@ -59,7 +59,7 @@ def __init__(self, configuration_json):
         else:
             self.auth = None

-        self.verify = self.configuration["verify"] == "true"
+        self.verify = self.configuration["verify"]

         self.base_url = "%s/render?format=json&" % self.configuration['url']

     def run_query(self, query):
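With jsonschema.validate() and the self.configuration assignment both living in BaseQueryRunner.__init__, individual runners can drop their hand-rolled option checks entirely: constructing a runner with a bad configuration now fails before any connection is attempted. A minimal sketch, assuming the PostgreSQL runner from the first patch (the option values are illustrative):

    from jsonschema import ValidationError

    try:
        PostgreSQL({"host": "localhost"})      # missing the required "dbname"
    except ValidationError:
        print "invalid configuration"

    runner = PostgreSQL({"dbname": "redash"})  # satisfies the schema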
From d798c77574942740a53d42c582f7869c8cd36085 Mon Sep 17 00:00:00 2001
From: Arik Fraimovich
Date: Mon, 2 Mar 2015 07:34:06 +0200
Subject: [PATCH 07/13] Support for already valid data source config

---
 ...a_source_config.py => 0003_update_data_source_config.py} | 6 ++++++
 redash/query_runner/__init__.py                             | 2 +-
 2 files changed, 7 insertions(+), 1 deletion(-)
 rename migrations/{2014_10_27_update_data_source_config.py => 0003_update_data_source_config.py} (89%)

diff --git a/migrations/2014_10_27_update_data_source_config.py b/migrations/0003_update_data_source_config.py
similarity index 89%
rename from migrations/2014_10_27_update_data_source_config.py
rename to migrations/0003_update_data_source_config.py
index 7cabf82f10..7196328428 100644
--- a/migrations/2014_10_27_update_data_source_config.py
+++ b/migrations/0003_update_data_source_config.py
@@ -1,10 +1,16 @@
 import json

+from redash import query_runner
 from redash.models import DataSource


 def update(data_source):
     print "[%s] Old options: %s" % (data_source.name, data_source.options)
+
+    if query_runner.validate_configuration(data_source.type, data_source.options):
+        print "[%s] configuration already valid. skipping." % data_source.name
+        return
+
     if data_source.type == 'pg':
         values = data_source.options.split(" ")
         configuration = {}
diff --git a/redash/query_runner/__init__.py b/redash/query_runner/__init__.py
index 14ca83e46d..379b0fd1f4 100644
--- a/redash/query_runner/__init__.py
+++ b/redash/query_runner/__init__.py
@@ -93,7 +93,7 @@ def validate_configuration(query_runner_type, configuration_json):

     try:
         jsonschema.validate(json.loads(configuration_json), query_runner_class.configuration_schema())
-    except ValidationError:
+    except (ValidationError, ValueError):
         return False

     return True
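Catching ValueError alongside ValidationError is what makes the migration guard safe: pre-migration options are stored as raw connection strings rather than JSON, so json.loads() blows up before schema validation even starts. A sketch of the two failure modes the combined except clause covers (sample values are illustrative):

    import json

    # Legacy format: not JSON at all, so json.loads() raises ValueError.
    json.loads("user=redash_reader host=localhost dbname=redash")

    # Valid JSON but invalid configuration: this parses fine, and
    # jsonschema.validate() then raises ValidationError, since the pg
    # schema requires "dbname".
    json.loads('{"port": 5432}')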
{}".format(i+1, query_runner_name) + + idx = 0 + while idx < 1 or idx > len(query_runners.keys()): + idx = click.prompt("[{}-{}]".format(1, len(query_runners.keys())), type=int) + + type = query_runners.keys()[idx-1] + elif type not in query_runners.keys(): + print "Error: type not supported (supported types: {})".format(", ".join(query_runners.keys())) + exit() + + if options is None: + query_runner = query_runners[type] + schema = query_runner.configuration_schema() + + types = { + 'string': unicode, + 'number': int, + 'boolean': bool + } + + options_obj = {} + + for k, prop in schema['properties'].iteritems(): + required = k in schema.get('required', []) + default_value = "<>" + if required: + default_value = None + + prompt = prop.get('title', k.capitalize()) + if required: + prompt = "{} (required)".format(prompt) + else: + prompt = "{} (optional)".format(prompt) + + value = click.prompt(prompt, default=default_value, type=types[prop['type']], show_default=False) + if value != default_value: + options_obj[k] = value + + options = json.dumps(options_obj) + + if not validate_configuration(type, options): + print "Error: invalid configuration." + exit() + print "Creating {} data source ({}) with options:\n{}".format(type, name, options) + data_source = models.DataSource.create(name=name, type=type, options=options) diff --git a/requirements.txt b/requirements.txt index 0e56de7aea..46e48f4445 100644 --- a/requirements.txt +++ b/requirements.txt @@ -24,3 +24,4 @@ statsd==2.1.2 gunicorn==18.0 celery==3.1.11 jsonschema==2.4.0 +click==3.3 From 2107b79a80d09ebfbfa7953a76b705d36df38975 Mon Sep 17 00:00:00 2001 From: Arik Fraimovich Date: Mon, 2 Mar 2015 09:44:55 +0200 Subject: [PATCH 09/13] Use validation for data source editing --- redash/cli/data_sources.py | 33 +++++++++++++++++++++++---------- 1 file changed, 23 insertions(+), 10 deletions(-) diff --git a/redash/cli/data_sources.py b/redash/cli/data_sources.py index 067607c807..9948fb17d7 100644 --- a/redash/cli/data_sources.py +++ b/redash/cli/data_sources.py @@ -1,6 +1,8 @@ +import json import click from flask.ext.script import Manager from redash import models +from redash.query_runner import query_runners, validate_configuration manager = Manager(help="Data sources management commands.") @@ -14,13 +16,20 @@ def list(): print "Id: {}\nName: {}\nType: {}\nOptions: {}".format(ds.id, ds.name, ds.type, ds.options) +def validate_data_source_type(type): + if type not in query_runners.keys(): + print "Error: the type \"{}\" is not supported (supported types: {}).".format(type, ", ".join(query_runners.keys())) + exit() + + +def validate_data_source_options(type, options): + if not validate_configuration(type, options): + print "Error: invalid configuration." 
From 2107b79a80d09ebfbfa7953a76b705d36df38975 Mon Sep 17 00:00:00 2001
From: Arik Fraimovich
Date: Mon, 2 Mar 2015 09:44:55 +0200
Subject: [PATCH 09/13] Use validation for data source editing

---
 redash/cli/data_sources.py | 33 +++++++++++++++++++++++----------
 1 file changed, 23 insertions(+), 10 deletions(-)

diff --git a/redash/cli/data_sources.py b/redash/cli/data_sources.py
index 067607c807..9948fb17d7 100644
--- a/redash/cli/data_sources.py
+++ b/redash/cli/data_sources.py
@@ -1,6 +1,8 @@
+import json
 import click
 from flask.ext.script import Manager

 from redash import models
+from redash.query_runner import query_runners, validate_configuration

 manager = Manager(help="Data sources management commands.")
@@ -14,13 +16,20 @@ def list():
         print "Id: {}\nName: {}\nType: {}\nOptions: {}".format(ds.id, ds.name, ds.type, ds.options)


+def validate_data_source_type(type):
+    if type not in query_runners.keys():
+        print "Error: the type \"{}\" is not supported (supported types: {}).".format(type, ", ".join(query_runners.keys()))
+        exit()
+
+
+def validate_data_source_options(type, options):
+    if not validate_configuration(type, options):
+        print "Error: invalid configuration."
+        exit()
+
+
 @manager.command
 def new(name=None, type=None, options=None):
     """Create new data source"""
-
-    import json
-    from redash.query_runner import query_runners, validate_configuration
-
     if name is None:
         name = click.prompt("Name")
@@ -34,9 +43,8 @@ def new(name=None, type=None, options=None):
             idx = click.prompt("[{}-{}]".format(1, len(query_runners.keys())), type=int)

         type = query_runners.keys()[idx-1]
-    elif type not in query_runners.keys():
-        print "Error: type not supported (supported types: {})".format(", ".join(query_runners.keys()))
-        exit()
+    else:
+        validate_data_source_type(type)

     if options is None:
         query_runner = query_runners[type]
@@ -68,9 +76,7 @@ def new(name=None, type=None, options=None):

     options = json.dumps(options_obj)

-    if not validate_configuration(type, options):
-        print "Error: invalid configuration."
-        exit()
+    validate_data_source_options(type, options)

     print "Creating {} data source ({}) with options:\n{}".format(type, name, options)

@@ -105,7 +111,14 @@ def update_attr(obj, attr, new_value):
 def edit(name, new_name=None, options=None, type=None):
     """Edit data source settings (name, options, type)"""
     try:
+        if type is not None:
+            validate_data_source_type(type)
+
         data_source = models.DataSource.get(models.DataSource.name==name)
+
+        if options is not None:
+            validate_data_source_options(data_source.type, options)
+
         update_attr(data_source, "name", new_name)
         update_attr(data_source, "type", type)
         update_attr(data_source, "options", options)

From c816f1003d71c691e33dcb155f6612b5f9e58c13 Mon Sep 17 00:00:00 2001
From: Arik Fraimovich
Date: Mon, 2 Mar 2015 09:45:29 +0200
Subject: [PATCH 10/13] Bump version

---
 redash/__init__.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/redash/__init__.py b/redash/__init__.py
index 8dc7da6599..2286b624e7 100644
--- a/redash/__init__.py
+++ b/redash/__init__.py
@@ -6,7 +6,7 @@
 from redash import settings
 from redash.query_runner import import_query_runners

-__version__ = '0.5.0'
+__version__ = '0.6.0'


 def setup_logging():
From 9442fd9465c527005302511b91ca8e566952ab9a Mon Sep 17 00:00:00 2001
From: Arik Fraimovich
Date: Mon, 2 Mar 2015 09:49:17 +0200
Subject: [PATCH 11/13] Update logging messages

---
 redash/query_runner/__init__.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/redash/query_runner/__init__.py b/redash/query_runner/__init__.py
index 379b0fd1f4..eaf5693a57 100644
--- a/redash/query_runner/__init__.py
+++ b/redash/query_runner/__init__.py
@@ -72,10 +72,10 @@ def to_dict(cls):
 def register(query_runner_class):
     global query_runners
     if query_runner_class.enabled():
-        logger.info("Registering %s (%s) query runner.", query_runner_class.name(), query_runner_class.type())
+        logger.debug("Registering %s (%s) query runner.", query_runner_class.name(), query_runner_class.type())
         query_runners[query_runner_class.type()] = query_runner_class
     else:
-        logger.warning("%s query runner not enabled; not registering", query_runner_class.name())
+        logger.warning("%s query runner enabled but not supported, not registering. Either disable or install missing dependencies.", query_runner_class.name())

From 9344cbd07859c23ef563f187a5aab869a4276d14 Mon Sep 17 00:00:00 2001
From: Arik Fraimovich
Date: Sun, 8 Mar 2015 10:38:50 +0200
Subject: [PATCH 12/13] Update bootstrap script to support new format

---
 setup/bootstrap.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/setup/bootstrap.sh b/setup/bootstrap.sh
index 9c6c89215f..d916e809b9 100644
--- a/setup/bootstrap.sh
+++ b/setup/bootstrap.sh
@@ -146,7 +146,7 @@ if [ $pg_user_exists -ne 0 ]; then
     sudo -u redash psql -c "grant select on activity_log, events, queries, dashboards, widgets, visualizations, query_results to redash_reader;" redash

     cd /opt/redash/current
-    sudo -u redash bin/run ./manage.py ds new "re:dash metadata" "pg" "user=redash_reader password=$REDASH_READER_PASSWORD host=localhost dbname=redash"
+    sudo -u redash bin/run ./manage.py ds new "re:dash metadata" "pg" "{\"user\": \"redash_reader\", \"password\": \"$REDASH_READER_PASSWORD\", \"host\": \"localhost\", \"dbname\": \"redash\"}"
 fi

 # BigQuery dependencies:

From 40335a0e2189fa5ef459ce5e554dd3513ef8cd85 Mon Sep 17 00:00:00 2001
From: Arik Fraimovich
Date: Sun, 8 Mar 2015 11:00:56 +0200
Subject: [PATCH 13/13] Fix: add missing option flags

---
 setup/bootstrap.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/setup/bootstrap.sh b/setup/bootstrap.sh
index d916e809b9..37ef41d6f8 100644
--- a/setup/bootstrap.sh
+++ b/setup/bootstrap.sh
@@ -146,7 +146,7 @@ if [ $pg_user_exists -ne 0 ]; then
     sudo -u redash psql -c "grant select on activity_log, events, queries, dashboards, widgets, visualizations, query_results to redash_reader;" redash

     cd /opt/redash/current
-    sudo -u redash bin/run ./manage.py ds new "re:dash metadata" "pg" "{\"user\": \"redash_reader\", \"password\": \"$REDASH_READER_PASSWORD\", \"host\": \"localhost\", \"dbname\": \"redash\"}"
+    sudo -u redash bin/run ./manage.py ds new -n "re:dash metadata" -t "pg" -o "{\"user\": \"redash_reader\", \"password\": \"$REDASH_READER_PASSWORD\", \"host\": \"localhost\", \"dbname\": \"redash\"}"
 fi

 # BigQuery dependencies:
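The quoted JSON that bootstrap.sh now passes with -o is exactly the string validate_configuration() receives, so it can be sanity-checked in isolation. A minimal sketch against the PostgreSQL schema from the first patch (the password value here is a stand-in for $REDASH_READER_PASSWORD):

    import json
    import jsonschema

    options = '{"user": "redash_reader", "password": "s3cret", "host": "localhost", "dbname": "redash"}'

    # Equivalent to validate_configuration("pg", options): a missing or
    # misspelled "dbname" key would raise jsonschema.ValidationError here.
    jsonschema.validate(json.loads(options), PostgreSQL.configuration_schema())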