Skip to content

Commit

Permalink
ENH: 'to_sql()' add param 'method' to control insert statement (panda…
Browse files Browse the repository at this point in the history
…s-dev#21103)

Also revert default insert method to NOT use multi-value.
  • Loading branch information
schettino72 committed May 25, 2018
1 parent 3147a86 commit fce019b
Show file tree
Hide file tree
Showing 2 changed files with 61 additions and 30 deletions.
4 changes: 2 additions & 2 deletions pandas/core/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -2014,7 +2014,7 @@ def to_msgpack(self, path_or_buf=None, encoding='utf-8', **kwargs):
**kwargs)

def to_sql(self, name, con, schema=None, if_exists='fail', index=True,
index_label=None, chunksize=None, dtype=None):
index_label=None, chunksize=None, dtype=None, method=None):
"""
Write records stored in a DataFrame to a SQL database.
Expand Down Expand Up @@ -2124,7 +2124,7 @@ def to_sql(self, name, con, schema=None, if_exists='fail', index=True,
from pandas.io import sql
sql.to_sql(self, name, con, schema=schema, if_exists=if_exists,
index=index, index_label=index_label, chunksize=chunksize,
dtype=dtype)
dtype=dtype, method=method)

def to_pickle(self, path, compression='infer',
protocol=pkl.HIGHEST_PROTOCOL):
Expand Down
87 changes: 59 additions & 28 deletions pandas/io/sql.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@

from __future__ import print_function, division
from datetime import datetime, date, time
import csv
from io import StringIO

import warnings
import re
Expand Down Expand Up @@ -398,7 +400,7 @@ def read_sql(sql, con, index_col=None, coerce_float=True, params=None,


def to_sql(frame, name, con, schema=None, if_exists='fail', index=True,
index_label=None, chunksize=None, dtype=None):
index_label=None, chunksize=None, dtype=None, method=None):
"""
Write records stored in a DataFrame to a SQL database.
Expand Down Expand Up @@ -447,7 +449,7 @@ def to_sql(frame, name, con, schema=None, if_exists='fail', index=True,

pandas_sql.to_sql(frame, name, if_exists=if_exists, index=index,
index_label=index_label, schema=schema,
chunksize=chunksize, dtype=dtype)
chunksize=chunksize, dtype=dtype, method=method)


def has_table(table_name, con, schema=None):
Expand Down Expand Up @@ -572,29 +574,47 @@ def create(self):
else:
self._execute_create()

def insert_statement(self, data, conn):
"""
Generate tuple of SQLAlchemy insert statement and any arguments
to be executed by connection (via `_execute_insert`).
def _exec_insert(self, conn, keys, data_iter):
"""Execute SQL statement inserting data
Parameters
----------
conn : SQLAlchemy connectable(engine/connection)
Connection to receive the data
data : list of dict
The data to be inserted
data : list of list
of values to be inserted
"""
data = [{k: v for k, v in zip(keys, row)} for row in data_iter]
conn.execute(self.table.insert(), data)

Returns
-------
SQLAlchemy statement
insert statement
*, optional
Additional parameters to be passed when executing insert statement
def _exec_insert_multi(self, conn, keys, data_iter):
"""Alternative to _exec_insert for DBs that support multivalue INSERT.
Note: multi-value insert is usually faster for a few columns
but performance degrades quickly with increase of columns.
"""
dialect = getattr(conn, 'dialect', None)
if dialect and getattr(dialect, 'supports_multivalues_insert', False):
return self.table.insert(data),
return self.table.insert(), data
data = [{k: v for k, v in zip(keys, row)} for row in data_iter]
conn.execute(self.table.insert(data))

def _exec_insert_copy(self, conn, keys, data_iter):
    """Alternative to _exec_insert for DBs that support COPY FROM

    The rows are serialised to an in-memory CSV buffer and streamed to
    the server with ``COPY <table> (<columns>) FROM STDIN WITH CSV``.

    Parameters
    ----------
    conn : SQLAlchemy connectable(engine/connection)
        Connection to receive the data. Its ``connection`` attribute must
        provide a DBAPI connection whose cursors are context managers
        exposing ``copy_expert(sql=..., file=...)``.
    keys : list of str
        Column names, in the same order as the values of each row.
    data_iter : iterable of sequences
        Rows of values to be inserted.
    """
    def quote_identifier(identifier):
        # Always quote, doubling embedded double quotes, so that
        # mixed-case, spaced or reserved names are matched exactly and
        # a stray '"' cannot terminate the identifier early.
        return '"{}"'.format('{}'.format(identifier).replace('"', '""'))

    # Build the CSV payload first so the cursor is only held open for
    # the COPY itself.
    s_buf = StringIO()
    csv.writer(s_buf).writerows(data_iter)
    s_buf.seek(0)

    columns = ', '.join(quote_identifier(k) for k in keys)
    if self.schema:
        table_name = '{}.{}'.format(quote_identifier(self.schema),
                                    quote_identifier(self.name))
    else:
        table_name = quote_identifier(self.name)

    sql = 'COPY {} ({}) FROM STDIN WITH CSV'.format(table_name, columns)

    # gets a DBAPI connection that can provide a cursor
    dbapi_conn = conn.connection
    with dbapi_conn.cursor() as cur:
        cur.copy_expert(sql=sql, file=s_buf)


def insert_data(self):
if self.index is not None:
Expand Down Expand Up @@ -632,12 +652,20 @@ def insert_data(self):

return column_names, data_list

def _execute_insert(self, conn, keys, data_iter):
"""Insert data into this table with database connection"""
data = [{k: v for k, v in zip(keys, row)} for row in data_iter]
conn.execute(*self.insert_statement(data, conn))

def insert(self, chunksize=None):
def insert(self, chunksize=None, method=None):

# set insert method
if method in (None, 'default'):
exec_insert = self._exec_insert
elif method == 'multi':
exec_insert = self._exec_insert_multi
elif method == 'copy':
exec_insert = self._exec_insert_copy
else:
# TODO: support callables?
raise ValueError('Invalid parameter `method`: {}'.format(method))

keys, data_list = self.insert_data()

nrows = len(self.frame)
Expand All @@ -660,7 +688,9 @@ def insert(self, chunksize=None):
break

chunk_iter = zip(*[arr[start_i:end_i] for arr in data_list])
self._execute_insert(conn, keys, chunk_iter)
exec_insert(conn, keys, chunk_iter)



def _query_iterator(self, result, chunksize, columns, coerce_float=True,
parse_dates=None):
Expand Down Expand Up @@ -1100,7 +1130,8 @@ def read_query(self, sql, index_col=None, coerce_float=True,
read_sql = read_query

def to_sql(self, frame, name, if_exists='fail', index=True,
index_label=None, schema=None, chunksize=None, dtype=None):
index_label=None, schema=None, chunksize=None, dtype=None,
method=None):
"""
Write records stored in a DataFrame to a SQL database.
Expand Down Expand Up @@ -1146,7 +1177,7 @@ def to_sql(self, frame, name, if_exists='fail', index=True,
if_exists=if_exists, index_label=index_label,
schema=schema, dtype=dtype)
table.create()
table.insert(chunksize)
table.insert(chunksize, method=method)
if (not name.isdigit() and not name.islower()):
# check for potentially case sensitivity issues (GH7815)
# Only check when name is not a number and name is not lower case
Expand Down

0 comments on commit fce019b

Please sign in to comment.