Skip to content

Commit

Permalink
update from master
Browse files Browse the repository at this point in the history
  • Loading branch information
vaibhavhrt committed Jul 17, 2019
2 parents 58a7da6 + 26bd34d commit b405d2d
Show file tree
Hide file tree
Showing 1,045 changed files with 143,771 additions and 110,906 deletions.
1 change: 1 addition & 0 deletions .github/PULL_REQUEST_TEMPLATE.md
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
- [ ] closes #xxxx
- [ ] tests added / passed
- [ ] passes `black pandas`
- [ ] passes `git diff upstream/master -u -- "*.py" | flake8 --diff`
- [ ] whatsnew entry
16 changes: 16 additions & 0 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
# pre-commit hook configuration (https://pre-commit.com).
# Runs the same formatting/lint checks locally that CI enforces.
repos:
# black: opinionated code formatter (matches the `black pandas` CI check).
- repo: https://github.com/python/black
rev: stable
hooks:
- id: black
language_version: python3.7
# flake8: style/lint checks (matches the flake8 diff check in CI).
- repo: https://gitlab.com/pycqa/flake8
rev: 3.7.7
hooks:
- id: flake8
language: python_venv
# isort: keeps import ordering consistent across the codebase.
- repo: https://github.com/pre-commit/mirrors-isort
rev: v4.3.20
hooks:
- id: isort
language: python_venv
8 changes: 0 additions & 8 deletions .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -48,17 +48,10 @@ matrix:
env:
- JOB="3.6, slow" ENV_FILE="ci/deps/travis-36-slow.yaml" PATTERN="slow"

# In allow_failures
- dist: trusty
env:
- JOB="3.6, doc" ENV_FILE="ci/deps/travis-36-doc.yaml" DOC=true
allow_failures:
- dist: trusty
env:
- JOB="3.6, slow" ENV_FILE="ci/deps/travis-36-slow.yaml" PATTERN="slow"
- dist: trusty
env:
- JOB="3.6, doc" ENV_FILE="ci/deps/travis-36-doc.yaml" DOC=true

before_install:
- echo "before_install"
Expand Down Expand Up @@ -97,7 +90,6 @@ before_script:
script:
- echo "script start"
- source activate pandas-dev
- ci/build_docs.sh
- ci/run_tests.sh

after_script:
Expand Down
2 changes: 2 additions & 0 deletions LICENSES/HAVEN_LICENSE
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
YEAR: 2013-2016
COPYRIGHT HOLDER: Hadley Wickham; RStudio; and Evan Miller
32 changes: 32 additions & 0 deletions LICENSES/HAVEN_MIT
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
Based on http://opensource.org/licenses/MIT

This is a template. Complete and ship as file LICENSE the following 2
lines (only)

YEAR:
COPYRIGHT HOLDER:

and specify as

License: MIT + file LICENSE

Copyright (c) <YEAR>, <COPYRIGHT HOLDER>

Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:

The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
10 changes: 5 additions & 5 deletions Makefile
Original file line number Diff line number Diff line change
@@ -1,7 +1,4 @@
tseries: pandas/_libs/lib.pyx pandas/_libs/tslib.pyx pandas/_libs/hashtable.pyx
python setup.py build_ext --inplace

.PHONY : develop build clean clean_pyc tseries doc
.PHONY : develop build clean clean_pyc doc lint-diff black

clean:
-python setup.py clean
Expand All @@ -15,8 +12,11 @@ build: clean_pyc
lint-diff:
git diff upstream/master --name-only -- "*.py" | xargs flake8

black:
black . --exclude '(asv_bench/env|\.egg|\.git|\.hg|\.mypy_cache|\.nox|\.tox|\.venv|_build|buck-out|build|dist)'

develop: build
-python setup.py develop
python setup.py develop

doc:
-rm -rf doc/build doc/source/generated
Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -224,7 +224,7 @@ Most development discussion is taking place on github in this repo. Further, the

All contributions, bug reports, bug fixes, documentation improvements, enhancements and ideas are welcome.

A detailed overview on how to contribute can be found in the **[contributing guide](https://pandas-docs.github.io/pandas-docs-travis/contributing.html)**. There is also an [overview](.github/CONTRIBUTING.md) on GitHub.
A detailed overview on how to contribute can be found in the **[contributing guide](https://dev.pandas.io/contributing.html)**. There is also an [overview](.github/CONTRIBUTING.md) on GitHub.

If you are simply looking to start working with the pandas codebase, navigate to the [GitHub "issues" tab](https://github.com/pandas-dev/pandas/issues) and start looking through interesting issues. There are a number of issues listed under [Docs](https://github.com/pandas-dev/pandas/issues?labels=Docs&sort=updated&state=open) and [good first issue](https://github.com/pandas-dev/pandas/issues?labels=good+first+issue&sort=updated&state=open) where you could start out.

Expand Down
146 changes: 94 additions & 52 deletions asv_bench/benchmarks/algorithms.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,28 +2,45 @@

import numpy as np

from pandas._libs import lib

import pandas as pd
from pandas.util import testing as tm

for imp in ['pandas.util', 'pandas.tools.hashing']:
for imp in ["pandas.util", "pandas.tools.hashing"]:
try:
hashing = import_module(imp)
break
except (ImportError, TypeError, ValueError):
pass


class MaybeConvertObjects:
    """Benchmark ``lib.maybe_convert_objects`` on a large object-dtype array.

    The array is almost entirely integers with a single ``pd.NaT`` in front,
    which forces the conversion routine through its mixed-type handling.
    """

    def setup(self):
        size = 10 ** 5
        values = list(range(size))
        # Seed one NaT so the array cannot be trivially treated as all-int.
        values[0] = pd.NaT
        self.data = np.array(values)

    def time_maybe_convert_objects(self):
        lib.maybe_convert_objects(self.data)


class Factorize:

params = [[True, False], ['int', 'uint', 'float', 'string']]
param_names = ['sort', 'dtype']
params = [[True, False], ["int", "uint", "float", "string"]]
param_names = ["sort", "dtype"]

def setup(self, sort, dtype):
N = 10**5
data = {'int': pd.Int64Index(np.arange(N).repeat(5)),
'uint': pd.UInt64Index(np.arange(N).repeat(5)),
'float': pd.Float64Index(np.random.randn(N).repeat(5)),
'string': tm.makeStringIndex(N).repeat(5)}
N = 10 ** 5
data = {
"int": pd.Int64Index(np.arange(N).repeat(5)),
"uint": pd.UInt64Index(np.arange(N).repeat(5)),
"float": pd.Float64Index(np.random.randn(N).repeat(5)),
"string": tm.makeStringIndex(N).repeat(5),
}
self.idx = data[dtype]

def time_factorize(self, sort, dtype):
Expand All @@ -32,15 +49,17 @@ def time_factorize(self, sort, dtype):

class FactorizeUnique:

params = [[True, False], ['int', 'uint', 'float', 'string']]
param_names = ['sort', 'dtype']
params = [[True, False], ["int", "uint", "float", "string"]]
param_names = ["sort", "dtype"]

def setup(self, sort, dtype):
N = 10**5
data = {'int': pd.Int64Index(np.arange(N)),
'uint': pd.UInt64Index(np.arange(N)),
'float': pd.Float64Index(np.arange(N)),
'string': tm.makeStringIndex(N)}
N = 10 ** 5
data = {
"int": pd.Int64Index(np.arange(N)),
"uint": pd.UInt64Index(np.arange(N)),
"float": pd.Float64Index(np.arange(N)),
"string": tm.makeStringIndex(N),
}
self.idx = data[dtype]
assert self.idx.is_unique

Expand All @@ -50,15 +69,17 @@ def time_factorize(self, sort, dtype):

class Duplicated:

params = [['first', 'last', False], ['int', 'uint', 'float', 'string']]
param_names = ['keep', 'dtype']
params = [["first", "last", False], ["int", "uint", "float", "string"]]
param_names = ["keep", "dtype"]

def setup(self, keep, dtype):
N = 10**5
data = {'int': pd.Int64Index(np.arange(N).repeat(5)),
'uint': pd.UInt64Index(np.arange(N).repeat(5)),
'float': pd.Float64Index(np.random.randn(N).repeat(5)),
'string': tm.makeStringIndex(N).repeat(5)}
N = 10 ** 5
data = {
"int": pd.Int64Index(np.arange(N).repeat(5)),
"uint": pd.UInt64Index(np.arange(N).repeat(5)),
"float": pd.Float64Index(np.random.randn(N).repeat(5)),
"string": tm.makeStringIndex(N).repeat(5),
}
self.idx = data[dtype]
# cache is_unique
self.idx.is_unique
Expand All @@ -69,15 +90,17 @@ def time_duplicated(self, keep, dtype):

class DuplicatedUniqueIndex:

params = ['int', 'uint', 'float', 'string']
param_names = ['dtype']
params = ["int", "uint", "float", "string"]
param_names = ["dtype"]

def setup(self, dtype):
N = 10**5
data = {'int': pd.Int64Index(np.arange(N)),
'uint': pd.UInt64Index(np.arange(N)),
'float': pd.Float64Index(np.random.randn(N)),
'string': tm.makeStringIndex(N)}
N = 10 ** 5
data = {
"int": pd.Int64Index(np.arange(N)),
"uint": pd.UInt64Index(np.arange(N)),
"float": pd.Float64Index(np.random.randn(N)),
"string": tm.makeStringIndex(N),
}
self.idx = data[dtype]
# cache is_unique
self.idx.is_unique
Expand All @@ -87,58 +110,77 @@ def time_duplicated_unique(self, dtype):


class Hashing:

def setup_cache(self):
N = 10**5
N = 10 ** 5

df = pd.DataFrame(
{'strings': pd.Series(tm.makeStringIndex(10000).take(
np.random.randint(0, 10000, size=N))),
'floats': np.random.randn(N),
'ints': np.arange(N),
'dates': pd.date_range('20110101', freq='s', periods=N),
'timedeltas': pd.timedelta_range('1 day', freq='s', periods=N)})
df['categories'] = df['strings'].astype('category')
{
"strings": pd.Series(
tm.makeStringIndex(10000).take(np.random.randint(0, 10000, size=N))
),
"floats": np.random.randn(N),
"ints": np.arange(N),
"dates": pd.date_range("20110101", freq="s", periods=N),
"timedeltas": pd.timedelta_range("1 day", freq="s", periods=N),
}
)
df["categories"] = df["strings"].astype("category")
df.iloc[10:20] = np.nan
return df

def time_frame(self, df):
hashing.hash_pandas_object(df)

def time_series_int(self, df):
hashing.hash_pandas_object(df['ints'])
hashing.hash_pandas_object(df["ints"])

def time_series_string(self, df):
hashing.hash_pandas_object(df['strings'])
hashing.hash_pandas_object(df["strings"])

def time_series_float(self, df):
hashing.hash_pandas_object(df['floats'])
hashing.hash_pandas_object(df["floats"])

def time_series_categorical(self, df):
hashing.hash_pandas_object(df['categories'])
hashing.hash_pandas_object(df["categories"])

def time_series_timedeltas(self, df):
hashing.hash_pandas_object(df['timedeltas'])
hashing.hash_pandas_object(df["timedeltas"])

def time_series_dates(self, df):
hashing.hash_pandas_object(df['dates'])
hashing.hash_pandas_object(df["dates"])


class Quantile:
params = [[0, 0.5, 1],
['linear', 'nearest', 'lower', 'higher', 'midpoint'],
['float', 'int', 'uint']]
param_names = ['quantile', 'interpolation', 'dtype']
params = [
[0, 0.5, 1],
["linear", "nearest", "lower", "higher", "midpoint"],
["float", "int", "uint"],
]
param_names = ["quantile", "interpolation", "dtype"]

def setup(self, quantile, interpolation, dtype):
N = 10**5
data = {'int': np.arange(N),
'uint': np.arange(N).astype(np.uint64),
'float': np.random.randn(N)}
N = 10 ** 5
data = {
"int": np.arange(N),
"uint": np.arange(N).astype(np.uint64),
"float": np.random.randn(N),
}
self.idx = pd.Series(data[dtype].repeat(5))

def time_quantile(self, quantile, interpolation, dtype):
self.idx.quantile(quantile, interpolation=interpolation)


class SortIntegerArray:
    """Benchmark ``argsort`` on the nullable ``Int64`` extension array."""

    # Two sizes to expose scaling behaviour of the sort.
    params = [10 ** 3, 10 ** 5]

    def setup(self, N):
        values = np.arange(N, dtype=float)
        # A single NaN exercises the missing-value path of the sort.
        values[40] = np.nan
        self.array = pd.array(values, dtype="Int64")

    def time_argsort(self, N):
        self.array.argsort()


from .pandas_vb_common import setup # noqa: F401 isort:skip
6 changes: 2 additions & 4 deletions asv_bench/benchmarks/attrs_caching.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,13 @@
import numpy as np
from pandas import DataFrame

try:
from pandas.util import cache_readonly
except ImportError:
from pandas.util.decorators import cache_readonly


class DataFrameAttributes:

def setup(self):
self.df = DataFrame(np.random.randn(10, 6))
self.cur_index = self.df.index
Expand All @@ -20,14 +20,12 @@ def time_set_index(self):


class CacheReadonly:

def setup(self):

class Foo:

@cache_readonly
def prop(self):
return 5

self.obj = Foo()

def time_cache_readonly(self):
Expand Down
Loading

0 comments on commit b405d2d

Please sign in to comment.