diff --git a/python/.gitignore b/python/.gitignore index 80103a1a52942..3cb591ea766d5 100644 --- a/python/.gitignore +++ b/python/.gitignore @@ -35,3 +35,6 @@ dist # coverage .coverage coverage.xml + +# benchmark working dir +.asv diff --git a/python/asv.conf.json b/python/asv.conf.json new file mode 100644 index 0000000000000..96beba64c2e6e --- /dev/null +++ b/python/asv.conf.json @@ -0,0 +1,73 @@ +{ + // The version of the config file format. Do not change, unless + // you know what you are doing. + "version": 1, + + // The name of the project being benchmarked + "project": "pyarrow", + + // The project's homepage + "project_url": "https://arrow.apache.org/", + + // The URL or local path of the source code repository for the + // project being benchmarked + "repo": "https://github.com/apache/arrow/", + + // List of branches to benchmark. If not provided, defaults to "master" + // (for git) or "tip" (for mercurial). + // "branches": ["master"], // for git + // "branches": ["tip"], // for mercurial + + // The DVCS being used. If not set, it will be automatically + // determined from "repo" by looking at the protocol in the URL + // (if remote), or by looking for special directories, such as + // ".git" (if local). + "dvcs": "git", + + // The tool to use to create environments. May be "conda", + // "virtualenv" or other value depending on the plugins in use. + // If missing or the empty string, the tool will be automatically + // determined by looking for tools on the PATH environment + // variable. + "environment_type": "virtualenv", + + // the base URL to show a commit for the project. + "show_commit_url": "https://github.com/apache/arrow/commit/", + + // The Pythons you'd like to test against. If not provided, defaults + // to the current version of Python used to run `asv`. + // "pythons": ["2.7", "3.3"], + + // The matrix of dependencies to test. Each key is the name of a + // package (in PyPI) and the values are version numbers. 
An empty + // list indicates to just test against the default (latest) + // version. + // "matrix": { + // "numpy": ["1.6", "1.7"] + // }, + + // The directory (relative to the current directory) that benchmarks are + // stored in. If not provided, defaults to "benchmarks" + "benchmark_dir": "benchmarks", + + // The directory (relative to the current directory) to cache the Python + // environments in. If not provided, defaults to "env" + "env_dir": ".asv/env", + + + // The directory (relative to the current directory) that raw benchmark + // results are stored in. If not provided, defaults to "results". + "results_dir": ".asv/results", + + // The directory (relative to the current directory) that the html tree + // should be written to. If not provided, defaults to "html". + "html_dir": "build/benchmarks/html", + + // The number of characters to retain in the commit hashes. + // "hash_length": 8, + + // `asv` will cache wheels of the recent builds in each + // environment, making them faster to install next time. This is + // number of builds to keep, per environment. + // "wheel_cache_size": 0 +} diff --git a/python/benchmarks/__init__.py b/python/benchmarks/__init__.py new file mode 100644 index 0000000000000..245692337bc3f --- /dev/null +++ b/python/benchmarks/__init__.py @@ -0,0 +1,17 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
diff --git a/python/benchmarks/array.py b/python/benchmarks/array.py
new file mode 100644
index 0000000000000..6ab73d18d1f87
--- /dev/null
+++ b/python/benchmarks/array.py
@@ -0,0 +1,48 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+import pyarrow
+
+
+class Conversions(object):
+    """Benchmark ``pyarrow.from_pylist`` on a list of ``n`` integers."""
+
+    params = (1, 10 ** 5, 10 ** 6, 10 ** 7)
+
+    def time_from_pylist(self, n):
+        # The measurement includes building the input list itself.
+        pyarrow.from_pylist(list(range(n)))
+
+    def peakmem_from_pylist(self, n):
+        pyarrow.from_pylist(list(range(n)))
+
+
+class ScalarAccess(object):
+    """Benchmark ``as_py()`` on each element of an ``n``-element array."""
+
+    params = (1, 10 ** 5, 10 ** 6, 10 ** 7)
+
+    def setup(self, n):
+        # asv invokes the lowercase ``setup`` hook before each benchmark;
+        # a unittest-style ``setUp`` is never called, which would leave
+        # ``self._array`` undefined when ``time_as_py`` runs.
+        self._array = pyarrow.from_pylist(list(range(n)))
+
+    def time_as_py(self, n):
+        for i in range(n):
+            self._array[i].as_py()
+
diff --git a/python/doc/Benchmarks.md b/python/doc/Benchmarks.md
new file mode 100644
index 0000000000000..8edfb6209e4af
--- /dev/null
+++ b/python/doc/Benchmarks.md
@@ -0,0 +1,11 @@
+## Benchmark Requirements
+
+The benchmarks are run using [asv][1] which is also their only requirement.
+
+## Running the benchmarks
+
+To run the benchmarks, call `asv run --python=same`. You cannot use the
+plain `asv run` command at the moment as asv cannot handle python packages
+in subdirectories of a repository.
+
+[1]: https://asv.readthedocs.org/