Skip to content

Commit

Permalink
[python] Use importlib.resources instead of pkg_resources (#14449)
Browse files Browse the repository at this point in the history
Fixes: #14428
  • Loading branch information
ehigham authored Apr 11, 2024
1 parent 1f3a050 commit 521ca1a
Show file tree
Hide file tree
Showing 10 changed files with 64 additions and 81 deletions.
46 changes: 26 additions & 20 deletions hail/python/hail/__init__.py
Original file line number Diff line number Diff line change
@@ -1,17 +1,20 @@
import sys
import importlib.resources
from pathlib import Path
from sys import version_info
from typing import Optional

import pkg_resources
# Fail fast on unsupported interpreters. This runs at import time, before any
# of the Hail submodules further down this file are imported.
if version_info < (3, 9):
    raise EnvironmentError(f'Hail requires Python 3.9 or later, found {version_info.major}.{version_info.minor}')

def __resource(name: str) -> Path:
    """Locate the packaged resource ``name`` relative to this package.

    ``name`` may contain ``/``-separated segments (callers in this package
    pass values such as ``'backend/hail.jar'``).
    """
    package_root = importlib.resources.files(__name__)
    return package_root.joinpath(name)


def __resource_str(name: str) -> str:
    """Return the entire text of the packaged resource ``name``, decoded as UTF-8."""
    return __resource(name).read_text(encoding='utf-8')

__pip_version__ = pkg_resources.resource_string(__name__, 'hail_pip_version').decode().strip()
del pkg_resources
del sys

__doc__ = r"""
__ __ <>__
Expand All @@ -32,7 +35,9 @@
# F403 'from .expr import *' used; unable to detect undefined names
# F401 '.expr.*' imported but unused
# E402 module level import not at top of file
from hail.utils import ( # noqa: E402

# ruff: noqa: E402
from hail.utils import (
ANY_REGION,
Interval,
Struct,
Expand All @@ -47,7 +52,7 @@
hadoop_stat,
)

from . import ( # noqa: E402
from . import (
backend,
experimental,
expr,
Expand All @@ -62,7 +67,7 @@
utils,
vds,
)
from .context import ( # noqa: E402
from .context import (
TemporaryDirectory,
TemporaryFilename,
_async_current_backend,
Expand All @@ -86,12 +91,12 @@
tmp_dir,
version,
)
from .expr import * # noqa: E402, F403
from .expr import aggregators # noqa: E402
from .genetics import * # noqa: F403, E402
from .matrixtable import GroupedMatrixTable, MatrixTable # noqa: E402
from .methods import * # noqa: F403, E402
from .table import GroupedTable, Table, asc, desc # noqa: E402
from .expr import * # noqa: F403
from .expr import aggregators
from .genetics import * # noqa: F403
from .matrixtable import GroupedMatrixTable, MatrixTable
from .methods import * # noqa: F403
from .table import GroupedTable, Table, asc, desc

agg = aggregators
scan = aggregators.aggregators.ScanFunctions({name: getattr(agg, name) for name in agg.__all__})
Expand Down Expand Up @@ -157,18 +162,19 @@
__all__.extend(methods.__all__)

# don't overwrite builtins in `from hail import *`
import builtins # noqa: E402
import builtins

__all__.extend([x for x in expr.__all__ if not hasattr(builtins, x)])
del builtins

ir.register_functions()
ir.register_aggregators()

__pip_version__ = __resource_str('hail_pip_version').strip()
__version__: Optional[str] = None # set by hail.version()
__revision__: Optional[str] = None # set by hail.revision()

import warnings # noqa: E402
import warnings

warnings.filterwarnings('once', append=True)
del warnings
31 changes: 10 additions & 21 deletions hail/python/hail/backend/backend.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,11 @@
from typing import AbstractSet, Any, ClassVar, Dict, List, Mapping, Optional, Tuple, TypeVar, Union

import orjson
import pkg_resources

from hailtop.config.user_config import unchecked_configuration_of
from hailtop.fs.fs import FS

from .. import __resource, __resource_str
from ..builtin_references import BUILTIN_REFERENCE_RESOURCE_PATHS
from ..expr import Expression
from ..expr.blockmatrix_type import tblockmatrix
Expand Down Expand Up @@ -50,33 +50,22 @@ def __init__(self, development_mode: bool, local_jar_path: str, extra_classpath:
self.extra_classpath = extra_classpath


def resource_exists(f: str) -> bool:
return pkg_resources.resource_exists(__name__, f)


def resource_filename(f: str) -> str:
return pkg_resources.resource_filename(__name__, f)


def local_jar_information() -> LocalJarInformation:
    """Find the Hail JAR bundled with this Python package.

    Two layouts are recognised, checked in this order:

    * ``backend/hail.jar`` -- treated as a development build: a warning is
      emitted and the contents of ``backend/extra_classpath`` are passed
      along as the single extra-classpath entry.
    * ``backend/hail-all-spark.jar`` -- treated as a non-development build
      with no extra classpath.

    Returns:
        A ``LocalJarInformation`` built from (development_mode,
        local_jar_path, extra_classpath).

    Raises:
        ValueError: if neither JAR is present in the package.
    """
    if (hail_jar := __resource('backend/hail.jar')).is_file():
        warnings.warn('!!! THIS IS A DEVELOPMENT VERSION OF HAIL !!!')
        return LocalJarInformation(
            True,
            str(hail_jar),
            [__resource_str('backend/extra_classpath')],
        )
    elif (hail_all_spark_jar := __resource('backend/hail-all-spark.jar')).is_file():
        return LocalJarInformation(
            False,
            str(hail_all_spark_jar),
            [],
        )
    else:
        # Both names are bound here: reaching this branch means both
        # conditions above were evaluated (and were false).
        raise ValueError(f'Hail requires either {hail_jar} or {hail_all_spark_jar}.')


class ActionTag(Enum):
Expand Down
10 changes: 5 additions & 5 deletions hail/python/hail/context.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@
from typing import Dict, List, Optional, Tuple, Type, Union
from urllib.parse import urlparse, urlunparse

import pkg_resources
from pyspark import SparkContext

import hail
Expand All @@ -21,6 +20,7 @@
from hailtop.hail_event_loop import hail_event_loop
from hailtop.utils import secret_alnum_string

from . import __resource_str
from .backend.backend import local_jar_information
from .builtin_references import BUILTIN_REFERENCES

Expand Down Expand Up @@ -643,8 +643,8 @@ def version() -> str:
str
"""
if hail.__version__ is None:
# https://stackoverflow.com/questions/6028000/how-to-read-a-static-file-from-inside-a-python-package
hail.__version__ = pkg_resources.resource_string(__name__, 'hail_version').decode().strip()
hail.__version__ = __resource_str('hail_version').strip()

return hail.__version__


Expand All @@ -656,8 +656,8 @@ def revision() -> str:
str
"""
if hail.__revision__ is None:
# https://stackoverflow.com/questions/6028000/how-to-read-a-static-file-from-inside-a-python-package
hail.__revision__ = pkg_resources.resource_string(__name__, 'hail_revision').decode().strip()
hail.__revision__ = __resource_str('hail_revision').strip()

return hail.__revision__


Expand Down
12 changes: 3 additions & 9 deletions hail/python/hail/experimental/datasets.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,11 @@
import json
import os
from typing import Optional, Union

import pkg_resources

import hail as hl
from hail.matrixtable import MatrixTable
from hail.table import Table

from .datasets_metadata import get_datasets_metadata


def _read_dataset(path: str) -> Union[Table, MatrixTable, hl.linalg.BlockMatrix]:
if path.endswith('.ht'):
Expand Down Expand Up @@ -76,11 +74,7 @@ def load_dataset(
f'Valid cloud platforms are {valid_clouds}.'
)

config_path = pkg_resources.resource_filename(__name__, 'datasets.json')
assert os.path.exists(config_path), f'{config_path} does not exist'
with open(config_path) as f:
datasets = json.load(f)

datasets = get_datasets_metadata()
names = set([dataset for dataset in datasets])
if name not in names:
raise ValueError(f'{name} is not a dataset available in the' f' repository.')
Expand Down
8 changes: 8 additions & 0 deletions hail/python/hail/experimental/datasets_metadata.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
import json

from hail import __resource


def get_datasets_metadata():
    """Load and return the parsed contents of the packaged ``experimental/datasets.json``.

    The file is decoded explicitly as UTF-8 so the result does not depend on
    the platform's default text encoding.
    """
    with __resource('experimental/datasets.json').open('r', encoding='utf-8') as fp:
        return json.load(fp)
10 changes: 2 additions & 8 deletions hail/python/hail/experimental/db.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,6 @@
import json
import os
import warnings
from typing import ClassVar, Iterable, List, Optional, Set, Tuple, Union

import pkg_resources

import hail as hl
from hailtop.utils import external_requests_client_session, retry_response_returning_functions

Expand All @@ -13,6 +9,7 @@
from ..table import Table, table_type
from ..typecheck import oneof, typecheck_method
from ..utils.java import Env, info
from .datasets_metadata import get_datasets_metadata
from .lens import MatrixRows, TableRows


Expand Down Expand Up @@ -341,10 +338,7 @@ def __init__(
)
if config is None:
if url is None:
config_path = pkg_resources.resource_filename(__name__, 'datasets.json')
assert os.path.exists(config_path), f'{config_path} does not exist'
with open(config_path) as f:
config = json.load(f)
config = get_datasets_metadata()
else:
session = external_requests_client_session()
response = retry_response_returning_functions(session.get, url)
Expand Down
5 changes: 3 additions & 2 deletions hail/python/hailtop/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,10 @@
def version() -> str:
    """Return the version string stored in this package's ``hail_version`` resource.

    The resource is read on the first call only; the stripped text is cached
    in the module-level ``_VERSION`` and returned directly afterwards.
    """
    global _VERSION
    if _VERSION is None:
        # Imported lazily so the cost is only paid when the version is requested.
        import importlib.resources as r  # pylint: disable=import-outside-toplevel

        _VERSION = (r.files(__name__) / 'hail_version').read_text('utf-8').strip()

    return _VERSION


Expand Down
12 changes: 4 additions & 8 deletions hail/python/hailtop/hailctl/dataproc/deploy_metadata.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,8 @@
import importlib.resources as r

import yaml


def get_deploy_metadata():
    """Return the ``dataproc`` section of the ``deploy.yaml`` shipped in ``hailtop.hailctl``.

    Raises:
        RuntimeError: if the package does not contain a ``deploy.yaml`` file.
    """
    deploy_yaml = r.files('hailtop.hailctl').joinpath('deploy.yaml')
    if not deploy_yaml.is_file():
        # Keep the explicit, package-level error rather than surfacing a bare
        # file-not-found from the read below.
        raise RuntimeError("package has no 'deploy.yaml' file")

    content = deploy_yaml.read_text('utf-8')
    return yaml.safe_load(content)["dataproc"]
9 changes: 2 additions & 7 deletions hail/python/hailtop/hailctl/dataproc/start.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,9 @@
from shlex import quote as shq
from typing import List, Optional

import yaml

from . import gcloud
from .cluster_config import ClusterConfig
from .deploy_metadata import get_deploy_metadata


class VepVersion(str, Enum):
Expand Down Expand Up @@ -179,14 +178,10 @@ def start(
debug_mode: bool,
beta: bool,
):
import pkg_resources # pylint: disable=import-outside-toplevel

conf = ClusterConfig()
conf.extend_flag('image-version', IMAGE_VERSION)

if not pkg_resources.resource_exists('hailtop.hailctl', "deploy.yaml"):
raise RuntimeError("package has no 'deploy.yaml' file")
deploy_metadata = yaml.safe_load(pkg_resources.resource_stream('hailtop.hailctl', "deploy.yaml"))['dataproc']
deploy_metadata = get_deploy_metadata()

conf.extend_flag('properties', DEFAULT_PROPERTIES)
if properties:
Expand Down
2 changes: 1 addition & 1 deletion hail/python/test/hail/backend/test_spark_backend.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,4 +26,4 @@ def test_copy_spark_log(copy):
_, filename = os.path.split(hc._log)
log = os.path.join(hc._tmpdir, filename)

assert Env.fs().exists(log) if copy else not Env.fs().exists(log)
assert Env.fs().exists(log) == copy

0 comments on commit 521ca1a

Please sign in to comment.