-
-
Notifications
You must be signed in to change notification settings - Fork 81
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
More speedup via mtime-base caching. #274
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -600,9 +600,15 @@ class FastPath: | |
children. | ||
""" | ||
|
||
def __init__(self, root): | ||
@functools.lru_cache() # type: ignore | ||
def __new__(cls, root): | ||
self = object().__new__(cls) | ||
self.root = str(root) | ||
self.base = os.path.basename(self.root).lower() | ||
self.last_mtime = -1 | ||
self.infos = {} | ||
self.eggs = {} | ||
return self | ||
|
||
def joinpath(self, child): | ||
return pathlib.Path(self.root, child) | ||
|
@@ -618,15 +624,47 @@ def zip_children(self): | |
zip_path = zipp.Path(self.root) | ||
names = zip_path.root.namelist() | ||
self.joinpath = zip_path.joinpath | ||
|
||
return dict.fromkeys(child.split(posixpath.sep, 1)[0] for child in names) | ||
|
||
def search(self, name): | ||
return ( | ||
self.joinpath(child) | ||
for child in self.children() | ||
if name.matches(child, self.base) | ||
) | ||
def update_cache(self): | ||
root = self.root or "." | ||
try: | ||
mtime = os.stat(root).st_mtime | ||
except OSError: | ||
self.infos.clear() | ||
self.eggs.clear() | ||
self.last_mtime = -1 | ||
return | ||
if mtime == self.last_mtime: | ||
return | ||
self.infos.clear() | ||
self.eggs.clear() | ||
base_is_egg = self.base.endswith(".egg") | ||
for child in self.children(): | ||
low = child.lower() | ||
if low.endswith((".dist-info", ".egg-info")): | ||
# rpartition is faster than splitext and suitable for this purpose. | ||
name = low.rpartition(".")[0].partition("-")[0] | ||
normalized = Prepared.normalize(name) | ||
self.infos.setdefault(normalized, []).append(child) | ||
elif base_is_egg and low == "egg-info": | ||
name = self.base.rpartition(".")[0].partition("-")[0] | ||
legacy_normalized = Prepared.legacy_normalize(name) | ||
self.eggs.setdefault(legacy_normalized, []).append(child) | ||
self.last_mtime = mtime | ||
|
||
def search(self, prepared): | ||
self.update_cache() | ||
if prepared.name: | ||
infos = self.infos.get(prepared.normalized, []) | ||
yield from map(self.joinpath, infos) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'm seeing a lot of repetition and branching. I'd want to see this unified. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I can write something like def search(self, prepared):
self.update_cache()
infos_and_eggs = itertools.chain(
self.infos.get(prepared.normalized, []) if prepared.name else
itertools.chain.from_iterable(self.infos.values()),
self.eggs.get(prepared.legacy_normalized, []) if prepared.name else
itertools.chain.from_iterable(self.eggs.values()))
yield from map(self.joinpath, infos_and_eggs) (or lift the |
||
eggs = self.eggs.get(prepared.legacy_normalized, []) | ||
yield from map(self.joinpath, eggs) | ||
else: | ||
for infos in self.infos.values(): | ||
yield from map(self.joinpath, infos) | ||
for eggs in self.eggs.values(): | ||
yield from map(self.joinpath, eggs) | ||
|
||
|
||
class Prepared: | ||
jaraco marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
@@ -635,22 +673,14 @@ class Prepared: | |
""" | ||
|
||
normalized = None | ||
suffixes = 'dist-info', 'egg-info' | ||
exact_matches = [''][:0] | ||
egg_prefix = '' | ||
versionless_egg_name = '' | ||
legacy_normalized = None | ||
|
||
def __init__(self, name): | ||
self.name = name | ||
if name is None: | ||
return | ||
self.normalized = self.normalize(name) | ||
self.exact_matches = [ | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Where did the behavior for exact matches go? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
|
||
self.normalized + '.' + suffix for suffix in self.suffixes | ||
] | ||
legacy_normalized = self.legacy_normalize(self.name) | ||
self.egg_prefix = legacy_normalized + '-' | ||
self.versionless_egg_name = legacy_normalized + '.egg' | ||
self.legacy_normalized = self.legacy_normalize(name) | ||
|
||
@staticmethod | ||
def normalize(name): | ||
|
@@ -667,27 +697,6 @@ def legacy_normalize(name): | |
""" | ||
return name.lower().replace('-', '_') | ||
|
||
def matches(self, cand, base): | ||
low = cand.lower() | ||
# rpartition is faster than splitext and suitable for this purpose. | ||
pre, _, ext = low.rpartition('.') | ||
name, _, rest = pre.partition('-') | ||
return ( | ||
low in self.exact_matches | ||
or ext in self.suffixes | ||
and (not self.normalized or name.replace('.', '_') == self.normalized) | ||
# legacy case: | ||
or self.is_egg(base) | ||
and low == 'egg-info' | ||
) | ||
|
||
def is_egg(self, base): | ||
return ( | ||
base == self.versionless_egg_name | ||
or base.startswith(self.egg_prefix) | ||
and base.endswith('.egg') | ||
) | ||
|
||
|
||
@install | ||
class MetadataPathFinder(NullFinder, DistributionFinder): | ||
|
@@ -717,6 +726,9 @@ def _search_paths(cls, name, paths): | |
path.search(prepared) for path in map(FastPath, paths) | ||
) | ||
|
||
def invalidate_caches(cls): | ||
FastPath.__new__.cache_clear() | ||
|
||
|
||
class PathDistribution(Distribution): | ||
def __init__(self, path): | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I'm not too happy that a lot of essential behavior ("matches", "is_egg", "prepared computations") has been inlined into an "update_cache" function.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
That's because there's no more notion of "prepared computations" (because we just parse everything and store the normalized results into the cache). "Matches" is basically just a dict lookup now (in
search()
).I guess at best I could have a
def is_info(self, low): return low.endswith(...)
anddef is_egg(self, low): return self.base_is_egg and low == "egg-info"
. I don't think adding these levels of indirection help legibility, but that's up to you.I can't think of further factoring, though.