Skip to content

Commit

Permalink
Python: Update getPackageSnapshotImports() to handle vendor directory
Browse files Browse the repository at this point in the history
This adds vendor directory handling to getPackageSnapshotImports() and
by doing so makes package vendoring work correctly.
  • Loading branch information
hoodmane committed Feb 28, 2025
1 parent a9466ff commit 43af23b
Show file tree
Hide file tree
Showing 5 changed files with 93 additions and 73 deletions.
95 changes: 62 additions & 33 deletions src/workerd/api/pyodide/pyodide-test.c++
Original file line number Diff line number Diff line change
Expand Up @@ -219,79 +219,108 @@ kj::HashSet<kj::String> strSet(Params&&... params) {
return set;
}

KJ_TEST("basic test of getPackageSnapshotImports") {
auto a = pyodide::PythonModuleInfo(strArray("a.py"),
bytesArray("from js import Response\n"
"import asyncio\n"
"import numbers\n"
"def on_fetch(request):\n"
" return Response.new('Hello')\n"));
auto result = a.getPackageSnapshotImports();
KJ_REQUIRE(result.size() == 1);
KJ_REQUIRE(result[0] == "numbers");
};

KJ_TEST("basic test of getPackageSnapshotImports user module") {
auto a = pyodide::PythonModuleInfo(strArray("a.py", "numbers.py"),
bytesArray("from js import Response\n"
"import asyncio\n"
"import numbers\n"
"def on_fetch(request):\n"
" return Response.new('Hello')\n",
""));
auto result = a.getPackageSnapshotImports();
KJ_REQUIRE(result.size() == 0);
};

kj::Array<kj::String> filterPythonScriptImports(
kj::Array<kj::String> names, kj::ArrayPtr<kj::String> imports) {
auto contentsBuilder = kj::heapArrayBuilder<kj::Array<kj::byte>>(names.size());
for (auto _: kj::zeroTo(names.size())) {
(void)_;
contentsBuilder.add(kj::Array<kj::byte>(0));
}
auto modInfo = pyodide::PythonModuleInfo(kj::mv(names), contentsBuilder.finish());
auto modSet = modInfo.getWorkerModuleSet();
return PythonModuleInfo::filterPythonScriptImports(kj::mv(modSet), kj::mv(imports));
}

KJ_TEST("Simple pass through") {
auto imports = strArray("b", "c");
auto result = PythonModuleInfo::filterPythonScriptImports({}, kj::mv(imports));
auto result = filterPythonScriptImports({}, kj::mv(imports));
KJ_REQUIRE(result.size() == 2);
KJ_REQUIRE(result[0] == "b");
KJ_REQUIRE(result[1] == "c");
}

KJ_TEST("pyodide and submodules") {
auto imports = strArray("pyodide", "pyodide.ffi");
auto result = PythonModuleInfo::filterPythonScriptImports({}, kj::mv(imports));
auto result = filterPythonScriptImports({}, kj::mv(imports));
KJ_REQUIRE(result.size() == 0);
}

KJ_TEST("js and submodules") {
auto imports = strArray("js", "js.crypto");
auto result = PythonModuleInfo::filterPythonScriptImports({}, kj::mv(imports));
auto result = filterPythonScriptImports({}, kj::mv(imports));
KJ_REQUIRE(result.size() == 0);
}

KJ_TEST("importlib and submodules") {
// importlib and importlib.metadata are imported into the baseline snapshot, but importlib.resources is not.
auto imports = strArray("importlib", "importlib.metadata", "importlib.resources");
auto result = PythonModuleInfo::filterPythonScriptImports({}, kj::mv(imports));
auto result = filterPythonScriptImports({}, kj::mv(imports));
KJ_REQUIRE(result.size() == 1);
KJ_REQUIRE(result[0] == "importlib.resources");
}

KJ_TEST("Filter worker .py files") {
auto workerModules = strSet("b.py", "c.py");
auto workerModules = strArray("b.py", "c.py");
auto imports = strArray("b", "c", "d");
auto result = PythonModuleInfo::filterPythonScriptImports(kj::mv(workerModules), kj::mv(imports));
auto result = filterPythonScriptImports(kj::mv(workerModules), kj::mv(imports));
KJ_REQUIRE(result.size() == 1);
KJ_REQUIRE(result[0] == "d");
}

KJ_TEST("Filter worker module/__init__.py") {
auto workerModules = strSet("a/__init__.py", "b/__init__.py", "c/a.py");
auto workerModules = strArray("a/__init__.py", "b/__init__.py", "c/a.py");
auto imports = strArray("a", "b", "c");
auto result = PythonModuleInfo::filterPythonScriptImports(kj::mv(workerModules), kj::mv(imports));
KJ_REQUIRE(result.size() == 1);
KJ_REQUIRE(result[0] == "c");
auto result = filterPythonScriptImports(kj::mv(workerModules), kj::mv(imports));
KJ_REQUIRE(result.size() == 0);
}

KJ_TEST("Filters out subdir/submodule") {
auto workerModules = strSet("subdir/submodule.py");
auto workerModules = strArray("subdir/submodule.py");
auto imports = strArray("subdir.submodule");
auto result = PythonModuleInfo::filterPythonScriptImports(kj::mv(workerModules), kj::mv(imports));
auto result = filterPythonScriptImports(kj::mv(workerModules), kj::mv(imports));
KJ_REQUIRE(result.size() == 0);
}

KJ_TEST("basic test of getPackageSnapshotImports") {
auto a = pyodide::PythonModuleInfo(strArray("a.py"),
bytesArray("from js import Response\n"
"import asyncio\n"
"import numbers\n"
"def on_fetch(request):\n"
" return Response.new('Hello')\n"));
auto result = a.getPackageSnapshotImports();
KJ_TEST("Filters out so") {
auto workerModules = strArray("a.so", "b.txt");
auto imports = strArray("a", "b");
auto result = filterPythonScriptImports(kj::mv(workerModules), kj::mv(imports));
KJ_REQUIRE(result.size() == 1);
KJ_REQUIRE(result[0] == "numbers");
};
KJ_REQUIRE(result[0] == "b");
}

KJ_TEST("Filters out vendor stuff") {
auto workerModules =
strArray("vendor/a.py", "vendor/package/b.py", "vendor/c.so", "vendor/x.txt");
auto imports = strArray("a", "package", "x");
auto result = filterPythonScriptImports(kj::mv(workerModules), kj::mv(imports));
KJ_REQUIRE(result.size() == 1);
KJ_REQUIRE(result[0] == "x");
}

KJ_TEST("basic test of getPackageSnapshotImports user module") {
auto a = pyodide::PythonModuleInfo(strArray("a.py", "numbers.py"),
bytesArray("from js import Response\n"
"import asyncio\n"
"import numbers\n"
"def on_fetch(request):\n"
" return Response.new('Hello')\n",
""));
auto result = a.getPackageSnapshotImports();
KJ_REQUIRE(result.size() == 0);
};
} // namespace
} // namespace workerd::api
61 changes: 25 additions & 36 deletions src/workerd/api/pyodide/pyodide.c++
Original file line number Diff line number Diff line change
Expand Up @@ -93,22 +93,39 @@ kj::Array<kj::String> PythonModuleInfo::getPythonFileContents() {
return builder.releaseAsArray();
}

kj::HashSet<kj::String> PythonModuleInfo::getPythonFileNamesSet() {
kj::HashSet<kj::String> PythonModuleInfo::getWorkerModuleSet() {
auto result = kj::HashSet<kj::String>();
for (auto& name: names) {
if (!name.endsWith(".py")) {
const auto vendor = "vendor/"_kj;
const auto dotpy = ".py"_kj;
const auto dotso = ".so"_kj;
for (auto& name_: names) {
kj::StringPtr name = name_;
if (name.startsWith(vendor)) {
name = name.slice(vendor.size());
}
auto firstSlash = name.findFirst('/');
KJ_IF_SOME(idx, firstSlash) {
result.insert(kj::str(name.slice(0, idx)));
continue;
}
if (name.endsWith(dotpy)) {
result.insert(kj::str(name.slice(0, name.size() - dotpy.size())));
continue;
}
if (name.endsWith(dotso)) {
result.insert(kj::str(name.slice(0, name.size() - dotso.size())));
continue;
}
result.insert(kj::str(name));
}
return result;
}

kj::Array<kj::String> PythonModuleInfo::getPackageSnapshotImports() {
auto workerFiles = this->getPythonFileContents();
auto imports = parsePythonScriptImports(kj::mv(workerFiles));
auto names = getPythonFileNamesSet();
return PythonModuleInfo::filterPythonScriptImports(kj::mv(names), kj::mv(imports));
auto imported_names = parsePythonScriptImports(kj::mv(workerFiles));
auto worker_modules = getWorkerModuleSet();
return PythonModuleInfo::filterPythonScriptImports(
kj::mv(worker_modules), kj::mv(imported_names));
}

kj::Array<kj::String> PyodideMetadataReader::getPackageSnapshotImports() {
Expand Down Expand Up @@ -384,34 +401,6 @@ kj::Array<kj::StringPtr> ArtifactBundler::getSnapshotImports() {
return kj::heapArray(snapshotImports.begin(), snapshotImports.size());
}

// This is equivalent to `pkgImport.replace('.', '/') + ".py"`.
kj::String importToModuleFilename(kj::StringPtr pkgImport) {
auto result = kj::heapString(pkgImport.size() + 3);
for (auto i = 0; i < pkgImport.size(); i++) {
if (pkgImport[i] == '.') {
result[i] = '/';
} else {
result[i] = pkgImport[i];
}
}

result[pkgImport.size()] = '.';
result[pkgImport.size() + 1] = 'p';
result[pkgImport.size() + 2] = 'y';
return result;
}

bool isWorkerModuleImport(kj::HashSet<kj::String>& workerModules,
kj::ArrayPtr<const char> firstComponent,
kj::StringPtr pkgImport) {
// TODO: handling for:
// 1. namespace packages
// 2. shared libraries
return workerModules.contains(kj::str(firstComponent, ".py")) ||
workerModules.contains(kj::str(firstComponent, "/__init__.py")) ||
workerModules.contains(importToModuleFilename(pkgImport));
}

kj::Array<kj::String> PythonModuleInfo::filterPythonScriptImports(
kj::HashSet<kj::String> workerModules, kj::ArrayPtr<kj::String> imports) {
auto baselineSnapshotImportsSet = kj::HashSet<kj::StringPtr>();
Expand Down Expand Up @@ -440,7 +429,7 @@ kj::Array<kj::String> PythonModuleInfo::filterPythonScriptImports(
}

// Don't include imports from worker files
if (isWorkerModuleImport(workerModules, firstComponent, pkgImport)) {
if (workerModules.contains(firstComponent)) {
continue;
}
filteredImportsSet.insert(kj::mv(pkgImport));
Expand Down
2 changes: 1 addition & 1 deletion src/workerd/api/pyodide/pyodide.h
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,7 @@ class PythonModuleInfo {
//
// Package relative imports are ignored.
static kj::Array<kj::String> parsePythonScriptImports(kj::Array<kj::String> files);
kj::HashSet<kj::String> getPythonFileNamesSet();
kj::HashSet<kj::String> getWorkerModuleSet();
kj::Array<kj::String> getPythonFileContents();
static kj::Array<kj::String> filterPythonScriptImports(
kj::HashSet<kj::String> workerModules, kj::ArrayPtr<kj::String> imports);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,8 @@ const unitTests :Workerd.Config = (
worker = (
modules = [
(name = "worker.py", pythonModule = embed "worker.py"),
(name = "vendor/a.py", pythonModule = embed "vendor/a.py")
(name = "vendor/a.py", pythonModule = embed "vendor/a.py"),
(name = "numpy", pythonRequirement = "")
],
compatibilityDate = "2024-01-15",
compatibilityFlags = [%PYTHON_FEATURE_FLAGS],
Expand Down
5 changes: 3 additions & 2 deletions src/workerd/server/tests/python/vendor_dir/worker.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
def test():
from a import A
from a import A


def test():
assert A == 77

0 comments on commit 43af23b

Please sign in to comment.