Skip to content

Commit

Permalink
GitHub, GitLab: improve claim matching during lookup (#16462)
Browse files Browse the repository at this point in the history
* GitHub, GitLab: escape matchable characters during publisher lookup

This fixes a constraint violation that users were previously able
to induce by registering two similar-looking-but-not-exact
Trusted Publishers. For example:

```
repo: foo/bar
workflow: publish-pypi.yml
```

and:

```
repo: foo/bar
workflow: publish_pypi.yml
```

Before this change, PyPI's publisher lookup would treat
the `_` in `publish_pypi.yml` as a match expression,
resulting in both publishers being matched when only
exactly one publisher was expected to match. As a result,
a user could configure two publishers in such a way that
minting would *always* fail.

To fix this, we replace our use of a raw `like` with
a more constrained `startswith` *and* escape `_` and `%`
when they appear in the workflow identifier being matched on.
We would ideally do this using SQLAlchemy's `autoescape`,
but `autoescape` only supports string clauses and not
column clauses. Instead, we build up a somewhat verbose chain
of escape replacements manually.

Signed-off-by: William Woodruff <william@trailofbits.com>

* github: replace obnoxious escaping with a regexp

This avoids the need for a very slow text scan over an escaped
column by using a regexp to extract the expected exact column match.
This regexp is itself not the faster, but it's definitely
faster than scanning an unindexed column in the middle of a query.

Signed-off-by: William Woodruff <william@trailofbits.com>

* replace escaping for GitLab too

Signed-off-by: William Woodruff <william@trailofbits.com>

* simplify assignments

Signed-off-by: William Woodruff <william@trailofbits.com>

* tests, warehouse: coverage, document regexps

Signed-off-by: William Woodruff <william@trailofbits.com>

---------

Signed-off-by: William Woodruff <william@trailofbits.com>
Co-authored-by: Dustin Ingram <di@users.noreply.github.com>
  • Loading branch information
woodruffw and di committed Aug 13, 2024
1 parent 293a230 commit efe40f0
Show file tree
Hide file tree
Showing 4 changed files with 329 additions and 74 deletions.
141 changes: 133 additions & 8 deletions tests/unit/oidc/models/test_github.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,19 +20,82 @@
from warehouse.oidc.models import _core, github


@pytest.mark.parametrize(
("workflow_ref", "expected"),
[
# Well-formed workflow refs, including exceedingly obnoxious ones
# with `@` or extra suffixes or `git` refs that look like workflows.
("foo/bar/.github/workflows/basic.yml@refs/heads/main", "basic.yml"),
("foo/bar/.github/workflows/basic.yaml@refs/heads/main", "basic.yaml"),
("foo/bar/.github/workflows/has-dash.yml@refs/heads/main", "has-dash.yml"),
(
"foo/bar/.github/workflows/has--dashes.yml@refs/heads/main",
"has--dashes.yml",
),
(
"foo/bar/.github/workflows/has--dashes-.yml@refs/heads/main",
"has--dashes-.yml",
),
("foo/bar/.github/workflows/has.period.yml@refs/heads/main", "has.period.yml"),
(
"foo/bar/.github/workflows/has..periods.yml@refs/heads/main",
"has..periods.yml",
),
(
"foo/bar/.github/workflows/has..periods..yml@refs/heads/main",
"has..periods..yml",
),
(
"foo/bar/.github/workflows/has_underscore.yml@refs/heads/main",
"has_underscore.yml",
),
(
"foo/bar/.github/workflows/nested@evil.yml@refs/heads/main",
"nested@evil.yml",
),
(
"foo/bar/.github/workflows/nested.yml@evil.yml@refs/heads/main",
"nested.yml@evil.yml",
),
(
"foo/bar/.github/workflows/extra@nested.yml@evil.yml@refs/heads/main",
"extra@nested.yml@evil.yml",
),
(
"foo/bar/.github/workflows/extra.yml@nested.yml@evil.yml@refs/heads/main",
"extra.yml@nested.yml@evil.yml",
),
(
"foo/bar/.github/workflows/basic.yml@refs/heads/misleading@branch.yml",
"basic.yml",
),
(
"foo/bar/.github/workflows/basic.yml@refs/heads/bad@branch@twomatches.yml",
"basic.yml",
),
("foo/bar/.github/workflows/foo.yml.yml@refs/heads/main", "foo.yml.yml"),
(
"foo/bar/.github/workflows/foo.yml.foo.yml@refs/heads/main",
"foo.yml.foo.yml",
),
# Malformed workflow refs.
("foo/bar/.github/workflows/basic.wrongsuffix@refs/heads/main", None),
("foo/bar/.github/workflows/@refs/heads/main", None),
("foo/bar/.github/workflows/nosuffix@refs/heads/main", None),
("foo/bar/.github/workflows/.yml@refs/heads/main", None),
("foo/bar/.github/workflows/.yaml@refs/heads/main", None),
("foo/bar/.github/workflows/main.yml", None),
],
)
def test_extract_workflow_filename(workflow_ref, expected):
assert github._extract_workflow_filename(workflow_ref) == expected


@pytest.mark.parametrize("claim", ["", "repo", "repo:"])
def test_check_sub(claim):
assert github._check_sub(pretend.stub(), claim, pretend.stub()) is False


def test_lookup_strategies():
assert (
len(github.GitHubPublisher.__lookup_strategies__)
== len(github.PendingGitHubPublisher.__lookup_strategies__)
== 2
)


class TestGitHubPublisher:
def test_lookup_strategies(self):
assert (
Expand All @@ -41,6 +104,68 @@ def test_lookup_strategies(self):
== 2
)

@pytest.mark.parametrize("environment", [None, "some_environment"])
def test_lookup_fails_invalid_workflow_ref(self, environment):
signed_claims = {
"repository": "foo/bar",
"job_workflow_ref": ("foo/bar/.github/workflows/.yml@refs/heads/main"),
"repository_owner_id": "1234",
}

if environment:
signed_claims["environment"] = environment

# The `job_workflow_ref` is malformed, so no queries are performed.
with pytest.raises(
errors.InvalidPublisherError, match="All lookup strategies exhausted"
):
github.GitHubPublisher.lookup_by_claims(pretend.stub(), signed_claims)

@pytest.mark.parametrize("environment", ["", "some_environment"])
@pytest.mark.parametrize(
("workflow_a", "workflow_b"),
[
("release-pypi.yml", "release_pypi.yml"),
("release%pypi.yml", "release-pypi.yml"),
],
)
def test_lookup_escapes(self, db_request, environment, workflow_a, workflow_b):
GitHubPublisherFactory(
id="aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa",
repository_owner="foo",
repository_name="bar",
repository_owner_id="1234",
workflow_filename=workflow_a,
environment=environment,
)
GitHubPublisherFactory(
id="bbbbbbbb-bbbb-bbbb-bbbb-bbbbbbbbbbbb",
repository_owner="foo",
repository_name="bar",
repository_owner_id="1234",
workflow_filename=workflow_b,
environment=environment,
)

for workflow in (workflow_a, workflow_b):
signed_claims = {
"repository": "foo/bar",
"job_workflow_ref": (
f"foo/bar/.github/workflows/{workflow}@refs/heads/main"
),
"repository_owner_id": "1234",
}

if environment:
signed_claims["environment"] = environment

assert (
github.GitHubPublisher.lookup_by_claims(
db_request.db, signed_claims
).workflow_filename
== workflow
)

def test_github_publisher_all_known_claims(self):
assert github.GitHubPublisher.all_known_claims() == {
# required verifiable claims
Expand Down
108 changes: 100 additions & 8 deletions tests/unit/oidc/models/test_gitlab.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,19 +19,51 @@
from warehouse.oidc.models import _core, gitlab


@pytest.mark.parametrize(
("ci_config_ref_uri", "expected"),
[
# Well-formed `ci_config_ref_uri`s, including obnoxious ones.
("gitlab.com/foo/bar//notnested.yml@/some/ref", "notnested.yml"),
("gitlab.com/foo/bar//notnested.yaml@/some/ref", "notnested.yaml"),
("gitlab.com/foo/bar//basic/basic.yml@/some/ref", "basic/basic.yml"),
(
"gitlab.com/foo/bar//more/nested/example.yml@/some/ref",
"more/nested/example.yml",
),
(
"gitlab.com/foo/bar//too//many//slashes.yml@/some/ref",
"too//many//slashes.yml",
),
(
"gitlab.com/foo/bar//too//many//slashes.yml@/some/ref",
"too//many//slashes.yml",
),
("gitlab.com/foo/bar//has-@.yml@/some/ref", "has-@.yml"),
("gitlab.com/foo/bar//foo.bar.yml@/some/ref", "foo.bar.yml"),
("gitlab.com/foo/bar//foo.yml.bar.yml@/some/ref", "foo.yml.bar.yml"),
("gitlab.com/foo/bar//foo.yml@bar.yml@/some/ref", "foo.yml@bar.yml"),
("gitlab.com/foo/bar//@foo.yml@bar.yml@/some/ref", "@foo.yml@bar.yml"),
(
"gitlab.com/foo/bar//@.yml.foo.yml@bar.yml@/some/ref",
"@.yml.foo.yml@bar.yml",
),
# Malformed `ci_config_ref_uri`s.
("gitlab.com/foo/bar//notnested.wrongsuffix@/some/ref", None),
("gitlab.com/foo/bar//@/some/ref", None),
("gitlab.com/foo/bar//.yml@/some/ref", None),
("gitlab.com/foo/bar//.yaml@/some/ref", None),
("gitlab.com/foo/bar//somedir/.yaml@/some/ref", None),
],
)
def test_extract_workflow_filename(ci_config_ref_uri, expected):
assert gitlab._extract_workflow_filepath(ci_config_ref_uri) == expected


@pytest.mark.parametrize("claim", ["", "repo", "repo:"])
def test_check_sub(claim):
assert gitlab._check_sub(pretend.stub(), claim, pretend.stub()) is False


def test_lookup_strategies():
assert (
len(gitlab.GitLabPublisher.__lookup_strategies__)
== len(gitlab.PendingGitLabPublisher.__lookup_strategies__)
== 2
)


class TestGitLabPublisher:
def test_lookup_strategies(self):
assert (
Expand All @@ -40,6 +72,66 @@ def test_lookup_strategies(self):
== 2
)

@pytest.mark.parametrize("environment", [None, "some_environment"])
def test_lookup_fails_invalid_ci_config_ref_uri(self, environment):
signed_claims = {
"project_path": "foo/bar",
"ci_config_ref_uri": ("gitlab.com/foo/bar//example/.yml@refs/heads/main"),
}

if environment:
signed_claims["environment"] = environment

# The `ci_config_ref_uri` is malformed, so no queries are performed.
with pytest.raises(
errors.InvalidPublisherError, match="All lookup strategies exhausted"
):
gitlab.GitLabPublisher.lookup_by_claims(pretend.stub(), signed_claims)

@pytest.mark.parametrize("environment", ["", "some_environment"])
@pytest.mark.parametrize(
("workflow_filepath_a", "workflow_filepath_b"),
[
("workflows/release_pypi/ci.yml", "workflows/release-pypi/ci.yml"),
("workflows/release%pypi/ci.yml", "workflows/release-pypi/ci.yml"),
],
)
def test_lookup_escapes(
self, db_request, environment, workflow_filepath_a, workflow_filepath_b
):
GitLabPublisherFactory(
id="aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa",
namespace="foo",
project="bar",
workflow_filepath=workflow_filepath_a,
environment=environment,
)
GitLabPublisherFactory(
id="bbbbbbbb-bbbb-bbbb-bbbb-bbbbbbbbbbbb",
namespace="foo",
project="bar",
workflow_filepath=workflow_filepath_b,
environment=environment,
)

for workflow_filepath in (workflow_filepath_a, workflow_filepath_b):
signed_claims = {
"project_path": "foo/bar",
"ci_config_ref_uri": (
f"gitlab.com/foo/bar//{workflow_filepath}@refs/heads/main"
),
}

if environment:
signed_claims["environment"] = environment

assert (
gitlab.GitLabPublisher.lookup_by_claims(
db_request.db, signed_claims
).workflow_filepath
== workflow_filepath
)

def test_gitlab_publisher_all_known_claims(self):
assert gitlab.GitLabPublisher.all_known_claims() == {
# required verifiable claims
Expand Down
77 changes: 48 additions & 29 deletions warehouse/oidc/models/github.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@
# See the License for the specific language governing permissions and
# limitations under the License.

import re

from typing import Any

from sigstore.verify.policy import (
Expand All @@ -21,7 +23,6 @@
from sqlalchemy import ForeignKey, String, UniqueConstraint
from sqlalchemy.dialects.postgresql import UUID
from sqlalchemy.orm import Query, mapped_column
from sqlalchemy.sql.expression import func, literal

from warehouse.oidc.errors import InvalidPublisherError
from warehouse.oidc.interfaces import SignedClaims
Expand All @@ -33,6 +34,30 @@
check_existing_jti,
)

# This expression matches the workflow filename component of a GitHub
# "workflow ref", i.e. the value present in the `workflow_ref` and
# `job_workflow_ref` claims. This requires a nontrivial (and nonregular)
# pattern, since the workflow filename and other components of the workflow
# can contain overlapping delimiters (such as `@` in the workflow filename,
# or `git` refs that look like workflow filenames).
_WORKFLOW_FILENAME_RE = re.compile(
r"""
( # our capture group
[^/]+ # match one or more non-slash characters
\.(yml|yaml) # match the literal suffix `.yml` or `.yaml`
)
(?=@) # lookahead match for `@`, constraining the group above
""",
re.X,
)


def _extract_workflow_filename(workflow_ref: str) -> str | None:
if match := _WORKFLOW_FILENAME_RE.search(workflow_ref):
return match.group(0)
else:
return None


def _check_repository(ground_truth, signed_claim, _all_signed_claims, **_kwargs):
# Defensive: GitHub should never give us an empty repository claim.
Expand Down Expand Up @@ -178,40 +203,34 @@ def __lookup_all__(klass, signed_claims: SignedClaims) -> Query | None:

repository = signed_claims["repository"]
repository_owner, repository_name = repository.split("/", 1)
workflow_prefix = f"{repository}/.github/workflows/"
workflow_ref = signed_claims["job_workflow_ref"].removeprefix(workflow_prefix)

return (
Query(klass)
.filter_by(
repository_name=repository_name,
repository_owner=repository_owner,
repository_owner_id=signed_claims["repository_owner_id"],
environment=environment.lower(),
)
.filter(
literal(workflow_ref).like(func.concat(klass.workflow_filename, "%"))
)
workflow_ref = signed_claims["job_workflow_ref"]

if not (workflow_filename := _extract_workflow_filename(workflow_ref)):
return None

return Query(klass).filter_by(
repository_name=repository_name,
repository_owner=repository_owner,
repository_owner_id=signed_claims["repository_owner_id"],
environment=environment.lower(),
workflow_filename=workflow_filename,
)

@staticmethod
def __lookup_no_environment__(klass, signed_claims: SignedClaims) -> Query | None:
repository = signed_claims["repository"]
repository_owner, repository_name = repository.split("/", 1)
workflow_prefix = f"{repository}/.github/workflows/"
workflow_ref = signed_claims["job_workflow_ref"].removeprefix(workflow_prefix)

return (
Query(klass)
.filter_by(
repository_name=repository_name,
repository_owner=repository_owner,
repository_owner_id=signed_claims["repository_owner_id"],
environment="",
)
.filter(
literal(workflow_ref).like(func.concat(klass.workflow_filename, "%"))
)
workflow_ref = signed_claims["job_workflow_ref"]

if not (workflow_filename := _extract_workflow_filename(workflow_ref)):
return None

return Query(klass).filter_by(
repository_name=repository_name,
repository_owner=repository_owner,
repository_owner_id=signed_claims["repository_owner_id"],
environment="",
workflow_filename=workflow_filename,
)

__lookup_strategies__ = [
Expand Down
Loading

0 comments on commit efe40f0

Please sign in to comment.