From 6ca9b62aca613e0a5ea28980dc847d155fd2354f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jelmer=20Vernoo=C4=B3?= Date: Wed, 6 Nov 2024 10:50:08 +0000 Subject: [PATCH 1/2] Revert broken refspec handling in porcelain.pull encoded_refs get set to short strings, e.g. bmaster. Passing this to ref_prefix with protocol v2 causes no refs at all to be fetched Fixes #1399 --- dulwich/porcelain.py | 28 ++++------------------------ 1 file changed, 4 insertions(+), 24 deletions(-) diff --git a/dulwich/porcelain.py b/dulwich/porcelain.py index 4f04720e2..d76e28e1b 100644 --- a/dulwich/porcelain.py +++ b/dulwich/porcelain.py @@ -486,26 +486,6 @@ def init(path=".", *, bare=False, symlinks: Optional[bool] = None): return Repo.init(path, symlinks=symlinks) -def encode_refspecs(refspecs): - if refspecs is None: - return [b"HEAD"] - - def encode_refspec(ref): - if isinstance(ref, bytes): - return ref - else: - return ref.encode(DEFAULT_ENCODING) - - encoded_refs = [] - if isinstance(refspecs, bytes) or isinstance(refspecs, str): - encoded_refs.append(encode_refspec(refspecs)) - else: - for ref in refspecs: - encoded_refs.append(encode_refspec(ref)) - - return encoded_refs - - def clone( source, target=None, @@ -587,7 +567,6 @@ def clone( depth=depth, filter_spec=filter_spec, protocol_version=protocol_version, - **kwargs, ) @@ -1296,12 +1275,14 @@ def pull( with open_repo_closing(repo) as r: (remote_name, remote_location) = get_remote_repo(r, remote_location) - encoded_refs = encode_refspecs(refspecs) selected_refs = [] + if refspecs is None: + refspecs = [b"HEAD"] + def determine_wants(remote_refs, **kwargs): selected_refs.extend( - parse_reftuples(remote_refs, r.refs, encoded_refs, force=force) + parse_reftuples(remote_refs, r.refs, refspecs, force=force) ) return [ remote_refs[lh] @@ -1319,7 +1300,6 @@ def determine_wants(remote_refs, **kwargs): r, progress=errstream.write, determine_wants=determine_wants, - ref_prefix=refspecs, filter_spec=filter_spec, protocol_version=protocol_version, ) From 5e4637599ea300daff0dcc9ae305b299addd1215 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jelmer=20Vernoo=C4=B3?= Date: Wed, 6 Nov 2024 10:57:14 +0000 Subject: [PATCH 2/2] When using protocol v1, apply ref_prefix client-side for consistency --- dulwich/client.py | 96 ++++++++++++++++++------------------- dulwich/protocol.py | 10 ++++ tests/compat/test_client.py | 13 +++++ tests/test_protocol.py | 22 +++++++-- 4 files changed, 89 insertions(+), 52 deletions(-) diff --git a/dulwich/client.py b/dulwich/client.py index 4a59c0e44..0c89c7c4b 100644 --- a/dulwich/client.py +++ b/dulwich/client.py @@ -113,6 +113,7 @@ capability_agent, extract_capabilities, extract_capability_names, + filter_ref_prefix, parse_capability, pkt_line, pkt_seq, @@ -944,12 +945,9 @@ def fetch( list of shas to fetch. Defaults to all shas. progress: Optional progress function depth: Depth to fetch at - ref_prefix: Prefix of desired references, as a list of bytestrings. - The server will limit the list of references sent to this prefix, - provided this feature is supported and sufficient server-side - resources are available to match all references against the prefix. - Clients must be prepared to filter out any non-requested references - themselves. This feature is an entirely optional optimization. + ref_prefix: List of prefixes of desired references, as a list of + bytestrings. Filtering is done by the server if supported, and + client side otherwise. filter_spec: A git-rev-list-style object filter spec, as bytestring. Only used if the server supports the Git protocol-v2 'filter' feature, and ignored otherwise. @@ -1026,12 +1024,9 @@ def fetch_pack( pack_data: Callback called for each bit of data in the pack progress: Callback for progress reports (strings) depth: Shallow fetch depth - ref_prefix: Prefix of desired references, as a list of bytestrings. - The server will limit the list of references sent to this prefix, - provided this feature is supported and sufficient server-side - resources are available to match all references against the prefix. - Clients must be prepared to filter out any non-requested references - themselves. This feature is an entirely optional optimization. + ref_prefix: List of prefixes of desired references, as a list of + bytestrings. Filtering is done by the server if supported, and + client side otherwise. filter_spec: A git-rev-list-style object filter spec, as bytestring. Only used if the server supports the Git protocol-v2 'filter' feature, and ignored otherwise. @@ -1338,12 +1333,9 @@ def fetch_pack( pack_data: Callback called for each bit of data in the pack progress: Callback for progress reports (strings) depth: Shallow fetch depth - ref_prefix: Prefix of desired references, as a list of bytestrings. - The server will limit the list of references sent to this prefix, - provided this feature is supported and sufficient server-side - resources are available to match all references against the prefix. - Clients must be prepared to filter out any non-requested references - themselves. This feature is an entirely optional optimization. + ref_prefix: List of prefixes of desired references, as a list of + bytestrings. Filtering is done by the server if supported, and + client side otherwise. filter_spec: A git-rev-list-style object filter spec, as bytestring. Only used if the server supports the Git protocol-v2 'filter' feature, and ignored otherwise. @@ -1372,21 +1364,17 @@ def fetch_pack( ) self.protocol_version = server_protocol_version with proto: - try: - if self.protocol_version == 2: + if self.protocol_version == 2: + try: server_capabilities = read_server_capabilities(proto.read_pkt_seq()) - refs = None - else: - refs, server_capabilities = read_pkt_refs_v1(proto.read_pkt_seq()) - except HangupException as exc: - raise _remote_error_from_stderr(stderr) from exc - ( - negotiated_capabilities, - symrefs, - agent, - ) = self._negotiate_upload_pack_capabilities(server_capabilities) + except HangupException as exc: + raise _remote_error_from_stderr(stderr) from exc + ( + negotiated_capabilities, + symrefs, + agent, + ) = self._negotiate_upload_pack_capabilities(server_capabilities) - if self.protocol_version == 2: proto.write_pkt_line(b"command=ls-refs\n") proto.write(b"0001") # delim-pkt proto.write_pkt_line(b"symrefs") @@ -1397,6 +1385,19 @@ def fetch_pack( proto.write_pkt_line(b"ref-prefix " + prefix) proto.write_pkt_line(None) refs, symrefs, _peeled = read_pkt_refs_v2(proto.read_pkt_seq()) + else: + try: + refs, server_capabilities = read_pkt_refs_v1(proto.read_pkt_seq()) + except HangupException as exc: + raise _remote_error_from_stderr(stderr) from exc + ( + negotiated_capabilities, + symrefs, + agent, + ) = self._negotiate_upload_pack_capabilities(server_capabilities) + + if ref_prefix is not None: + refs = filter_ref_prefix(refs, ref_prefix) if refs is None: proto.write_pkt_line(None) @@ -1501,6 +1502,8 @@ def get_refs( raise _remote_error_from_stderr(stderr) from exc proto.write_pkt_line(None) (_symrefs, _agent) = _extract_symrefs_and_agent(server_capabilities) + if ref_prefix is not None: + refs = filter_ref_prefix(refs, ref_prefix) return refs def archive( @@ -1845,12 +1848,9 @@ def fetch( list of shas to fetch. Defaults to all shas. progress: Optional progress function depth: Shallow fetch depth - ref_prefix: Prefix of desired references, as a list of bytestrings. - The server will limit the list of references sent to this prefix, - provided this feature is supported and sufficient server-side - resources are available to match all references against the prefix. - Clients must be prepared to filter out any non-requested references - themselves. This feature is an entirely optional optimization. + ref_prefix: List of prefixes of desired references, as a list of + bytestrings. Filtering is done by the server if supported, and + client side otherwise. filter_spec: A git-rev-list-style object filter spec, as bytestring. Only used if the server supports the Git protocol-v2 'filter' feature, and ignored otherwise. @@ -1891,12 +1891,9 @@ def fetch_pack( pack_data: Callback called for each bit of data in the pack progress: Callback for progress reports (strings) depth: Shallow fetch depth - ref_prefix: Prefix of desired references, as a list of bytestrings. - The server will limit the list of references sent to this prefix, - provided this feature is supported and sufficient server-side - resources are available to match all references against the prefix. - Clients must be prepared to filter out any non-requested references - themselves. This feature is an entirely optional optimization. + ref_prefix: List of prefixes of desired references, as a list of + bytestrings. Filtering is done by the server if supported, and + client side otherwise. filter_spec: A git-rev-list-style object filter spec, as bytestring. Only used if the server supports the Git protocol-v2 'filter' feature, and ignored otherwise. @@ -2533,10 +2530,14 @@ def begin_protocol_v2(proto): (symrefs, agent) = _extract_symrefs_and_agent( server_capabilities ) + if ref_prefix is not None: + refs = filter_ref_prefix(refs, ref_prefix) return refs, server_capabilities, base_url, symrefs, peeled else: self.protocol_version = 0 # dumb servers only support protocol v0 (refs, peeled) = split_peeled_refs(read_info_refs(resp)) + if ref_prefix is not None: + refs = filter_ref_prefix(refs, ref_prefix) return refs, set(), base_url, {}, peeled finally: resp.close() @@ -2651,12 +2652,9 @@ def fetch_pack( pack_data: Callback called for each bit of data in the pack progress: Callback for progress reports (strings) depth: Depth for request - ref_prefix: Prefix of desired references, as a list of bytestrings. - The server will limit the list of references sent to this prefix, - provided this feature is supported and sufficient server-side - resources are available to match all references against the prefix. - Clients must be prepared to filter out any non-requested references - themselves. This feature is an entirely optional optimization. + ref_prefix: List of prefixes of desired references, as a list of + bytestrings. Filtering is done by the server if supported, and + client side otherwise. filter_spec: A git-rev-list-style object filter spec, as bytestring. Only used if the server supports the Git protocol-v2 'filter' feature, and ignored otherwise. diff --git a/dulwich/protocol.py b/dulwich/protocol.py index 56702c6bb..c0128f158 100644 --- a/dulwich/protocol.py +++ b/dulwich/protocol.py @@ -194,6 +194,16 @@ def pkt_seq(*seq): return b"".join([pkt_line(s) for s in seq]) + pkt_line(None) +def filter_ref_prefix(refs, prefixes): + """Filter refs to only include those with a given prefix. + + Args: + refs: A list of refs. + prefix: The prefix to filter by. + """ + return {k: v for k, v in refs.items() if any(k.startswith(p) for p in prefixes)} + + class Protocol: """Class for interacting with a remote git process over the wire. diff --git a/tests/compat/test_client.py b/tests/compat/test_client.py index 62cf722f0..55bddb2dc 100644 --- a/tests/compat/test_client.py +++ b/tests/compat/test_client.py @@ -270,6 +270,19 @@ def test_get_refs_with_peeled_tag(self): sorted(refs.keys()), ) + def test_get_refs_with_ref_prefix(self): + c = self._client() + refs = c.get_refs( + self._build_path("/server_new.export"), ref_prefix=[b"refs/heads"] + ) + self.assertEqual( + [ + b"refs/heads/branch", + b"refs/heads/master", + ], + sorted(refs.keys()), + ) + def test_fetch_pack_depth(self): c = self._client() with repo.Repo(os.path.join(self.gitroot, "dest")) as dest: diff --git a/tests/test_protocol.py b/tests/test_protocol.py index 5e29826dd..c374eb418 100644 --- a/tests/test_protocol.py +++ b/tests/test_protocol.py @@ -35,6 +35,7 @@ ack_type, extract_capabilities, extract_want_line_capabilities, + filter_ref_prefix, pkt_line, pkt_seq, ) @@ -42,14 +43,29 @@ from . import TestCase -class PktLinetests: +class PktLineTests(TestCase): def test_pkt_line(self): self.assertEqual(b"0007bla", pkt_line(b"bla")) self.assertEqual(b"0000", pkt_line(None)) def test_pkt_seq(self): - self.assertEqual(b"0007bla0003foo0000", pkt_seq([b"bla", b"foo"])) - self.assertEqual(b"0000", pkt_seq([])) + self.assertEqual(b"0007bla0007foo0000", pkt_seq(b"bla", b"foo")) + self.assertEqual(b"0000", pkt_seq()) + + +class FilterRefPrefixTests(TestCase): + def test_filter_ref_prefix(self): + self.assertEqual( + {b"refs/heads/foo": b"0123456789", b"refs/heads/bar": b"0123456789"}, + filter_ref_prefix( + { + b"refs/heads/foo": b"0123456789", + b"refs/heads/bar": b"0123456789", + b"refs/tags/bar": b"0123456789", + }, + [b"refs/heads/"], + ), + ) class BaseProtocolTests: