Skip to content

Commit

Permalink
gh-61460: Stronger HMAC in multiprocessing (#20380)
Browse files Browse the repository at this point in the history
bpo-17258:  `multiprocessing` now supports stronger HMAC algorithms for inter-process connection authentication rather than only HMAC-MD5.

Signed-off-by: Christian Heimes <christian@python.org>

gpshead: I Reworked to be more robust while keeping the idea.

The protocol modification idea remains, but we now take advantage of the
message length as an indicator of legacy vs modern protocol version.  No
more regular expression usage.  We now default to HMAC-SHA256, but do so
in a way that will be compatible when communicating with older clients
or older servers. No protocol transition period is needed.

More integration tests to verify these claims remain true are required. I'm
unaware of anyone depending on multiprocessing connections between
different Python versions.

---------

Signed-off-by: Christian Heimes <christian@python.org>
Co-authored-by: Gregory P. Smith [Google] <greg@krypto.org>
  • Loading branch information
tiran and gpshead authored May 20, 2023
1 parent 12f1581 commit 3ed57e4
Show file tree
Hide file tree
Showing 3 changed files with 200 additions and 41 deletions.
182 changes: 151 additions & 31 deletions Lib/multiprocessing/connection.py
Original file line number Diff line number Diff line change
Expand Up @@ -722,11 +722,11 @@ def PipeClient(address):
# Authentication stuff
#

MESSAGE_LENGTH = 20
MESSAGE_LENGTH = 40 # MUST be > 20

CHALLENGE = b'#CHALLENGE#'
WELCOME = b'#WELCOME#'
FAILURE = b'#FAILURE#'
_CHALLENGE = b'#CHALLENGE#'
_WELCOME = b'#WELCOME#'
_FAILURE = b'#FAILURE#'

# multiprocessing.connection Authentication Handshake Protocol Description
# (as documented for reference after reading the existing code)
Expand All @@ -750,7 +750,12 @@ def PipeClient(address):
# ------------------------------ ---------------------------------------
# 0. Open a connection on the pipe.
# 1. Accept connection.
# 2. New random 20 bytes -> MESSAGE
# 2. Random 20+ bytes -> MESSAGE
# Modern servers always send
# more than 20 bytes and include
# a {digest} prefix on it with
# their preferred HMAC digest.
# Legacy ones send ==20 bytes.
# 3. send 4 byte length (net order)
# prefix followed by:
# b'#CHALLENGE#' + MESSAGE
Expand All @@ -763,14 +768,32 @@ def PipeClient(address):
# 6. Assert that M1 starts with:
# b'#CHALLENGE#'
# 7. Strip that prefix from M1 into -> M2
# 8. Compute HMAC-MD5 of AUTHKEY, M2 -> C_DIGEST
# 7.1. Parse M2: if it is exactly 20 bytes in
# length this indicates a legacy server
# supporting only HMAC-MD5. Otherwise the
# 7.2. preferred digest is looked up from an
# expected "{digest}" prefix on M2. No prefix
# or unsupported digest? <- AuthenticationError
# 7.3. Put divined algorithm name in -> D_NAME
# 8. Compute HMAC-D_NAME of AUTHKEY, M2 -> C_DIGEST
# 9. Send 4 byte length prefix (net order)
# followed by C_DIGEST bytes.
# 10. Compute HMAC-MD5 of AUTHKEY,
# MESSAGE into -> M_DIGEST.
# 11. Receive 4 or 4+8 byte length
# 10. Receive 4 or 4+8 byte length
# prefix (#4 dance) -> SIZE.
# 12. Receive min(SIZE, 256) -> C_D.
# 11. Receive min(SIZE, 256) -> C_D.
# 11.1. Parse C_D: legacy servers
# accept it as is, "md5" -> D_NAME
# 11.2. modern servers check the length
# of C_D, IF it is 16 bytes?
# 11.2.1. "md5" -> D_NAME
# and skip to step 12.
# 11.3. longer? expect and parse a "{digest}"
# prefix into -> D_NAME.
# Strip the prefix and store remaining
# bytes in -> C_D.
# 11.4. Don't like D_NAME? <- AuthenticationError
# 12. Compute HMAC-D_NAME of AUTHKEY,
# MESSAGE into -> M_DIGEST.
# 13. Compare M_DIGEST == C_D:
# 14a: Match? Send length prefix &
# b'#WELCOME#'
Expand All @@ -787,42 +810,139 @@ def PipeClient(address):
#
# If this RETURNed, the connection remains open: it has been authenticated.
#
# Length prefixes are used consistently even though every step so far has
# always been a singular specific fixed length. This may help us evolve
# the protocol in the future without breaking backwards compatibility.
#
# Similarly the initial challenge message from the serving side has always
# been 20 bytes, but clients can accept a 100+ so using the length of the
# opening challenge message as an indicator of protocol version may work.
# Length prefixes are used consistently. Even on the legacy protocol, this
# was good fortune and allowed us to evolve the protocol by using the length
# of the opening challenge or length of the returned digest as a signal as
# to which protocol the other end supports.

_ALLOWED_DIGESTS = frozenset(
{b'md5', b'sha256', b'sha384', b'sha3_256', b'sha3_384'})
_MAX_DIGEST_LEN = max(len(_) for _ in _ALLOWED_DIGESTS)

# Old hmac-md5 only server versions from Python <=3.11 sent a message of this
# length. It happens to not match the length of any supported digest so we can
# use a message of this length to indicate that we should work in backwards
# compatible md5-only mode without a {digest_name} prefix on our response.
_MD5ONLY_MESSAGE_LENGTH = 20
_MD5_DIGEST_LEN = 16
_LEGACY_LENGTHS = (_MD5ONLY_MESSAGE_LENGTH, _MD5_DIGEST_LEN)


def _get_digest_name_and_payload(message: bytes) -> (str, bytes):
"""Returns a digest name and the payload for a response hash.
If a legacy protocol is detected based on the message length
or contents the digest name returned will be empty to indicate
legacy mode where MD5 and no digest prefix should be sent.
"""
# modern message format: b"{digest}payload" longer than 20 bytes
# legacy message format: 16 or 20 byte b"payload"
if len(message) in _LEGACY_LENGTHS:
# Either this was a legacy server challenge, or we're processing
# a reply from a legacy client that sent an unprefixed 16-byte
# HMAC-MD5 response. All messages using the modern protocol will
# be longer than either of these lengths.
return '', message
if (message.startswith(b'{') and
(curly := message.find(b'}', 1, _MAX_DIGEST_LEN+2)) > 0):
digest = message[1:curly]
if digest in _ALLOWED_DIGESTS:
payload = message[curly+1:]
return digest.decode('ascii'), payload
raise AuthenticationError(
'unsupported message length, missing digest prefix, '
f'or unsupported digest: {message=}')


def _create_response(authkey, message):
"""Create a MAC based on authkey and message
The MAC algorithm defaults to HMAC-MD5, unless MD5 is not available or
the message has a '{digest_name}' prefix. For legacy HMAC-MD5, the response
is the raw MAC, otherwise the response is prefixed with '{digest_name}',
e.g. b'{sha256}abcdefg...'
Note: The MAC protects the entire message including the digest_name prefix.
"""
import hmac
digest_name = _get_digest_name_and_payload(message)[0]
# The MAC protects the entire message: digest header and payload.
if not digest_name:
# Legacy server without a {digest} prefix on message.
# Generate a legacy non-prefixed HMAC-MD5 reply.
try:
return hmac.new(authkey, message, 'md5').digest()
except ValueError:
# HMAC-MD5 is not available (FIPS mode?), fall back to
# HMAC-SHA2-256 modern protocol. The legacy server probably
# doesn't support it and will reject us anyways. :shrug:
digest_name = 'sha256'
# Modern protocol, indicate the digest used in the reply.
response = hmac.new(authkey, message, digest_name).digest()
return b'{%s}%s' % (digest_name.encode('ascii'), response)


def _verify_challenge(authkey, message, response):
"""Verify MAC challenge
If our message did not include a digest_name prefix, the client is allowed
to select a stronger digest_name from _ALLOWED_DIGESTS.
In case our message is prefixed, a client cannot downgrade to a weaker
algorithm, because the MAC is calculated over the entire message
including the '{digest_name}' prefix.
"""
import hmac
response_digest, response_mac = _get_digest_name_and_payload(response)
response_digest = response_digest or 'md5'
try:
expected = hmac.new(authkey, message, response_digest).digest()
except ValueError:
raise AuthenticationError(f'{response_digest=} unsupported')
if len(expected) != len(response_mac):
raise AuthenticationError(
f'expected {response_digest!r} of length {len(expected)} '
f'got {len(response_mac)}')
if not hmac.compare_digest(expected, response_mac):
raise AuthenticationError('digest received was wrong')


def deliver_challenge(connection, authkey):
import hmac
def deliver_challenge(connection, authkey: bytes, digest_name='sha256'):
if not isinstance(authkey, bytes):
raise ValueError(
"Authkey must be bytes, not {0!s}".format(type(authkey)))
assert MESSAGE_LENGTH > _MD5ONLY_MESSAGE_LENGTH, "protocol constraint"
message = os.urandom(MESSAGE_LENGTH)
connection.send_bytes(CHALLENGE + message)
digest = hmac.new(authkey, message, 'md5').digest()
message = b'{%s}%s' % (digest_name.encode('ascii'), message)
# Even when sending a challenge to a legacy client that does not support
# digest prefixes, they'll take the entire thing as a challenge and
# respond to it with a raw HMAC-MD5.
connection.send_bytes(_CHALLENGE + message)
response = connection.recv_bytes(256) # reject large message
if response == digest:
connection.send_bytes(WELCOME)
try:
_verify_challenge(authkey, message, response)
except AuthenticationError:
connection.send_bytes(_FAILURE)
raise
else:
connection.send_bytes(FAILURE)
raise AuthenticationError('digest received was wrong')
connection.send_bytes(_WELCOME)

def answer_challenge(connection, authkey):
import hmac

def answer_challenge(connection, authkey: bytes):
if not isinstance(authkey, bytes):
raise ValueError(
"Authkey must be bytes, not {0!s}".format(type(authkey)))
message = connection.recv_bytes(256) # reject large message
assert message[:len(CHALLENGE)] == CHALLENGE, 'message = %r' % message
message = message[len(CHALLENGE):]
digest = hmac.new(authkey, message, 'md5').digest()
if not message.startswith(_CHALLENGE):
raise AuthenticationError(
f'Protocol error, expected challenge: {message=}')
message = message[len(_CHALLENGE):]
if len(message) < _MD5ONLY_MESSAGE_LENGTH:
raise AuthenticationError('challenge too short: {len(message)} bytes')
digest = _create_response(authkey, message)
connection.send_bytes(digest)
response = connection.recv_bytes(256) # reject large message
if response != WELCOME:
if response != _WELCOME:
raise AuthenticationError('digest sent was rejected')

#
Expand Down
57 changes: 47 additions & 10 deletions Lib/test/_test_multiprocessing.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@
import multiprocessing.managers
import multiprocessing.pool
import multiprocessing.queues
from multiprocessing.connection import wait, AuthenticationError

from multiprocessing import util

Expand Down Expand Up @@ -131,8 +132,6 @@ def _resource_unlink(name, rtype):

WIN32 = (sys.platform == "win32")

from multiprocessing.connection import wait

def wait_for_handle(handle, timeout):
if timeout is not None and timeout < 0.0:
timeout = None
Expand Down Expand Up @@ -3042,7 +3041,7 @@ def test_remote(self):
del queue


@hashlib_helper.requires_hashdigest('md5')
@hashlib_helper.requires_hashdigest('sha256')
class _TestManagerRestart(BaseTestCase):

@classmethod
Expand Down Expand Up @@ -3531,7 +3530,7 @@ def test_dont_merge(self):
#

@unittest.skipUnless(HAS_REDUCTION, "test needs multiprocessing.reduction")
@hashlib_helper.requires_hashdigest('md5')
@hashlib_helper.requires_hashdigest('sha256')
class _TestPicklingConnections(BaseTestCase):

ALLOWED_TYPES = ('processes',)
Expand Down Expand Up @@ -3834,7 +3833,7 @@ def test_copy(self):


@unittest.skipUnless(HAS_SHMEM, "requires multiprocessing.shared_memory")
@hashlib_helper.requires_hashdigest('md5')
@hashlib_helper.requires_hashdigest('sha256')
class _TestSharedMemory(BaseTestCase):

ALLOWED_TYPES = ('processes',)
Expand Down Expand Up @@ -4636,7 +4635,7 @@ def test_invalid_handles(self):



@hashlib_helper.requires_hashdigest('md5')
@hashlib_helper.requires_hashdigest('sha256')
class OtherTest(unittest.TestCase):
# TODO: add more tests for deliver/answer challenge.
def test_deliver_challenge_auth_failure(self):
Expand All @@ -4656,7 +4655,7 @@ def __init__(self):
def recv_bytes(self, size):
self.count += 1
if self.count == 1:
return multiprocessing.connection.CHALLENGE
return multiprocessing.connection._CHALLENGE
elif self.count == 2:
return b'something bogus'
return b''
Expand All @@ -4666,14 +4665,52 @@ def send_bytes(self, data):
multiprocessing.connection.answer_challenge,
_FakeConnection(), b'abc')


@hashlib_helper.requires_hashdigest('md5')
@hashlib_helper.requires_hashdigest('sha256')
class ChallengeResponseTest(unittest.TestCase):
authkey = b'supadupasecretkey'

def create_response(self, message):
return multiprocessing.connection._create_response(
self.authkey, message
)

def verify_challenge(self, message, response):
return multiprocessing.connection._verify_challenge(
self.authkey, message, response
)

def test_challengeresponse(self):
for algo in [None, "md5", "sha256"]:
with self.subTest(f"{algo=}"):
msg = b'is-twenty-bytes-long' # The length of a legacy message.
if algo:
prefix = b'{%s}' % algo.encode("ascii")
else:
prefix = b''
msg = prefix + msg
response = self.create_response(msg)
if not response.startswith(prefix):
self.fail(response)
self.verify_challenge(msg, response)

# TODO(gpshead): We need integration tests for handshakes between modern
# deliver_challenge() and verify_response() code and connections running a
# test-local copy of the legacy Python <=3.11 implementations.

# TODO(gpshead): properly annotate tests for requires_hashdigest rather than
# only running these on a platform supporting everything. otherwise logic
# issues preventing it from working on FIPS mode setups will be hidden.

#
# Test Manager.start()/Pool.__init__() initializer feature - see issue 5585
#

def initializer(ns):
ns.test += 1

@hashlib_helper.requires_hashdigest('md5')
@hashlib_helper.requires_hashdigest('sha256')
class TestInitializers(unittest.TestCase):
def setUp(self):
self.mgr = multiprocessing.Manager()
Expand Down Expand Up @@ -5537,7 +5574,7 @@ def is_alive(self):
any(process.is_alive() for process in forked_processes))


@hashlib_helper.requires_hashdigest('md5')
@hashlib_helper.requires_hashdigest('sha256')
class TestSyncManagerTypes(unittest.TestCase):
"""Test all the types which can be shared between a parent and a
child process by using a manager which acts as an intermediary
Expand Down Expand Up @@ -5969,7 +6006,7 @@ def install_tests_in_module_dict(remote_globs, start_method):
class Temp(base, Mixin, unittest.TestCase):
pass
if type_ == 'manager':
Temp = hashlib_helper.requires_hashdigest('md5')(Temp)
Temp = hashlib_helper.requires_hashdigest('sha256')(Temp)
Temp.__name__ = Temp.__qualname__ = newname
Temp.__module__ = __module__
remote_globs[newname] = Temp
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
:mod:`multiprocessing` now supports stronger HMAC algorithms for inter-process
connection authentication rather than only HMAC-MD5.

0 comments on commit 3ed57e4

Please sign in to comment.