canonical · TheRealFalcon · Jan 15, 2025 · Jan 14, 2025 · Jan 14, 2025 · Jan 14, 2025
diff --git a/cloudinit/cmd/main.py b/cloudinit/cmd/main.py
@@ -330,8 +330,12 @@ def _should_wait_via_user_data(
     if not raw_config:
         return False, "no configuration found"
 
+    # The user data may be an arbitrarily large binary blob (e.g. a gzipped
+    # file), so only inspect a short prefix to identify its header. The
+    # prefix must be long enough to tell "#cloud-config-archive" apart from
+    # "#cloud-config", which share the same first 13 characters.
     if (
-        handlers.type_from_starts_with(raw_config.strip()[:13])
+        handlers.type_from_starts_with(raw_config.strip()[:42])
         != "text/cloud-config"
     ):
         return True, "non-cloud-config user data found"
@@ -348,6 +352,9 @@ def _should_wait_via_user_data(
         )
         return True, "failed to parse user data as yaml"
 
+    if not isinstance(parsed_yaml, dict):
+        return True, "parsed config not in cloud-config format"
+
     # These all have the potential to require network access, so we should wait
     if "write_files" in parsed_yaml:
         for item in parsed_yaml["write_files"]:

diff --git a/tests/integration_tests/modules/test_cloud_config_archive.py b/tests/integration_tests/modules/test_cloud_config_archive.py
@@ -0,0 +1,31 @@
+import pytest
+
+from tests.integration_tests.instances import IntegrationInstance
+from tests.integration_tests.util import verify_clean_boot, verify_clean_log
+
+USER_DATA = """\
+#cloud-config-archive
+- type: "text/cloud-boothook"
+  content: |
+    #!/bin/sh
+    echo "this is from a boothook." > /var/tmp/boothook.txt
+- type: "text/cloud-config"
+  content: |
+    bootcmd:
+    - echo "this is from a cloud-config." > /var/tmp/bootcmd.txt
+"""
+
+
+@pytest.mark.ci
+@pytest.mark.user_data(USER_DATA)
+def test_cloud_config_archive(client: IntegrationInstance):
+    """Basic correctness test for #cloud-config-archive."""
+    log = client.read_from_file("/var/log/cloud-init.log")
+    assert "this is from a boothook." in client.read_from_file(
+        "/var/tmp/boothook.txt"
+    )
+    assert "this is from a cloud-config." in client.read_from_file(
+        "/var/tmp/bootcmd.txt"
+    )
+    verify_clean_log(log)
+    verify_clean_boot(client)
diff --git a/tests/unittests/cmd/test_main.py b/tests/unittests/cmd/test_main.py
@@ -18,6 +18,19 @@
 )
 
 
+CLOUD_CONFIG_ARCHIVE = """\
+#cloud-config-archive
+- type: "text/cloud-boothook"
+  content: |
+    #!/bin/sh
+    echo "this is from a boothook." > /var/tmp/boothook.txt
+- type: "text/cloud-config"
+  content: |
+    bootcmd:
+    - echo "this is from a cloud-config." > /var/tmp/bootcmd.txt
+"""
+
+
 EXTRA_CLOUD_CONFIG = """\
 #cloud-config
 write_files
@@ -264,6 +277,8 @@ def test_main_sys_argv(
             ),
             # Not parseable as yaml
             (mock.Mock(), "#cloud-config\nbootcmd:\necho hello", True),
+            # YAML that parses to a list rather than a dict
+            (mock.Mock(), CLOUD_CONFIG_ARCHIVE, True),
             # Non-cloud-config
             (mock.Mock(), "#!/bin/bash\n  - echo hello", True),
             # Something that after processing won't decode to utf-8