Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: base CloudEvent class as per v1 specs, including attribute validation #242

Merged
merged 20 commits into from
Nov 16, 2024
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
Show all changes
20 commits
Select commit Hold shift + click to select a range
a2ac762
feat: base `CloudEvent` class as per v1 specs, including attribute va…
PlugaruT Nov 8, 2024
8db1e29
chore: add typings and docstrings
PlugaruT Nov 8, 2024
35dee7d
chore: Add support for custom extension names and validate them
PlugaruT Nov 9, 2024
42b4fe1
chore: Add copyright and fix missing type info
PlugaruT Nov 9, 2024
f83c363
chore: Add getters for attributes and test happy path
PlugaruT Nov 9, 2024
9d1aa35
fix: typing
PlugaruT Nov 9, 2024
aa81ca0
chore: Split validation logic into smaller methods
PlugaruT Nov 11, 2024
b2b0649
chore: Add method to extract extension by name
PlugaruT Nov 11, 2024
b202325
chore: configure ruff to sort imports also
PlugaruT Nov 11, 2024
c5e6df9
chore: Returns all the errors at ones instead of raising early. Impro…
PlugaruT Nov 11, 2024
6e13f72
fix missing type info
PlugaruT Nov 11, 2024
e78a70b
chore: Improve exceptions handling. Have exceptions grouped by attrib…
PlugaruT Nov 13, 2024
443aee9
chore: Skip type checing for getters of required attributes
PlugaruT Nov 13, 2024
1d43d68
fix: missing type
PlugaruT Nov 14, 2024
21493e1
chore: Improve exceptions and introduce a new one for invalid values
PlugaruT Nov 14, 2024
68337f9
fix: str representation for validation error
PlugaruT Nov 14, 2024
d0bba86
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Nov 14, 2024
599d05c
fix: Fix missing type definitions
PlugaruT Nov 14, 2024
43f1d0c
small fix
PlugaruT Nov 14, 2024
7d18098
remove cast of defaultdict to dict
PlugaruT Nov 14, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions src/cloudevents/core/v1/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
"""
CloudEvent implementation for v1.0
"""
140 changes: 140 additions & 0 deletions src/cloudevents/core/v1/event.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,140 @@
# Copyright 2018-Present The CloudEvents Authors
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.

from typing import Any, Optional
from datetime import datetime
import re

REQUIRED_ATTRIBUTES = {"id", "source", "type", "specversion"}
OPTIONAL_ATTRIBUTES = {"datacontenttype", "dataschema", "subject", "time"}
PlugaruT marked this conversation as resolved.
Show resolved Hide resolved


class CloudEvent:
PlugaruT marked this conversation as resolved.
Show resolved Hide resolved
PlugaruT marked this conversation as resolved.
Show resolved Hide resolved
def __init__(self, attributes: dict, data: Optional[dict] = None) -> None:
PlugaruT marked this conversation as resolved.
Show resolved Hide resolved
"""
Create a new CloudEvent instance.

:param attributes: The attributes of the CloudEvent instance.
:type attributes: dict
:param data: The payload of the CloudEvent instance.
:type data: Optional[dict]
PlugaruT marked this conversation as resolved.
Show resolved Hide resolved

:raises ValueError: If any of the required attributes are missing or have invalid values.
:raises TypeError: If any of the attributes have invalid types.
"""
self._validate_attribute(attributes)
self._attributes = attributes
self._data = data
PlugaruT marked this conversation as resolved.
Show resolved Hide resolved

def _validate_attribute(self, attributes: dict) -> None:
PlugaruT marked this conversation as resolved.
Show resolved Hide resolved
"""
Private method that validates the attributes of the CloudEvent as per the CloudEvents specification.
PlugaruT marked this conversation as resolved.
Show resolved Hide resolved
"""
missing_attributes = [
attr for attr in REQUIRED_ATTRIBUTES if attr not in attributes
]
if missing_attributes:
raise ValueError(
f"Missing required attribute(s): {', '.join(missing_attributes)}"
)

if attributes["id"] is None:
raise ValueError("Attribute 'id' must not be None")

if not isinstance(attributes["id"], str):
raise TypeError("Attribute 'id' must be a string")

if not isinstance(attributes["source"], str):
raise TypeError("Attribute 'source' must be a string")

if not isinstance(attributes["type"], str):
raise TypeError("Attribute 'type' must be a string")

if not isinstance(attributes["specversion"], str):
raise TypeError("Attribute 'specversion' must be a string")

if attributes["specversion"] != "1.0":
raise ValueError("Attribute 'specversion' must be '1.0'")

if "time" in attributes:
if not isinstance(attributes["time"], datetime):
raise TypeError("Attribute 'time' must be a datetime object")

if not attributes["time"].tzinfo:
raise ValueError("Attribute 'time' must be timezone aware")

if "subject" in attributes:
if not isinstance(attributes["subject"], str):
raise TypeError("Attribute 'subject' must be a string")

if not attributes["subject"]:
raise ValueError("Attribute 'subject' must not be empty")

if "datacontenttype" in attributes:
if not isinstance(attributes["datacontenttype"], str):
raise TypeError("Attribute 'datacontenttype' must be a string")

if not attributes["datacontenttype"]:
raise ValueError("Attribute 'datacontenttype' must not be empty")

if "dataschema" in attributes:
if not isinstance(attributes["dataschema"], str):
raise TypeError("Attribute 'dataschema' must be a string")

if not attributes["dataschema"]:
raise ValueError("Attribute 'dataschema' must not be empty")

for custom_extension in (
set(attributes.keys()) - REQUIRED_ATTRIBUTES - OPTIONAL_ATTRIBUTES
):
PlugaruT marked this conversation as resolved.
Show resolved Hide resolved
if custom_extension == "data":
raise ValueError(
"Extension attribute 'data' is reserved and must not be used"
)

if not custom_extension[0].isalpha():
PlugaruT marked this conversation as resolved.
Show resolved Hide resolved
raise ValueError(
f"Extension attribute '{custom_extension}' should start with a letter"
)

if not (5 <= len(custom_extension) <= 20):
PlugaruT marked this conversation as resolved.
Show resolved Hide resolved
raise ValueError(
f"Extension attribute '{custom_extension}' should be between 5 and 20 characters long"
)

if not re.match(r"^[a-z0-9]+$", custom_extension):
raise ValueError(
f"Extension attribute '{custom_extension}' should only contain lowercase letters and numbers"
)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

it's probably a good idea to:

  1. split this method into multiple smaller ones with distinct validations.
  2. maybe extract some reusable logic
  3. maybe combine checks in order to avoid reporting attribute errors one-by-one. Meaning that we should go over all attributes and emit a combined error with all the possible attribute errors. Just as you did with required attributes

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

My goal is to have this validation logic in a single place, it's easier to follow IMO and read it, rather than navigate into multiple small methods.

About extracting reusable logic, I can try to look into this, but I also wanted to have it "match" the spec and treat each attribute as individual. Again, for readability reasons mainly. It's easier to reason about when the whole validation logic is colocated.

maybe combine checks in order to avoid reporting attribute errors one-by-one.

I'm a fan of "fail fast" approach, what would be an advantage of raising all exceptions at once?

Copy link
Author

@PlugaruT PlugaruT Nov 9, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What do you think about something like this?

        CloudEvent._validate_required_attributes(attributes)
        CloudEvent._validate_attribute_types(attributes)
        CloudEvent._validate_optional_attributes(attributes)
        CloudEvent._validate_extension_attributes(attributes)

and each method will hold the necessary validation logic?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Grouping CloudEvents (CE) attributes validation with required and optional as well as having a separate extension rules is IMO a good idea.

On the fail fast approach - sure, that's a valid one, but IMO not in this case. I guess people would only see the validation errors when they are developing against CE, not during active usage (hopefully). So it makes sense to gather all the validation errors all together and present them together to ease analysis of a defective event. I'd probably even go and create a custom error instead of using standard ones with more details on what and where has failed the validation.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I guess people would only see the validation errors when they are developing against CE, not during active usage (hopefully).

From my brief experience using CloudEvents with Python and Java SDK, the validation usually fails at runtime, in production 😅 it's where all the random crap gets into the events for different reasons. I'm fine with returning all the validation errors at once, not my first option, but I'll do it.


def get_attribute(self, attribute: str) -> Optional[Any]:
"""
Retrieve a value of an attribute of the event denoted by the given `attribute`.

:param attribute: The name of the event attribute to retrieve the value for.
:type attribute: str

:return: The event attribute value.
:rtype: Optional[Any]
PlugaruT marked this conversation as resolved.
Show resolved Hide resolved
"""
return self._attributes[attribute]

def get_data(self) -> Optional[dict]:
"""
Retrieve data of the event.

:return: The data of the event.
:rtype: Optional[dict]
"""
return self._data
Empty file added tests/test_core/__init__.py
Empty file.
Empty file.
196 changes: 196 additions & 0 deletions tests/test_core/test_v1/test_event.py
PlugaruT marked this conversation as resolved.
Show resolved Hide resolved
Original file line number Diff line number Diff line change
@@ -0,0 +1,196 @@
from cloudevents.core.v1.event import CloudEvent

import pytest
from datetime import datetime
from typing import Any, Optional


@pytest.mark.parametrize(
"attributes, missing_attribute",
[
({"source": "/", "type": "test", "specversion": "1.0"}, "id"),
({"id": "1", "type": "test", "specversion": "1.0"}, "source"),
({"id": "1", "source": "/", "specversion": "1.0"}, "type"),
({"id": "1", "source": "/", "type": "test"}, "specversion"),
],
)
def test_missing_required_attribute(attributes: dict, missing_attribute: str) -> None:
with pytest.raises(ValueError) as e:
CloudEvent(attributes)

assert str(e.value) == f"Missing required attribute(s): {missing_attribute}"


@pytest.mark.parametrize(
"id,error",
[
(None, "Attribute 'id' must not be None"),
(12, "Attribute 'id' must be a string"),
],
)
def test_id_validation(id: Optional[Any], error: str) -> None:
with pytest.raises((ValueError, TypeError)) as e:
CloudEvent({"id": id, "source": "/", "type": "test", "specversion": "1.0"})

assert str(e.value) == error


@pytest.mark.parametrize("source,error", [(123, "Attribute 'source' must be a string")])
def test_source_validation(source: Any, error: str) -> None:
with pytest.raises((ValueError, TypeError)) as e:
CloudEvent({"id": "1", "source": source, "type": "test", "specversion": "1.0"})

assert str(e.value) == error


@pytest.mark.parametrize(
"specversion,error",
[
(1.0, "Attribute 'specversion' must be a string"),
("1.4", "Attribute 'specversion' must be '1.0'"),
],
)
def test_specversion_validation(specversion: Any, error: str) -> None:
with pytest.raises((ValueError, TypeError)) as e:
CloudEvent(
{"id": "1", "source": "/", "type": "test", "specversion": specversion}
)

assert str(e.value) == error


@pytest.mark.parametrize(
"time,error",
[
("2023-10-25T17:09:19.736166Z", "Attribute 'time' must be a datetime object"),
(
datetime(2023, 10, 25, 17, 9, 19, 736166),
"Attribute 'time' must be timezone aware",
),
],
)
def test_time_validation(time: Any, error: str) -> None:
with pytest.raises((ValueError, TypeError)) as e:
CloudEvent(
{
"id": "1",
"source": "/",
"type": "test",
"specversion": "1.0",
"time": time,
}
)

assert str(e.value) == error


@pytest.mark.parametrize(
"subject,error",
[
(1234, "Attribute 'subject' must be a string"),
(
"",
"Attribute 'subject' must not be empty",
),
],
)
def test_subject_validation(subject: Any, error: str) -> None:
with pytest.raises((ValueError, TypeError)) as e:
CloudEvent(
{
"id": "1",
"source": "/",
"type": "test",
"specversion": "1.0",
"subject": subject,
}
)

assert str(e.value) == error


@pytest.mark.parametrize(
"datacontenttype,error",
[
(1234, "Attribute 'datacontenttype' must be a string"),
(
"",
"Attribute 'datacontenttype' must not be empty",
),
],
)
def test_datacontenttype_validation(datacontenttype: Any, error: str) -> None:
with pytest.raises((ValueError, TypeError)) as e:
CloudEvent(
{
"id": "1",
"source": "/",
"type": "test",
"specversion": "1.0",
"datacontenttype": datacontenttype,
}
)

assert str(e.value) == error


@pytest.mark.parametrize(
"dataschema,error",
[
(1234, "Attribute 'dataschema' must be a string"),
(
"",
"Attribute 'dataschema' must not be empty",
),
],
)
def test_dataschema_validation(dataschema: Any, error: str) -> None:
with pytest.raises((ValueError, TypeError)) as e:
CloudEvent(
{
"id": "1",
"source": "/",
"type": "test",
"specversion": "1.0",
"dataschema": dataschema,
}
)

assert str(e.value) == error


@pytest.mark.parametrize(
"extension_name,error",
[
("123", "Extension attribute '123' should start with a letter"),
(
"shrt",
"Extension attribute 'shrt' should be between 5 and 20 characters long",
),
(
"thisisaverylongextension",
"Extension attribute 'thisisaverylongextension' should be between 5 and 20 characters long",
),
(
"ThisIsNotValid",
"Extension attribute 'ThisIsNotValid' should only contain lowercase letters and numbers",
),
(
"data",
"Extension attribute 'data' is reserved and must not be used",
),
],
)
def test_custom_extension(extension_name: str, error: str) -> None:
with pytest.raises(ValueError) as e:
CloudEvent(
{
"id": "1",
"source": "/",
"type": "test",
"specversion": "1.0",
extension_name: "value",
}
)

assert str(e.value) == error