Skip to content
This repository has been archived by the owner on Nov 30, 2022. It is now read-only.

Commit

Permalink
[#743] Store provided identity data in application database (#834)
Browse files Browse the repository at this point in the history
* adds identity fields to PrivacyRequest model

* store identity data inside database

* update changelog

* add identities in test data command

* store identities provided via the DRP creation endpoint

* black + isort

* store provided identity data in request creation from onetrust

* remove deprecated migration

* adds new provided identity table

* use new provided identity table

* add docstring, remove comment

* update DRP privacy request creation to use ProvidedIdentity model

* update identity creation in test data command

* use persisted identity in OneTrust

* update test to use persisted identity

* isort update

* use enums

* optionally receive a salt in hash_value cmd

* use a constant salt for provided identity hashing

* remove import

* use typehints

* update typedef

* use enum in dict
  • Loading branch information
Sean Preston committed Jul 11, 2022
1 parent 55d8e65 commit 9c028f4
Show file tree
Hide file tree
Showing 11 changed files with 266 additions and 19 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ The types of changes are:
* Adds SaaS connection type to SaaS yaml config [#748](https://github.com/ethyca/fidesops/pull/748)
* Adds endpoint to get available connectors (database and saas) [#768](https://github.com/ethyca/fidesops/pull/768)
* Adds endpoint to get the secrets required for different connectors [#795](https://github.com/ethyca/fidesops/pull/795)
* Store provided identity data in the application database, in a new provided identity table [#834](https://github.com/ethyca/fidesops/pull/834)

### Developer Experience
* Replace user authentication routes with fideslib routes [#811](https://github.com/ethyca/fidesops/pull/811)
Expand Down
10 changes: 9 additions & 1 deletion create_test_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
from fidesops.models.policy import ActionType, Policy, Rule, RuleTarget
from fidesops.models.privacy_request import PrivacyRequest, PrivacyRequestStatus
from fidesops.models.storage import ResponseFormat, StorageConfig
from fidesops.schemas.redis_cache import PrivacyRequestIdentity
from fidesops.schemas.storage.storage import FileNaming, StorageDetails, StorageType
from fidesops.util.data_category import DataCategory

Expand Down Expand Up @@ -176,7 +177,7 @@ def create_test_data(db: orm.Session) -> FidesUser:

for policy in policies:
for status in PrivacyRequestStatus.__members__.values():
PrivacyRequest.create(
pr = PrivacyRequest.create(
db=db,
data={
"external_id": f"ext-{uuid4()}",
Expand All @@ -188,6 +189,13 @@ def create_test_data(db: orm.Session) -> FidesUser:
"client_id": policy.client_id,
},
)
pr.persist_identity(
db=db,
identity=PrivacyRequestIdentity(
email="test@example.com",
phone_number="+1 234 567 8910",
),
)

print("Adding connection configs")
_create_connection_configs(db)
Expand Down
17 changes: 11 additions & 6 deletions src/fidesops/api/v1/endpoints/drp_endpoints.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,12 +89,6 @@ def create_drp_privacy_request(
)

try:
privacy_request: PrivacyRequest = PrivacyRequest.create(
db=db, data=privacy_request_kwargs
)

logger.info(f"Decrypting identity for DRP privacy request {privacy_request.id}")

decrypted_identity: DrpIdentity = DrpIdentity(
**jwt.decode(data.identity, jwt_key, algorithms=["HS256"])
)
Expand All @@ -103,6 +97,17 @@ def create_drp_privacy_request(
drp_identity=decrypted_identity
)

privacy_request: PrivacyRequest = PrivacyRequest.create(
db=db,
data=privacy_request_kwargs,
)
privacy_request.persist_identity(
db=db,
identity=mapped_identity,
)

logger.info(f"Decrypting identity for DRP privacy request {privacy_request.id}")

cache_data(privacy_request, policy, mapped_identity, None, data)

queue_privacy_request(privacy_request.id)
Expand Down
3 changes: 3 additions & 0 deletions src/fidesops/api/v1/endpoints/privacy_request_endpoints.py
Original file line number Diff line number Diff line change
Expand Up @@ -178,6 +178,9 @@ def create_privacy_request(

try:
privacy_request: PrivacyRequest = PrivacyRequest.create(db=db, data=kwargs)
privacy_request.persist_identity(
db=db, identity=privacy_request_data.identity
)

cache_data(
privacy_request,
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
"""adds provided identity table for identity storage and later identity search
Revision ID: 3c5e1253465d
Revises: fc90277bbcde
Create Date: 2022-07-08 11:53:05.215848
"""
import sqlalchemy as sa
import sqlalchemy_utils
from alembic import op

# revision identifiers, used by Alembic.
revision = "3c5e1253465d"
down_revision = "fc90277bbcde"
branch_labels = None
depends_on = None


def upgrade():
    """Create the ``providedidentity`` table together with its two indexes."""
    table_name = "providedidentity"

    def timestamp_column(column_name):
        # Nullable, timezone-aware timestamp whose default comes from the
        # database server via ``now()``.
        return sa.Column(
            column_name,
            sa.DateTime(timezone=True),
            server_default=sa.text("now()"),
            nullable=True,
        )

    identity_type = sa.Enum("email", "phone_number", name="providedidentitytype")
    encrypted_type = (
        sqlalchemy_utils.types.encrypted.encrypted_type.StringEncryptedType()
    )

    op.create_table(
        table_name,
        sa.Column("id", sa.String(length=255), nullable=False),
        timestamp_column("created_at"),
        timestamp_column("updated_at"),
        sa.Column("privacy_request_id", sa.String(), nullable=False),
        sa.Column("field_name", identity_type, nullable=False),
        sa.Column("hashed_value", sa.String(), nullable=True),
        sa.Column("encrypted_value", encrypted_type, nullable=True),
        # Every row must point at an existing privacy request.
        sa.ForeignKeyConstraint(["privacy_request_id"], ["privacyrequest.id"]),
        sa.PrimaryKeyConstraint("id"),
    )

    # Non-unique indexes: one on the hashed value, one on the primary key.
    hashed_value_index = op.f("ix_providedidentity_hashed_value")
    op.create_index(hashed_value_index, table_name, ["hashed_value"], unique=False)
    id_index = op.f("ix_providedidentity_id")
    op.create_index(id_index, table_name, ["id"], unique=False)


def downgrade():
    """Undo ``upgrade``: drop both indexes, then the ``providedidentity`` table."""
    table_name = "providedidentity"
    id_index = op.f("ix_providedidentity_id")
    hashed_value_index = op.f("ix_providedidentity_hashed_value")

    # Indexes are dropped in the reverse of the order they were created in.
    op.drop_index(id_index, table_name=table_name)
    op.drop_index(hashed_value_index, table_name=table_name)
    op.drop_table(table_name)
109 changes: 106 additions & 3 deletions src/fidesops/models/privacy_request.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
from typing import Any, Dict, List, Optional

from celery.result import AsyncResult
from fideslib.cryptography.cryptographic_util import hash_with_salt
from fideslib.db.base import Base
from fideslib.db.base_class import FidesBase
from fideslib.models.audit_log import AuditLog
Expand All @@ -17,12 +18,17 @@
from sqlalchemy import Enum as EnumColumn
from sqlalchemy import ForeignKey, String
from sqlalchemy.dialects.postgresql import JSONB
from sqlalchemy.ext.mutable import MutableList
from sqlalchemy.ext.mutable import MutableDict, MutableList
from sqlalchemy.orm import Session, backref, relationship
from sqlalchemy_utils.types.encrypted.encrypted_type import (
AesGcmEngine,
StringEncryptedType,
)

from fidesops.api.v1.scope_registry import PRIVACY_REQUEST_CALLBACK_RESUME
from fidesops.common_exceptions import PrivacyRequestPaused
from fidesops.core.config import config
from fidesops.db.base_class import JSONTypeOverride
from fidesops.graph.config import CollectionAddress
from fidesops.models.policy import (
ActionType,
Expand Down Expand Up @@ -202,13 +208,16 @@ def create(cls, db: Session, *, data: Dict[str, Any]) -> FidesBase:

def delete(self, db: Session) -> None:
    """
    Remove everything tied to this privacy request before removing the request
    itself: first its cache entries, then its persisted provided identities,
    and finally the database row for the request.
    """
    redis_cache: FidesopsRedis = get_cache()
    request_cache_keys = get_all_cache_keys_for_privacy_request(
        privacy_request_id=self.id
    )
    for cache_key in request_cache_keys:
        redis_cache.delete(cache_key)

    # Provided identities reference this request, so they go first.
    for identity_row in self.provided_identities:
        identity_row.delete(db=db)

    super().delete(db=db)

def cache_identity(self, identity: PrivacyRequestIdentity) -> None:
Expand All @@ -222,6 +231,39 @@ def cache_identity(self, identity: PrivacyRequestIdentity) -> None:
value,
)

def persist_identity(self, db: Session, identity: PrivacyRequestIdentity) -> None:
    """
    Save each identity value supplied with this privacy request as its own
    ProvidedIdentity row. A row carries the field name, a hash of the value
    (usable as a blind index for later searching) and the value itself.
    Fields whose value is None are not stored.
    """
    provided_fields: Dict[str, Any] = dict(identity)
    for field_name, raw_value in provided_fields.items():
        if raw_value is None:
            continue

        value_hash = ProvidedIdentity.hash_value(raw_value)
        row_data = {
            "privacy_request_id": self.id,
            "field_name": field_name,
            # Encryption of this column is handled at the ORM level, so the
            # raw value is passed through unchanged here.
            "encrypted_value": {"value": raw_value},
            "hashed_value": value_hash,
        }
        ProvidedIdentity.create(db=db, data=row_data)

def get_persisted_identity(self) -> PrivacyRequestIdentity:
    """
    Rebuild a PrivacyRequestIdentity from the provided identity rows stored
    for this privacy request.
    """
    persisted = PrivacyRequestIdentity()
    for identity_row in self.provided_identities:
        # The enum member's value doubles as the attribute name on the schema.
        attribute_name = identity_row.field_name.value
        stored_value = identity_row.encrypted_value["value"]
        setattr(persisted, attribute_name, stored_value)
    return persisted

def cache_task_id(self, task_id: str) -> None:
"""Sets a task_id for this privacy request's asynchronous execution."""
cache: FidesopsRedis = get_cache()
Expand Down Expand Up @@ -493,6 +535,67 @@ def error_processing(self, db: Session) -> None:
)


class ProvidedIdentityType(EnumType):
    """Enum of the identity field names that can be stored for a privacy request"""

    # Each member's value is used as an attribute name on PrivacyRequestIdentity
    # when persisted identities are read back, and the same two strings make up
    # the `providedidentitytype` enum created by the migration — keep them in sync.
    email = "email"
    phone_number = "phone_number"


class ProvidedIdentity(Base):  # pylint: disable=R0904
    """
    A table for storing identity fields and values provided at privacy request
    creation time.

    Each row holds one identity field of one privacy request: the field's name,
    a hash of its value, and the value itself in an encrypted column.
    """

    privacy_request_id = Column(
        String,
        ForeignKey(PrivacyRequest.id_field_path),
        nullable=False,
    )
    privacy_request = relationship(
        PrivacyRequest,
        backref="provided_identities",
    )  # Which privacy request this identity belongs to

    field_name = Column(
        EnumColumn(ProvidedIdentityType),
        index=False,
        nullable=False,
    )
    hashed_value = Column(
        String,
        index=True,
        unique=False,
        nullable=True,
    )  # This field is used as a blind index for exact match searches
    encrypted_value = Column(
        MutableDict.as_mutable(
            StringEncryptedType(
                JSONTypeOverride,
                config.security.APP_ENCRYPTION_KEY,
                AesGcmEngine,
                "pkcs5",
            )
        ),
        nullable=True,
    )  # Type bytea in the db

    @classmethod
    def hash_value(
        cls,
        value: str,
        encoding: str = "UTF-8",
    ) -> str:
        """
        Hash an identity value with a constant salt.

        The salt is fixed rather than generated per call, so the same input
        always produces the same hash. That is what lets `hashed_value` be
        compared for exact matches.

        :param value: the identity value to hash
        :param encoding: encoding used to turn both the value and the salt into bytes
        :return: the single hashed value produced by `hash_with_salt`
        """
        # NOTE(review): the salt is hard-coded; hashes already stored in
        # `hashed_value` were computed with it, so changing it would stop new
        # hashes from matching existing rows.
        SALT = "a-salt"
        hashed_value = hash_with_salt(
            value.encode(encoding),
            SALT.encode(encoding),
        )
        return hashed_value


# Unique text to separate a step from a collection address, so we can store two values in one.
PAUSED_SEPARATOR = "__fidesops_paused_sep__"

Expand Down
14 changes: 9 additions & 5 deletions src/fidesops/service/drp/drp_fidesops_mapper.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import logging
from typing import Dict

from fidesops.models.privacy_request import PrivacyRequestStatus
from fidesops.models.privacy_request import PrivacyRequestStatus, ProvidedIdentityType
from fidesops.schemas.drp_privacy_request import DrpIdentity
from fidesops.schemas.privacy_request import PrivacyRequestDRPStatus
from fidesops.schemas.redis_cache import PrivacyRequestIdentity
Expand All @@ -22,17 +22,21 @@ def map_identity(drp_identity: DrpIdentity) -> PrivacyRequestIdentity:
This class also allows us to implement custom logic to handle "verified" id props.
"""
fidesops_identity_kwargs: Dict[str, str] = {}
DRP_TO_FIDESOPS_SUPPORTED_IDENTITY_PROPS_MAP: Dict[str, str] = {
"email": "email",
"phone_number": "phone_number",
DRP_TO_FIDESOPS_SUPPORTED_IDENTITY_PROPS_MAP: Dict[
str, ProvidedIdentityType
] = {
"email": ProvidedIdentityType.email,
"phone_number": ProvidedIdentityType.phone_number,
}
for attr, val in drp_identity.__dict__.items():
if attr not in DRP_TO_FIDESOPS_SUPPORTED_IDENTITY_PROPS_MAP:
logger.warning(
f"Identity attribute of {attr} is not supported by Fidesops at this time. Continuing to use other identity props, if provided."
)
else:
fidesops_prop: str = DRP_TO_FIDESOPS_SUPPORTED_IDENTITY_PROPS_MAP[attr]
fidesops_prop: str = DRP_TO_FIDESOPS_SUPPORTED_IDENTITY_PROPS_MAP[
attr
].value
fidesops_identity_kwargs[fidesops_prop] = val
return PrivacyRequestIdentity(**fidesops_identity_kwargs)

Expand Down
5 changes: 5 additions & 0 deletions src/fidesops/service/privacy_request/onetrust_service.py
Original file line number Diff line number Diff line change
Expand Up @@ -152,7 +152,12 @@ def _create_privacy_request( # pylint: disable=R0913
"client_id": onetrust_policy.client_id,
"external_id": subtask_id,
}

privacy_request: PrivacyRequest = PrivacyRequest.create(db=db, data=kwargs)
privacy_request.persist_identity(
db=db,
identity=PrivacyRequestIdentity(email=identity.email),
)
privacy_request.cache_identity(identity)
try:
queue_privacy_request(privacy_request_id=privacy_request.id)
Expand Down
14 changes: 11 additions & 3 deletions tests/api/v1/endpoints/test_drp_endpoints.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,8 +43,12 @@ def test_create_drp_privacy_request(
policy_drp_action,
cache,
):

identity = {"email": "test@example.com"}
TEST_EMAIL = "test@example.com"
TEST_PHONE_NUMBER = "+1 234 567 8910"
identity = {
"email": TEST_EMAIL,
"phone_number": TEST_PHONE_NUMBER,
}
encoded_identity: str = jwt.encode(
identity, config.security.DRP_JWT_SECRET, algorithm="HS256"
)
Expand Down Expand Up @@ -84,13 +88,17 @@ def test_create_drp_privacy_request(
)
assert (
cache.get(identity_key)
== "eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJlbWFpbCI6InRlc3RAZXhhbXBsZS5jb20ifQ.4I8XLWnTYp8oMHjN2ypP3Hpg45DIaGNAEmj1QCYONUI"
== "eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJlbWFpbCI6InRlc3RAZXhhbXBsZS5jb20iLCJwaG9uZV9udW1iZXIiOiIrMSAyMzQgNTY3IDg5MTAifQ.kHV4ru6vxQR96Meae31oKIU7mMnTJgt1cnli6GLUBFk"
)
fidesops_identity_key = get_identity_cache_key(
privacy_request_id=pr.id,
identity_attribute="email",
)
assert cache.get(fidesops_identity_key) == identity["email"]
persisted_identity = pr.get_persisted_identity()
assert persisted_identity.email == TEST_EMAIL
assert persisted_identity.phone_number == TEST_PHONE_NUMBER

pr.delete(db=db)
assert run_access_request_mock.called

Expand Down
Loading

0 comments on commit 9c028f4

Please sign in to comment.