Skip to content

Commit

Permalink
Hash API keys (#3842)
Browse files Browse the repository at this point in the history
* Store hashed API keys

Aleph used to store user API keys as plaintext in the database. This commit changes that to store only a hash of the API key.

API keys are generated using the built-in `secrets.token_urlsafe` method which returns a random 256 bit token. In contrast to passwords, API keys are not provided by users, have a high entropy, and need to be validated on every request. It seems to be generally accepted that, given 256 bit tokens, salting or using an expensive key derivation functions isn't necessary. For this reason, we’re storing an unsalted SHA-256 hash of the API key which also makes it easy to look up and verify a given API key.

I've added a separate column for the hashed API key rather than reusing the existing column. This allows us to batch-hash all existing plaintext keys without having to differentiate between keys that have already been hashed and those that haven't. Once all existing plaintext API keys have been hashed, the old `api_key` column can simply be dropped.

* Add CLI command to store legacy plaintext API keys

* Remove prefilled API key from OpenRefine endpoints

Required as we do not store plaintext API keys anymore. Also, we want to remove the option to pass API keys via URL parameters in the future.

This makes it impossible to use OpenRefine with non-public collections. This was never documented, and most users weren't aware that they can indeed use OpenRefine with non-public collections anyway.
  • Loading branch information
tillprochaska authored Dec 10, 2024
1 parent 3c154e3 commit 96c0492
Show file tree
Hide file tree
Showing 9 changed files with 127 additions and 28 deletions.
33 changes: 28 additions & 5 deletions aleph/logic/api_keys.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
from aleph.model.common import make_token
from aleph.logic.mail import email_role
from aleph.logic.roles import update_role
from aleph.logic.util import ui_url
from aleph.logic.util import ui_url, hash_api_key

# Number of days after which API keys expire
API_KEY_EXPIRATION_DAYS = 90
Expand All @@ -29,15 +29,16 @@ def generate_user_api_key(role):
email_role(role, subject, html=html, plain=plain)

now = datetime.datetime.now(datetime.timezone.utc).replace(microsecond=0)
role.api_key = make_token()
api_key = make_token()
role.api_key_digest = hash_api_key(api_key)
role.api_key_expires_at = now + datetime.timedelta(days=API_KEY_EXPIRATION_DAYS)
role.api_key_expiration_notification_sent = None

db.session.add(role)
db.session.commit()
update_role(role)

return role.api_key
return api_key


def send_api_key_expiration_notifications():
Expand Down Expand Up @@ -70,7 +71,7 @@ def _send_api_key_expiration_notification(
query = query.where(
and_(
and_(
Role.api_key != None, # noqa: E711
Role.api_key_digest != None, # noqa: E711
func.date(Role.api_key_expires_at) <= threshold,
),
or_(
Expand Down Expand Up @@ -104,10 +105,32 @@ def reset_api_key_expiration():
query = query.yield_per(500)
query = query.where(
and_(
Role.api_key != None, # noqa: E711
Role.api_key_digest != None, # noqa: E711
Role.api_key_expires_at == None, # noqa: E711
)
)

query.update({Role.api_key_expires_at: expires_at})
db.session.commit()


def hash_plaintext_api_keys():
query = Role.all_users()
query = query.yield_per(250)
query = query.where(
and_(
Role.api_key != None, # noqa: E711
Role.api_key_digest == None, # noqa: E711
)
)

results = db.session.execute(query).scalars()

for index, partition in enumerate(results.partitions()):
for role in partition:
role.api_key_digest = hash_api_key(role.api_key)
role.api_key = None
db.session.add(role)
log.info(f"Hashing API key: {role}")
log.info(f"Comitting partition {index}")
db.session.commit()
9 changes: 9 additions & 0 deletions aleph/logic/util.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import jwt
import hashlib
from normality import ascii_text
from urllib.parse import urlencode, urljoin
from datetime import datetime, timedelta
Expand Down Expand Up @@ -58,3 +59,11 @@ def archive_token(token):
token = jwt.decode(token, key=SETTINGS.SECRET_KEY, algorithms=DECODE, verify=True)
expire = datetime.utcfromtimestamp(token["exp"])
return token.get("c"), token.get("f"), token.get("m"), expire


def hash_api_key(api_key):
if api_key is None:
return None

digest = hashlib.sha256(api_key.encode("utf-8")).hexdigest()
return f"sha256${digest}"
11 changes: 10 additions & 1 deletion aleph/manage.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,10 @@
from aleph.queues import get_status, cancel_queue
from aleph.queues import get_active_dataset_status
from aleph.index.admin import delete_index
from aleph.logic.api_keys import reset_api_key_expiration as _reset_api_key_expiration
from aleph.logic.api_keys import (
reset_api_key_expiration as _reset_api_key_expiration,
hash_plaintext_api_keys as _hash_plaintext_api_keys,
)
from aleph.index.entities import iter_proxies
from aleph.index.util import AlephOperationalException
from aleph.logic.collections import create_collection, update_collection
Expand Down Expand Up @@ -597,3 +600,9 @@ def evilshit():
def reset_api_key_expiration():
"""Reset the expiration date of all legacy, non-expiring API keys."""
_reset_api_key_expiration()


@cli.command()
def hash_plaintext_api_keys():
"""Hash legacy plaintext API keys."""
_hash_plaintext_api_keys()
28 changes: 28 additions & 0 deletions aleph/migrate/versions/31e24765dee3_add_api_key_digest_column.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
"""Add api_key_digest column
Revision ID: 31e24765dee3
Revises: d46fc882ec6b
Create Date: 2024-07-04 11:07:19.915782
"""

# revision identifiers, used by Alembic.
revision = "31e24765dee3"
down_revision = "d46fc882ec6b"

from alembic import op
import sqlalchemy as sa


def upgrade():
op.add_column("role", sa.Column("api_key_digest", sa.Unicode()))
op.create_index(
index_name="ix_role_api_key_digest",
table_name="role",
columns=["api_key_digest"],
unique=True,
)


def downgrade():
op.drop_column("role", "api_key_digest")
12 changes: 9 additions & 3 deletions aleph/model/role.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
from aleph.settings import SETTINGS
from aleph.model.common import SoftDeleteModel, IdModel, query_like
from aleph.util import anonymize_email
from aleph.logic.util import hash_api_key

log = logging.getLogger(__name__)

Expand Down Expand Up @@ -52,6 +53,7 @@ class Role(db.Model, IdModel, SoftDeleteModel):
email = db.Column(db.Unicode, nullable=True)
type = db.Column(db.Enum(*TYPES, name="role_type"), nullable=False)
api_key = db.Column(db.Unicode, nullable=True)
api_key_digest = db.Column(db.Unicode, nullable=True)
api_key_expires_at = db.Column(db.DateTime, nullable=True)
api_key_expiration_notification_sent = db.Column(db.Integer, nullable=True)
is_admin = db.Column(db.Boolean, nullable=False, default=False)
Expand All @@ -72,7 +74,7 @@ def has_password(self):

@property
def has_api_key(self):
return self.api_key is not None
return self.api_key_digest is not None

@property
def is_public(self):
Expand Down Expand Up @@ -197,10 +199,13 @@ def by_email(cls, email):
def by_api_key(cls, api_key):
if api_key is None:
return None

q = cls.all()
q = q.filter_by(api_key=api_key)
utcnow = datetime.now(timezone.utc)

digest = hash_api_key(api_key)
q = q.filter(cls.api_key_digest == digest)

utcnow = datetime.now(timezone.utc)
# TODO: Exclude API keys without expiration date after deadline
# See https://github.com/alephdata/aleph/issues/3729
q = q.filter(
Expand All @@ -212,6 +217,7 @@ def by_api_key(cls, api_key):

q = q.filter(cls.type == cls.USER)
q = q.filter(cls.is_blocked == False) # noqa

return q.first()

@classmethod
Expand Down
38 changes: 31 additions & 7 deletions aleph/tests/test_api_keys.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,33 +5,35 @@
from aleph.logic.api_keys import (
generate_user_api_key,
send_api_key_expiration_notifications,
hash_plaintext_api_keys,
)
from aleph.logic.util import hash_api_key
from aleph.tests.util import TestCase


class ApiKeysTestCase(TestCase):
def test_generate_user_api_key(self):
role = self.create_user()
assert role.api_key is None
assert role.api_key_digest is None
assert role.api_key_expires_at is None

with time_machine.travel("2024-01-01T00:00:00Z"):
generate_user_api_key(role)
db.session.refresh(role)
assert role.api_key is not None
assert role.api_key_digest is not None
assert role.api_key_expires_at.date() == datetime.date(2024, 3, 31)

old_key = role.api_key
old_digest = role.api_key_digest

with time_machine.travel("2024-02-01T00:00:00Z"):
generate_user_api_key(role)
db.session.refresh(role)
assert role.api_key != old_key
assert role.api_key_digest != old_digest
assert role.api_key_expires_at.date() == datetime.date(2024, 5, 1)

def test_generate_user_api_key_notification(self):
role = self.create_user(email="john.doe@example.org")
assert role.api_key is None
assert role.api_key_digest is None

with mail.record_messages() as outbox:
assert len(outbox) == 0
Expand Down Expand Up @@ -65,7 +67,7 @@ def test_send_api_key_expiration_notifications(self):
assert len(outbox) == 1
assert outbox[0].subject == "[Aleph] API key generated"

assert role.api_key is not None
assert role.api_key_digest is not None
assert role.api_key_expires_at.date() == datetime.date(2024, 3, 31)

assert len(outbox) == 1
Expand Down Expand Up @@ -122,7 +124,7 @@ def test_send_api_key_expiration_notifications(self):

def test_send_api_key_expiration_notifications_no_key(self):
role = self.create_user(email="john.doe@example.org")
assert role.api_key is None
assert role.api_key_digest is None

with mail.record_messages() as outbox:
assert len(outbox) == 0
Expand Down Expand Up @@ -193,3 +195,25 @@ def test_send_api_key_expiration_notifications_regenerate(self):

assert outbox[4].subject == "[Aleph] Your API key will expire in 7 days"
assert outbox[5].subject == "[Aleph] Your API key has expired"

def test_hash_plaintext_api_keys(self):
user_1 = self.create_user(foreign_id="user_1", email="user1@example.org")
user_1.api_key = "1234567890"
user_1.api_key_digest = None

user_2 = self.create_user(foreign_id="user_2", email="user2@example.org")
user_2.api_key = None
user_2.api_key_digest = None

db.session.add_all([user_1, user_2])
db.session.commit()

hash_plaintext_api_keys()

db.session.refresh(user_1)
assert user_1.api_key is None
assert user_1.api_key_digest == hash_api_key("1234567890")

db.session.refresh(user_2)
assert user_2.api_key is None
assert user_2.api_key_digest is None
13 changes: 7 additions & 6 deletions aleph/tests/test_role_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
from aleph.model import Role
from aleph.tests.factories.models import RoleFactory
from aleph.logic.roles import create_user, create_group
from aleph.logic.util import hash_api_key

from aleph.tests.util import TestCase

Expand Down Expand Up @@ -83,7 +84,7 @@ def test_remove_role(self):

def test_role_by_api_key(self):
role_ = self.create_user()
role_.api_key = "1234567890"
role_.api_key_digest = hash_api_key("1234567890")
db.session.add(role_)
db.session.commit()

Expand All @@ -93,7 +94,7 @@ def test_role_by_api_key(self):

def test_role_by_api_key_empty(self):
role_ = self.create_user()
assert role_.api_key is None
assert role_.api_key_digest is None

role = Role.by_api_key(None)
assert role is None
Expand All @@ -103,26 +104,26 @@ def test_role_by_api_key_empty(self):

def test_role_by_api_key_expired(self):
role_ = self.create_user()
role_.api_key = "1234567890"
role_.api_key_digest = hash_api_key("1234567890")
role_.api_key_expires_at = datetime.datetime(2024, 3, 31, 0, 0, 0)
db.session.add(role_)
db.session.commit()

with time_machine.travel("2024-03-30T23:59:59Z"):
print(role_.api_key_expires_at)
role = Role.by_api_key(role_.api_key)
role = Role.by_api_key("1234567890")
assert role is not None
assert role.id == role_.id

with time_machine.travel("2024-03-31T00:00:00Z"):
role = Role.by_api_key(role_.api_key)
role = Role.by_api_key("1234567890")
assert role is None

def test_role_by_api_key_legacy_without_expiration(self):
# Ensure that legacy API keys that were created without an expiration
# date continue to work.
role_ = self.create_user()
role_.api_key = "1234567890"
role_.api_key_digest = hash_api_key("1234567890")
role_.api_key_expires_at = None
db.session.add(role_)
db.session.commit()
Expand Down
9 changes: 5 additions & 4 deletions aleph/tests/test_view_context.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,14 @@
from aleph.core import db
from aleph.tests.util import TestCase
from aleph.logic.util import hash_api_key


class ViewContextTest(TestCase):
def setUp(self):
super().setUp()
self.role = self.create_user(email="john.doe@example.org")
self.role.set_password("12345678")
self.role.api_key = "1234567890"
self.role.api_key_digest = hash_api_key("1234567890")

self.other_role = self.create_user(
foreign_id="other",
Expand Down Expand Up @@ -47,12 +48,12 @@ def test_authz_header_session_token_invalid(self):
assert res.status_code == 401

def test_authz_header_api_key(self):
headers = {"Authorization": f"ApiKey {self.role.api_key}"}
headers = {"Authorization": "ApiKey 1234567890"}
res = self.client.get(f"/api/2/roles/{self.role.id}", headers=headers)
assert res.status_code == 200
assert res.json["email"] == "john.doe@example.org"

headers = {"Authorization": self.role.api_key}
headers = {"Authorization": "1234567890"}
res = self.client.get(f"/api/2/roles/{self.role.id}", headers=headers)
assert res.status_code == 200
assert res.json["email"] == "john.doe@example.org"
Expand Down Expand Up @@ -83,7 +84,7 @@ def test_authz_header_api_key_invalid(self):
assert res.status_code == 403

def test_authz_url_param_api_key(self):
query_string = {"api_key": self.role.api_key}
query_string = {"api_key": "1234567890"}
res = self.client.get(f"/api/2/roles/{self.role.id}", query_string=query_string)
assert res.status_code == 200
assert res.json["email"] == "john.doe@example.org"
Expand Down
2 changes: 0 additions & 2 deletions aleph/views/reconcile_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,8 +57,6 @@ def reconcile_index(collection=None):
domain = SETTINGS.APP_UI_URL.strip("/")
label = SETTINGS.APP_TITLE
suggest_query = []
if request.authz.id:
suggest_query.append(("api_key", request.authz.role.api_key))
schemata = list(model)
if collection is not None:
label = "%s (%s)" % (collection.get("label"), label)
Expand Down

0 comments on commit 96c0492

Please sign in to comment.