Skip to content

Commit ffaec28

Browse files
authored
Validate and sign mime types and file extensions for file uploads (#2860)
* Validate and sign mime types for file uploads * add checks for file extensions
1 parent b9d02ae commit ffaec28

File tree

5 files changed

+231
-44
lines changed

5 files changed

+231
-44
lines changed

care/emr/models/file_upload.py

+3-3
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
from care.emr.utils.file_manager import S3FilesManager
88
from care.users.models import User
99
from care.utils.csp.config import BucketType
10+
from care.utils.models.validators import parse_file_extension
1011

1112

1213
class FileUpload(EMRBaseModel):
@@ -32,9 +33,8 @@ class FileUpload(EMRBaseModel):
3233
files_manager = S3FilesManager(BucketType.PATIENT)
3334

3435
def get_extension(self):
    """
    Return the extension of ``internal_name`` with a leading dot.

    The extensions are obtained from ``parse_file_extension`` and joined
    with dots. An empty string is returned when no extension is found.
    """
    extensions = parse_file_extension(self.internal_name)
    # Plain concatenation instead of an f-string that reuses double quotes
    # inside its replacement field: that form only parses on Python 3.12+.
    return ("." + ".".join(extensions)) if extensions else ""
3838

3939
def save(self, *args, **kwargs):
4040
"""

care/emr/resources/file_upload/spec.py

+28-1
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,14 @@
11
import datetime
22
from enum import Enum
33

4-
from pydantic import UUID4
4+
from django.conf import settings
5+
from django.core.exceptions import ValidationError
6+
from pydantic import UUID4, field_validator
57

68
from care.emr.models import FileUpload
79
from care.emr.resources.base import EMRResource
810
from care.emr.resources.user.spec import UserSpec
11+
from care.utils.models.validators import file_name_validator
912

1013

1114
class FileTypeChoices(str, Enum):
@@ -37,11 +40,33 @@ class FileUploadCreateSpec(FileUploadBaseSpec):
3740
file_type: FileTypeChoices
3841
file_category: FileCategoryChoices
3942
associating_id: str
43+
mime_type: str
4044

4145
def perform_extra_deserialization(self, is_update, obj):
    """
    Copy the request-only fields of this spec onto the model instance.

    Stores ``original_name`` as the object's ``internal_name``, records the
    declared ``mime_type`` under ``obj.meta["mime_type"]`` and flags the
    object as just created.
    """
    # Authz Performed in the request
    obj.internal_name = self.original_name
    obj.meta["mime_type"] = self.mime_type
    obj._just_created = True  # noqa SLF001
50+
51+
@field_validator("mime_type")
@classmethod
def validate_mime_type(cls, mime_type: str):
    """
    Accept only MIME types listed in ``settings.ALLOWED_MIME_TYPES``.

    Returns the value unchanged when it is allowed and raises
    ``ValueError`` otherwise.
    """
    if mime_type in settings.ALLOWED_MIME_TYPES:
        return mime_type

    err = "Invalid mime type"
    raise ValueError(err)
58+
59+
@field_validator("original_name")
@classmethod
def validate_original_name(cls, original_name: str):
    """
    Check the uploaded file's name with ``file_name_validator``.

    An empty name is rejected up front. A ``ValidationError`` raised by the
    validator is re-raised as ``ValueError`` carrying the same message.
    Returns the name unchanged when it passes.
    """
    if original_name:
        try:
            file_name_validator(original_name)
        except ValidationError as exc:
            raise ValueError(exc.message) from exc
        else:
            return original_name

    err = "File name cannot be empty"
    raise ValueError(err)
4570

4671

4772
class FileUploadListSpec(FileUploadBaseSpec):
@@ -56,11 +81,13 @@ class FileUploadListSpec(FileUploadBaseSpec):
5681
created_date: datetime.datetime
5782
extension: str
5883
uploaded_by: dict
84+
mime_type: str
5985

6086
@classmethod
6187
def perform_extra_serialization(cls, mapping, obj):
6288
mapping["id"] = obj.external_id
6389
mapping["extension"] = obj.get_extension()
90+
mapping["mime_type"] = obj.meta.get("mime_type")
6491
if obj.created_by:
6592
mapping["uploaded_by"] = UserSpec.serialize(obj.created_by)
6693

care/emr/utils/file_manager.py

+4-2
Original file line numberDiff line numberDiff line change
@@ -22,8 +22,10 @@ def signed_url(self, file_obj, duration=60 * 60, mime_type=None):
2222
"Bucket": bucket_name,
2323
"Key": f"{file_obj.file_type}/{file_obj.internal_name}",
2424
}
25-
if mime_type:
26-
params["ContentType"] = mime_type
25+
26+
_mime_type = file_obj.meta.get("mime_type") or mime_type
27+
if _mime_type:
28+
params["ContentType"] = _mime_type
2729
return s3.generate_presigned_url(
2830
"put_object",
2931
Params=params,

care/utils/models/validators.py

+88
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,10 @@
11
import re
22
from collections.abc import Iterable
33
from fractions import Fraction
4+
from pathlib import Path
45

56
import jsonschema
7+
from django.conf import settings
68
from django.core import validators
79
from django.core.exceptions import ValidationError
810
from django.core.files.uploadedfile import UploadedFile
@@ -348,3 +350,89 @@ def _humanize_bytes(self, size: int) -> str:
348350
custom_image_extension_validator = validators.FileExtensionValidator(
349351
allowed_extensions=["jpg", "jpeg", "png"]
350352
)
353+
354+
355+
def parse_file_extension(file_name: str, max_extensions: int = 1) -> list[str]:
    """
    Extract up to `max_extensions` trailing file extensions.

    - "file.tar.gz" -> ['gz']
    - "file.tar.gz" (max 2) -> ['tar', 'gz']
    - "file.tar.bz2.xz" (max 3) -> ['tar', 'bz2', 'xz']
    - "file" -> []
    - any name with `max_extensions` <= 0 -> []

    Returns a list of extensions in lowercase (without the dot), in the
    order they appear in the file name.
    """
    if max_extensions <= 0:
        # `extensions[-0:]` is the whole list and a negative count would drop
        # leading items instead, so a non-positive limit yields nothing.
        return []

    # Strip the leading dot from every suffix and normalise the case.
    extensions = [suffix[1:].lower() for suffix in Path(file_name).suffixes]

    return extensions[-max_extensions:]  # Keep only the last `max_extensions`
369+
370+
371+
class FileNameValidator:
    """
    This validator is used to validate the file name length and extension.
    - File name length should not exceed `max_length` characters.
    - File name should not start with a dot.
    - File name should have at least one extension.
    - File outermost extension should not be in `blocked_extensions` if provided.
    - File outermost extension should be in `allowed_extensions` if provided.

    Extensions are compared in lowercase, so the configured sets may be given
    in any case. Raises `ValidationError` on the first rule that fails.
    """

    def __init__(
        self,
        max_length: int = 255,
        max_extensions: int = 1,
        allowed_extensions: set[str] | None = None,
        blocked_extensions: set[str] | None = None,
    ):
        self.max_length = max_length
        self.max_extensions = max_extensions
        # parse_file_extension() returns lowercase extensions; lowercase the
        # configured values too so that e.g. "PDF" from the environment matches.
        self.allowed_extensions = {ext.lower() for ext in allowed_extensions or ()}
        self.blocked_extensions = {ext.lower() for ext in blocked_extensions or ()}

    def __call__(self, file_name: str):
        if len(file_name) > self.max_length:
            raise ValidationError(
                _("File name cannot exceed %(max_length)d characters.")
                % {"max_length": self.max_length}
            )

        if file_name.startswith("."):
            raise ValidationError(
                _("File name cannot start with a dot."),
            )

        extensions = parse_file_extension(file_name, self.max_extensions)
        if not extensions:
            raise ValidationError(_("Invalid file extension."))

        # Only the outermost (last) extension is checked against the lists.
        extension = extensions[-1]
        if self.blocked_extensions and extension in self.blocked_extensions:
            raise ValidationError(
                _(
                    "File extension not allowed. Blocked extensions are: %(blocked_extensions)s"
                )
                # Sorted so the message is stable: set iteration order is not.
                % {"blocked_extensions": ", ".join(sorted(self.blocked_extensions))},
            )

        if self.allowed_extensions and extension not in self.allowed_extensions:
            raise ValidationError(
                _(
                    "File extension not allowed. Allowed extensions are: %(allowed_extensions)s"
                )
                % {"allowed_extensions": ", ".join(sorted(self.allowed_extensions))},
            )

    def __eq__(self, other):
        # Two validators are equal when they enforce the same limits and lists.
        return (
            isinstance(other, FileNameValidator)
            and self.max_length == other.max_length
            and self.max_extensions == other.max_extensions
            and self.allowed_extensions == other.allowed_extensions
            and self.blocked_extensions == other.blocked_extensions
        )
433+
434+
435+
file_name_validator = FileNameValidator(
436+
allowed_extensions=settings.ALLOWED_FILE_EXTENSIONS,
437+
blocked_extensions=settings.BLOCKED_FILE_EXTENSIONS,
438+
)

config/settings/base.py

+108-38
Original file line numberDiff line numberDiff line change
@@ -569,46 +569,116 @@
569569
),
570570
)
571571

572-
ALLOWED_MIME_TYPES = env.list(
573-
"ALLOWED_MIME_TYPES",
574-
default=[
575-
# Images
576-
"image/jpeg",
577-
"image/png",
578-
"image/gif",
579-
"image/bmp",
580-
"image/webp",
581-
"image/svg+xml",
582-
# Videos
583-
"video/mp4",
584-
"video/mpeg",
585-
"video/x-msvideo",
586-
"video/quicktime",
587-
"video/x-ms-wmv",
588-
"video/x-flv",
589-
"video/webm",
590-
# Audio
591-
"audio/mpeg",
592-
"audio/wav",
593-
"audio/aac",
594-
"audio/ogg",
595-
"audio/midi",
596-
"audio/x-midi",
597-
"audio/webm",
598-
"audio/mp4",
599-
# Documents
600-
"text/plain",
601-
"text/csv",
602-
"application/rtf",
603-
"application/msword",
604-
"application/vnd.oasis.opendocument.text",
605-
"application/pdf",
606-
"application/vnd.ms-excel",
607-
"application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
608-
"application/vnd.oasis.opendocument.spreadsheet",
609-
],
572+
ALLOWED_MIME_TYPES = set(
573+
env.list(
574+
"ALLOWED_MIME_TYPES",
575+
default=[
576+
# Images
577+
"image/jpeg",
578+
"image/png",
579+
"image/gif",
580+
"image/bmp",
581+
"image/webp",
582+
"image/svg+xml",
583+
# Videos
584+
"video/mp4",
585+
"video/mpeg",
586+
"video/x-msvideo",
587+
"video/quicktime",
588+
"video/x-ms-wmv",
589+
"video/x-flv",
590+
"video/webm",
591+
# Audio
592+
"audio/mpeg",
593+
"audio/wav",
594+
"audio/aac",
595+
"audio/ogg",
596+
"audio/midi",
597+
"audio/x-midi",
598+
"audio/webm",
599+
"audio/mp4",
600+
# Documents
601+
"text/plain",
602+
"text/csv",
603+
"application/rtf",
604+
"application/msword",
605+
"application/vnd.oasis.opendocument.text",
606+
"application/pdf",
607+
"application/vnd.ms-excel",
608+
"application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
609+
"application/vnd.oasis.opendocument.spreadsheet",
610+
],
611+
)
612+
)
613+
614+
ALLOWED_FILE_EXTENSIONS = set(
615+
env.list(
616+
"ALLOWED_FILE_EXTENSIONS",
617+
default=[
618+
# Images
619+
"jpg",
620+
"jpeg",
621+
"png",
622+
"gif",
623+
"bmp",
624+
"webp",
625+
"svg",
626+
# Videos
627+
"mp4",
628+
"mpeg",
629+
"avi",
630+
"mov",
631+
"wmv",
632+
"flv",
633+
"webm",
634+
# Audio
635+
"mp3",
636+
"wav",
637+
"aac",
638+
"ogg",
639+
"midi",
640+
"mid",
641+
"m4a",
642+
# Documents
643+
"txt",
644+
"csv",
645+
"rtf",
646+
"doc",
647+
"odt",
648+
"pdf",
649+
"xls",
650+
"xlsx",
651+
"ods",
652+
],
653+
)
610654
)
611655

656+
BLOCKED_FILE_EXTENSIONS = set(
657+
env.list(
658+
"BLOCKED_FILE_EXTENSIONS",
659+
default=[
660+
# Executable Files
661+
"exe",
662+
"dll",
663+
"msi",
664+
"msp",
665+
"mst",
666+
"com",
667+
"scr",
668+
"sys",
669+
"pif",
670+
# Registry Files
671+
"reg",
672+
# Script Files
673+
"bat",
674+
"cmd",
675+
"wsf",
676+
"sh",
677+
],
678+
)
679+
)
680+
681+
612682
FACILITY_S3_BUCKET = env("FACILITY_S3_BUCKET", default="")
613683
FACILITY_S3_REGION = env("FACILITY_S3_REGION_CODE", default=BUCKET_REGION)
614684
FACILITY_S3_KEY = env("FACILITY_S3_KEY", default=BUCKET_KEY)

0 commit comments

Comments
 (0)