From 08808c6cafcfe961e7be1d1dcdb9ae158a2f275d Mon Sep 17 00:00:00 2001 From: Amogh Desai Date: Fri, 28 Jun 2024 17:10:56 +0530 Subject: [PATCH] Validating provider description for urls in provider list view (#40475) * Validating provider description for urls in provider list view * adding unit tests --------- Co-authored-by: adesai (cherry picked from commit f18f48492dc69f392e45567580b6ddb0c070ea58) --- airflow/www/views.py | 9 ++++++++- tests/www/views/test_views.py | 33 +++++++++++++++++++++++++++++++++ 2 files changed, 41 insertions(+), 1 deletion(-) diff --git a/airflow/www/views.py b/airflow/www/views.py index 3faed00a38228d..781f1b199be43c 100644 --- a/airflow/www/views.py +++ b/airflow/www/views.py @@ -36,7 +36,7 @@ from json import JSONDecodeError from pathlib import Path from typing import TYPE_CHECKING, Any, Collection, Iterator, Mapping, MutableMapping, Sequence -from urllib.parse import unquote, urljoin, urlsplit +from urllib.parse import unquote, urljoin, urlparse, urlsplit import configupdater import flask.json @@ -4321,6 +4321,13 @@ def _clean_description(self, description): def _build_link(match_obj): text = match_obj.group(1) url = match_obj.group(2) + + # parsing the url to check if it is a valid url + parsed_url = urlparse(url) + if not (parsed_url.scheme == "http" or parsed_url.scheme == "https"): + # returning the original raw text + return escape(match_obj.group(0)) + return Markup(f'{text}') cd = escape(description) diff --git a/tests/www/views/test_views.py b/tests/www/views/test_views.py index 27f096403f05d7..2a7412ff3bf63c 100644 --- a/tests/www/views/test_views.py +++ b/tests/www/views/test_views.py @@ -22,6 +22,7 @@ from unittest import mock import pytest +from markupsafe import Markup from airflow.configuration import ( initialize_config, @@ -31,6 +32,7 @@ from airflow.plugins_manager import AirflowPlugin, EntryPointSource from airflow.utils.task_group import TaskGroup from airflow.www.views import ( + ProviderView, get_key_paths, 
get_safe_url, get_task_stats_from_query, @@ -139,6 +141,37 @@ def test_should_list_providers_on_page_with_details(admin_client): check_content_in_response("Providers", resp) +@pytest.mark.parametrize( + "provider_description, expected", + [ + ("`Airbyte `__", Markup('Airbyte')), + ( + "Amazon integration (including `Amazon Web Services (AWS) `__).", + Markup( + 'Amazon integration (including Amazon Web Services (' + "AWS))." + ), + ), + ( + "`Java Database Connectivity (JDBC) `__", + Markup( + 'Java ' + "Database Connectivity (JDBC)" + ), + ), + ( + "`click me `__", + Markup("`click me <javascript:prompt(document.domain)>`__"), + ), + ], +) +def test__clean_description(admin_client, provider_description, expected): + p = ProviderView() + actual = p._clean_description(provider_description) + assert actual == expected + + def test_endpoint_should_not_be_unauthenticated(app): resp = app.test_client().get("/provider", follow_redirects=True) check_content_not_in_response("Providers", resp)