diff --git a/api/src/pcapi/scripts/provider_clean_old_integration_data/main.py b/api/src/pcapi/scripts/provider_clean_old_integration_data/main.py
new file mode 100644
index 00000000000..87e4f708d08
--- /dev/null
+++ b/api/src/pcapi/scripts/provider_clean_old_integration_data/main.py
@@ -0,0 +1,113 @@
+import logging
+
+from sqlalchemy import text
+
+from pcapi import settings
+from pcapi.core.providers import models as providers_models
+from pcapi.flask_app import app
+from pcapi.models import db
+from pcapi.repository import transaction
+
+
+logger = logging.getLogger(__name__)
+
+_LEGACY_API_PROVIDERS_IDS = [
+    15,  # TiteLive Stocks (Epagine / Place des libraires.com)
+    59,  # Praxiel/Inférence
+    58,  # FNAC
+    23,  # www.leslibraires.fr
+    66,  # Decitre
+    63,  # Librisoft
+    68,  # TMIC-Ellipses
+    65,  # Mollat
+    67,  # CDI-Bookshop
+]
+
+_BATCH_SIZE = 1000
+
+
+def _clean_id_at_providers(provider_ids: list[int], batch_size: int = _BATCH_SIZE) -> None:
+    """Set `idAtProvider` to NULL on offers whose `lastProviderId` is in `provider_ids`.
+
+    The update runs through a temporary PL/pgSQL procedure that loops over
+    batches of at most `batch_size` offers until no matching offer is left.
+    """
+    # Raise statement_timeout to 300s (it is not disabled): the CALL below is a
+    # single statement and we can't know in advance how long it would take
+    db.session.execute(text("SET statement_timeout = '300s';"))
+
+    # create procedure
+    db.session.execute(
+        text(
+            """
+            CREATE OR REPLACE PROCEDURE update_deprecated_provider_offer_batch(provider_id_list INT[], batch_size INT)
+            LANGUAGE plpgsql
+            AS $$
+            BEGIN
+                LOOP
+                    -- No EXCEPTION handler issuing a ROLLBACK here: PL/pgSQL does not
+                    -- allow ending a transaction from a block with exception handlers,
+                    -- so it would only hide the original error. Any failure simply
+                    -- propagates to the caller.
+                    WITH batch AS (
+                        SELECT "id"
+                        FROM offer
+                        WHERE "lastProviderId" = ANY (provider_id_list)
+                        AND "idAtProvider" IS NOT NULL
+                        LIMIT batch_size
+                    )
+
+                    UPDATE offer SET "idAtProvider" = NULL WHERE "id" IN (SELECT "id" FROM batch);
+
+                    -- Exit the loop if no rows were updated
+                    EXIT WHEN NOT FOUND;
+                END LOOP;
+            END $$;
+            """
+        )
+    )
+
+    db.session.execute(
+        text("""CALL update_deprecated_provider_offer_batch(:provider_id_list, :batch_size);"""),
+        params={"provider_id_list": provider_ids, "batch_size": batch_size},
+    )
+
+    # delete procedure
+    db.session.execute(text("""DROP PROCEDURE IF EXISTS update_deprecated_provider_offer_batch;"""))
+
+    # According to PostgreSQL, setting such values this way is affecting only the current session
+    # but let's be defensive by setting back to the original values
+    db.session.execute(text(f"SET statement_timeout = {settings.DATABASE_STATEMENT_TIMEOUT}"))
+
+
+def clean_old_provider_data(provider_ids: list[int]) -> None:
+    """Deprecate the given providers, then clear `idAtProvider` on their offers.
+
+    Each provider found is renamed with a "[DÉPRÉCIÉ]" prefix (unless its name
+    already contains it) and gets `enabledForPro` and `isActive` set to False,
+    one transaction per provider. Unknown ids are logged and skipped.
+    """
+    # Update providers
+    for provider_id in provider_ids:
+        with transaction():
+            provider = providers_models.Provider.query.get(provider_id)
+
+            # Query.get returns None for an unknown id: skip instead of crashing on provider.name
+            if provider is None:
+                logger.warning("Provider not found, skipping (id: %s)", provider_id)
+                continue
+
+            logger.info("Cleaning data for provider %s (id: %s)", provider.name, provider.id)
+
+            if "[DÉPRÉCIÉ]" not in provider.name:
+                provider.name = f"[DÉPRÉCIÉ] {provider.name}"
+            provider.enabledForPro = False
+            provider.isActive = False
+
+    # Update providers offers
+    _clean_id_at_providers(provider_ids)
+
+
+if __name__ == "__main__":
+    app.app_context().push()
+    clean_old_provider_data(_LEGACY_API_PROVIDERS_IDS)
diff --git a/api/tests/scripts/provider_clean_old_integraiton_data/main_test.py b/api/tests/scripts/provider_clean_old_integraiton_data/main_test.py
new file mode 100644
index 00000000000..076d7f0733e
--- /dev/null
+++ b/api/tests/scripts/provider_clean_old_integraiton_data/main_test.py
@@ -0,0 +1,66 @@
+import pytest
+
+from pcapi.core.offers import factories as offers_factories
+from pcapi.core.providers import factories as providers_factories
+from pcapi.models import db
+from pcapi.scripts.provider_clean_old_integration_data.main import _clean_id_at_providers
+from pcapi.scripts.provider_clean_old_integration_data.main import clean_old_provider_data
+
+
+@pytest.mark.usefixtures("db_session")
+def test_clean_old_provider_data():
+    """Listed providers are deprecated and only their offers lose `idAtProvider`."""
+    provider_1 = providers_factories.ProviderFactory(name="Old Provider that should be deprecated")
+    provider_already_deprecated = providers_factories.ProviderFactory(name="[DÉPRÉCIÉ] Old Provider")
+    provider_3 = providers_factories.ProviderFactory()
+    offer_provider_1 = offers_factories.ThingOfferFactory(lastProvider=provider_1, idAtProvider="12345")
+    offer_provider_2 = offers_factories.EventOfferFactory(lastProvider=provider_already_deprecated, idAtProvider=None)
+    offer_provider_3 = offers_factories.ThingOfferFactory(lastProvider=provider_3, idAtProvider="offerId3")
+
+    clean_old_provider_data([provider_1.id, provider_already_deprecated.id])
+
+    db.session.refresh(offer_provider_1)
+    db.session.refresh(offer_provider_2)
+    db.session.refresh(offer_provider_3)
+
+    # should be deprecated
+    assert provider_1.name == "[DÉPRÉCIÉ] Old Provider that should be deprecated"
+    assert not provider_1.enabledForPro
+    assert not provider_1.isActive
+    assert not offer_provider_1.idAtProvider
+    assert provider_already_deprecated.name == "[DÉPRÉCIÉ] Old Provider"
+    assert not provider_already_deprecated.enabledForPro
+    assert not provider_already_deprecated.isActive
+    assert not offer_provider_2.idAtProvider
+
+    # should stay the same
+    assert offer_provider_3.idAtProvider
+    assert provider_3.enabledForPro
+    assert provider_3.isActive
+
+
+@pytest.mark.usefixtures("db_session")
+def test_clean_id_at_providers():
+    """Offers are all cleaned even when they outnumber the batch size."""
+    provider_1 = providers_factories.ProviderFactory(name="Old Provider that should be deprecated")
+    provider_3 = providers_factories.ProviderFactory()
+    offer_provider_1 = offers_factories.ThingOfferFactory(lastProvider=provider_1, idAtProvider="12345")
+    offer_provider_2 = offers_factories.ThingOfferFactory(lastProvider=provider_1, idAtProvider="12346")
+    offer_provider_3 = offers_factories.ThingOfferFactory(lastProvider=provider_1, idAtProvider="12347")
+    offer_provider_4 = offers_factories.ThingOfferFactory(lastProvider=provider_3, idAtProvider="offerId3")
+
+    _clean_id_at_providers([provider_1.id], batch_size=2)
+
+    # the update is done in raw SQL, outside the ORM: reload the offers before asserting
+    db.session.refresh(offer_provider_1)
+    db.session.refresh(offer_provider_2)
+    db.session.refresh(offer_provider_3)
+    db.session.refresh(offer_provider_4)
+
+    # should be deprecated
+    assert not offer_provider_1.idAtProvider
+    assert not offer_provider_2.idAtProvider
+    assert not offer_provider_3.idAtProvider
+
+    # should stay the same
+    assert offer_provider_4.idAtProvider