From 4e64d28e9e9940af3288f2c51e3dbe52c380f154 Mon Sep 17 00:00:00 2001 From: Michael Mattig Date: Thu, 28 Nov 2024 17:05:38 +0100 Subject: [PATCH] add example for data usage --- examples/data_usage.ipynb | 375 ++++++++++++++++++++++++++++++++++++++ geoengine/__init__.py | 4 +- geoengine/workflow.py | 54 ++++++ setup.cfg | 2 +- 4 files changed, 433 insertions(+), 2 deletions(-) create mode 100644 examples/data_usage.ipynb diff --git a/examples/data_usage.ipynb b/examples/data_usage.ipynb new file mode 100644 index 00000000..8d2b8595 --- /dev/null +++ b/examples/data_usage.ipynb @@ -0,0 +1,375 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Access Usage Log as Data Producer" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Dependencies" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import geoengine as ge\n", + "import matplotlib.pyplot as plt" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Connect to the Virtual Data Trustee" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "ge.initialize(\"http://localhost:3030/api\", (\"admin@localhost\", \"adminadmin\")) # TODO: load from .env for demo TODO: set remote url" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Access Usage Log" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
computationIdcountdatatimestampuserId
0ca82e727-b1a5-4f0a-963d-e4379da4bcf36GdalSource2024-11-28 15:48:23.003000+00:00d5d3b34c-360c-4ed0-aedc-69c289222e7f
1ca82e727-b1a5-4f0a-963d-e4379da4bcf31OgrSource2024-11-28 15:48:13.003000+00:00d5d3b34c-360c-4ed0-aedc-69c289222e7f
20017ed59-a369-4869-8945-0eecf5f09bf51OgrSource2024-11-28 15:39:12.932000+00:00d5d3b34c-360c-4ed0-aedc-69c289222e7f
321799f3d-2a70-474b-8c09-0792921138672GdalSource2024-11-28 15:39:12.932000+00:00d5d3b34c-360c-4ed0-aedc-69c289222e7f
435208ed7-f6f2-4f26-a85f-c3b1f8b6d9d92GdalSource2024-11-28 15:39:12.932000+00:00d5d3b34c-360c-4ed0-aedc-69c289222e7f
53d2e3da4-f678-4b6f-bbe3-329ba7ab38704GdalSource2024-11-28 15:39:12.932000+00:00d5d3b34c-360c-4ed0-aedc-69c289222e7f
647661177-49e7-4fc7-95af-6112b3707b3e4GdalSource2024-11-28 15:39:12.932000+00:00d5d3b34c-360c-4ed0-aedc-69c289222e7f
75e58efe5-6d56-4369-b396-a5d36b6327172GdalSource2024-11-28 15:39:12.932000+00:00d5d3b34c-360c-4ed0-aedc-69c289222e7f
86a0d85c8-ab99-4fef-b9ef-cefcaa5e0f9c2GdalSource2024-11-28 15:39:12.932000+00:00d5d3b34c-360c-4ed0-aedc-69c289222e7f
96a5239be-f9a8-44fc-a9da-02d7ab1c02bc2GdalSource2024-11-28 15:39:12.932000+00:00d5d3b34c-360c-4ed0-aedc-69c289222e7f
\n", + "
" + ], + "text/plain": [ + " computationId count data \\\n", + "0 ca82e727-b1a5-4f0a-963d-e4379da4bcf3 6 GdalSource \n", + "1 ca82e727-b1a5-4f0a-963d-e4379da4bcf3 1 OgrSource \n", + "2 0017ed59-a369-4869-8945-0eecf5f09bf5 1 OgrSource \n", + "3 21799f3d-2a70-474b-8c09-079292113867 2 GdalSource \n", + "4 35208ed7-f6f2-4f26-a85f-c3b1f8b6d9d9 2 GdalSource \n", + "5 3d2e3da4-f678-4b6f-bbe3-329ba7ab3870 4 GdalSource \n", + "6 47661177-49e7-4fc7-95af-6112b3707b3e 4 GdalSource \n", + "7 5e58efe5-6d56-4369-b396-a5d36b632717 2 GdalSource \n", + "8 6a0d85c8-ab99-4fef-b9ef-cefcaa5e0f9c 2 GdalSource \n", + "9 6a5239be-f9a8-44fc-a9da-02d7ab1c02bc 2 GdalSource \n", + "\n", + " timestamp userId \n", + "0 2024-11-28 15:48:23.003000+00:00 d5d3b34c-360c-4ed0-aedc-69c289222e7f \n", + "1 2024-11-28 15:48:13.003000+00:00 d5d3b34c-360c-4ed0-aedc-69c289222e7f \n", + "2 2024-11-28 15:39:12.932000+00:00 d5d3b34c-360c-4ed0-aedc-69c289222e7f \n", + "3 2024-11-28 15:39:12.932000+00:00 d5d3b34c-360c-4ed0-aedc-69c289222e7f \n", + "4 2024-11-28 15:39:12.932000+00:00 d5d3b34c-360c-4ed0-aedc-69c289222e7f \n", + "5 2024-11-28 15:39:12.932000+00:00 d5d3b34c-360c-4ed0-aedc-69c289222e7f \n", + "6 2024-11-28 15:39:12.932000+00:00 d5d3b34c-360c-4ed0-aedc-69c289222e7f \n", + "7 2024-11-28 15:39:12.932000+00:00 d5d3b34c-360c-4ed0-aedc-69c289222e7f \n", + "8 2024-11-28 15:39:12.932000+00:00 d5d3b34c-360c-4ed0-aedc-69c289222e7f \n", + "9 2024-11-28 15:39:12.932000+00:00 d5d3b34c-360c-4ed0-aedc-69c289222e7f " + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "usage = ge.data_usage(0, 10)\n", + "usage" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Access Usage Summary" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
countdatasettimestamp
06GdalSource2024-11-28 15:48:00+00:00
11OgrSource2024-11-28 15:48:00+00:00
230GdalSource2024-11-28 15:39:00+00:00
31OgrSource2024-11-28 15:39:00+00:00
\n", + "
" + ], + "text/plain": [ + " count dataset timestamp\n", + "0 6 GdalSource 2024-11-28 15:48:00+00:00\n", + "1 1 OgrSource 2024-11-28 15:48:00+00:00\n", + "2 30 GdalSource 2024-11-28 15:39:00+00:00\n", + "3 1 OgrSource 2024-11-28 15:39:00+00:00" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "usage_summary = ge.data_usage_summary(ge.UsageSummaryGranularity.MINUTES)\n", + "usage_summary\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Plot usage summary" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "ge.plot_data_usage_summary(ge.UsageSummaryGranularity.MINUTES)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "ename": "AttributeError", + "evalue": "'DataFrame' object has no attribute 'to_datetime'", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mAttributeError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[10], line 2\u001b[0m\n\u001b[1;32m 1\u001b[0m pivot_df \u001b[38;5;241m=\u001b[39m usage_summary\u001b[38;5;241m.\u001b[39mpivot(index\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mtimestamp\u001b[39m\u001b[38;5;124m'\u001b[39m, columns\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mdataset\u001b[39m\u001b[38;5;124m'\u001b[39m, values\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mcount\u001b[39m\u001b[38;5;124m'\u001b[39m)\u001b[38;5;241m.\u001b[39mfillna(\u001b[38;5;241m0\u001b[39m)\n\u001b[0;32m----> 2\u001b[0m pivot_df[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mtimestamp\u001b[39m\u001b[38;5;124m'\u001b[39m] \u001b[38;5;241m=\u001b[39m \u001b[43mpivot_df\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mto_datetime\u001b[49m(pivot_df[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mtimestamp\u001b[39m\u001b[38;5;124m'\u001b[39m])\u001b[38;5;241m.\u001b[39mdt\u001b[38;5;241m.\u001b[39mtz_localize(\u001b[38;5;28;01mNone\u001b[39;00m)\n\u001b[1;32m 4\u001b[0m pivot_df\n\u001b[1;32m 6\u001b[0m pivot_df\u001b[38;5;241m.\u001b[39mplot(kind\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mbar\u001b[39m\u001b[38;5;124m'\u001b[39m, figsize\u001b[38;5;241m=\u001b[39m(\u001b[38;5;241m10\u001b[39m, \u001b[38;5;241m6\u001b[39m))\n", + "File 
\u001b[0;32m~/git/geoengine-python/env/lib/python3.10/site-packages/pandas/core/generic.py:5902\u001b[0m, in \u001b[0;36mNDFrame.__getattr__\u001b[0;34m(self, name)\u001b[0m\n\u001b[1;32m 5895\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m (\n\u001b[1;32m 5896\u001b[0m name \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_internal_names_set\n\u001b[1;32m 5897\u001b[0m \u001b[38;5;129;01mand\u001b[39;00m name \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_metadata\n\u001b[1;32m 5898\u001b[0m \u001b[38;5;129;01mand\u001b[39;00m name \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_accessors\n\u001b[1;32m 5899\u001b[0m \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_info_axis\u001b[38;5;241m.\u001b[39m_can_hold_identifiers_and_holds_name(name)\n\u001b[1;32m 5900\u001b[0m ):\n\u001b[1;32m 5901\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m[name]\n\u001b[0;32m-> 5902\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mobject\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[38;5;21;43m__getattribute__\u001b[39;49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mname\u001b[49m\u001b[43m)\u001b[49m\n", + "\u001b[0;31mAttributeError\u001b[0m: 'DataFrame' object has no attribute 'to_datetime'" + ] + } + ], + "source": [ + "import pandas as pd\n", + "\n", + "\n", + "pivot_df = usage_summary.pivot(index='timestamp', columns='dataset', values='count').fillna(0)\n", + "pivot_df['timestamp'] = pd.to_datetime(pivot_df['timestamp']).dt.tz_localize(None)\n", + "\n", + "pivot_df\n", + "\n", + "pivot_df.plot(kind='bar', figsize=(10, 6))\n", + "\n", + "plt.title('Data Usage by Dataset over time')\n", + 
"plt.xlabel('Timestamp')\n", + "plt.ylabel('Count')\n", + "plt.xticks(rotation=45)\n", + "plt.legend(title='Dataset')\n", + "plt.show()" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "env", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.12" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/geoengine/__init__.py b/geoengine/__init__.py index fca41ddb..7ec8d6ca 100644 --- a/geoengine/__init__.py +++ b/geoengine/__init__.py @@ -6,6 +6,7 @@ from geoengine_openapi_client.exceptions import BadRequestException, OpenApiException, ApiTypeError, ApiValueError, \ ApiKeyError, ApiAttributeError, ApiException, NotFoundException +from geoengine_openapi_client import UsageSummaryGranularity from .auth import Session, get_session, initialize, reset from .colorizer import Colorizer, ColorBreakpoint, LinearGradientColorizer, PaletteColorizer, \ LogarithmicGradientColorizer @@ -30,7 +31,8 @@ MultiBandRasterColorizer from .util import clamp_datetime_ms_ns -from .workflow import WorkflowId, Workflow, workflow_by_id, register_workflow, get_quota, update_quota +from .workflow import WorkflowId, Workflow, workflow_by_id, register_workflow, get_quota, update_quota, data_usage, \ + data_usage_summary, plot_data_usage_summary from .raster import RasterTile2D from .raster_workflow_rio_writer import RasterWorkflowRioWriter diff --git a/geoengine/workflow.py b/geoengine/workflow.py index e19a2273..c0200d2f 100644 --- a/geoengine/workflow.py +++ b/geoengine/workflow.py @@ -967,3 +967,57 @@ def update_quota(user_id: UUID, new_available_quota: int, timeout: int = 60) -> ), _request_timeout=timeout ) + + +def data_usage(offset: int = 0, limit: int = 10) -> List[geoengine_openapi_client.DataUsage]: + ''' + Get data usage 
def data_usage_summary(
    granularity: geoengine_openapi_client.UsageSummaryGranularity,
    dataset: Optional[str] = None,
    offset: int = 0,
    limit: int = 10,
) -> pd.DataFrame:
    '''
    Get the data usage summary as a data frame.

    Calls `data_usage_summary_handler` of the user API with the current
    session and turns every returned entry into one row. Column names are the
    API's alias names (the entries are serialized with `by_alias=True`).

    Parameters:
        granularity: The granularity passed to the API for summarizing usage.
        dataset: Optional dataset name forwarded to the API; `None` sends no
            dataset (presumably meaning "all datasets" -- confirm against the
            API documentation).
        offset: Forwarded unchanged to the API (presumably a paging offset).
        limit: Forwarded unchanged to the API (presumably the page size).

    Returns:
        A `pandas.DataFrame` with one row per returned entry. If the API
        returns no entries, the frame is empty and has no columns.
    '''

    session = get_session()

    with geoengine_openapi_client.ApiClient(session.configuration) as api_client:
        user_api = geoengine_openapi_client.UserApi(api_client)
        response = user_api.data_usage_summary_handler(
            dataset=dataset,
            granularity=granularity,
            offset=offset,
            limit=limit,
        )

    # One dict per entry; `entry` (not `data_usage`) avoids shadowing the
    # module-level function of that name.
    rows = [entry.dict(by_alias=True) for entry in response]

    return pd.DataFrame(rows)


def plot_data_usage_summary(
    granularity: geoengine_openapi_client.UsageSummaryGranularity,
    dataset: Optional[str] = None,
    offset: int = 0,
    limit: int = 10,
) -> None:
    '''
    Plot the data usage summary as a bar chart.

    Fetches the summary via `data_usage_summary` (all arguments are passed
    through unchanged), pivots it to one column per dataset and one row per
    timestamp, and shows a bar chart of the counts with matplotlib. Missing
    dataset/timestamp combinations are plotted as 0.

    If the summary is empty, a warning is issued and nothing is plotted.
    '''
    # Imported lazily so that importing the module does not pull in the
    # plotting stack.
    import warnings

    import matplotlib.pyplot as plt
    import pandas as pd

    df = data_usage_summary(granularity, dataset, offset, limit)

    if df.empty:
        # An empty response yields a frame without columns, so the column
        # accesses below would fail with a KeyError.
        warnings.warn('No data usage available to plot.')
        return

    # Drop the time zone information from the timestamps before they become
    # the index (and thus the x-axis labels).
    df['timestamp'] = pd.to_datetime(df['timestamp']).dt.tz_localize(None)

    pivot_df = df.pivot(index='timestamp', columns='dataset', values='count').fillna(0)
    pivot_df.plot(kind='bar', figsize=(10, 6))

    plt.title('Data Usage by Dataset over time')
    plt.xlabel('Timestamp')
    plt.ylabel('Count')
    plt.xticks(rotation=45)
    plt.legend(title='Dataset')
    plt.show()
git+https://github.com/geo-engine/openapi-client@esg-quota#subdirectory=python # TODO update when merged geopandas >=0.9,<0.15 matplotlib >=3.5,<3.8 numpy >=1.21,<2