From 115bd7a3eeb0b90f6020e41dba63d3fff8657565 Mon Sep 17 00:00:00 2001
From: sylo
Date: Tue, 12 Nov 2024 18:30:01 -0500
Subject: [PATCH 1/2] adding health monitoring tutorial notebook

---
 .../3_health_monitoring_analysis.ipynb | 284 ++++++++++++++++++
 1 file changed, 284 insertions(+)
 create mode 100644 tutorial_notebooks/3_health_monitoring_analysis.ipynb

diff --git a/tutorial_notebooks/3_health_monitoring_analysis.ipynb b/tutorial_notebooks/3_health_monitoring_analysis.ipynb
new file mode 100644
index 0000000..d033e21
--- /dev/null
+++ b/tutorial_notebooks/3_health_monitoring_analysis.ipynb
@@ -0,0 +1,284 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# TemporalScope Tutorial: Health Monitoring Analysis\n",
+    "\n",
+    "## Overview\n",
+    "\n",
+    "This tutorial demonstrates how to analyze temporal health data using the **TemporalScope** framework. We'll work with multiple health metrics to showcase both machine learning and deep learning approaches to temporal analysis.\n",
+    "\n",
+    "### Summary\n",
+    "\n",
+    "| **Step** | **Description** |\n",
+    "|-----------|---------------------------------------------------------------------------------|\n",
+    "| **1** | **Data Generation**: Create synthetic health data with realistic patterns |\n",
+    "| **2** | **TimeFrame Setup**: Initialize temporal data structures for each health metric |\n",
+    "| **3** | **ML Processing**: Prepare data for one-step-ahead forecasting |\n",
+    "| **4** | **DL Processing**: Prepare sequence data for deep learning models |\n",
+    "| **5** | **Temporal Splits**: Create proper train/test partitions |\n",
+    "\n",
+    "### Key Concepts\n",
+    "\n",
+    "- **Multiple Health Metrics**: Blood pressure, stress levels, and heart rate\n",
+    "- **Temporal Patterns**: Daily, weekly, and seasonal variations\n",
+    "- **Forecasting Approaches**: Both one-step-ahead and sequence-based predictions\n",
+    "- **Proper Validation**: Time-aware train/test splitting\n",
+    "\n",
+    "### Steps\n",
+    "\n",
+    "1. **Generate Health Data**\n",
+    "   - Create synthetic but realistic health measurements\n",
+    "   - Include known physiological patterns and correlations\n",
+    "\n",
+    "2. **Initialize TimeFrames**\n",
+    "   - Separate temporal structures for each health metric\n",
+    "   - Enable parallel processing capabilities\n",
+    "\n",
+    "3. **Prepare Forecasting Data**\n",
+    "   - Machine learning mode for immediate predictions\n",
+    "   - Deep learning mode for sequence-based analysis\n",
+    "\n",
+    "4. **Create Temporal Splits**\n",
+    "   - Sliding window approach\n",
+    "   - Maintain temporal ordering\n",
+    "   - Multiple validation periods"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "source": [
+    "import pandas as pd\n",
+    "import numpy as np\n",
+    "from datetime import datetime, timedelta\n",
+    "\n",
+    "from temporalscope.core.temporal_data_loader import TimeFrame\n",
+    "from temporalscope.core.temporal_target_shifter import TemporalTargetShifter\n",
+    "from temporalscope.partition.sliding_window import SlidingWindowPartitioner\n",
+    "from temporalscope.core.core_utils import print_divider"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "source": [
+    "def generate_health_data(start_date: str = '2023-01-01', days: int = 365):\n",
+    "    \"\"\"Generate synthetic health monitoring data.\n",
+    "    \n",
+    "    Args:\n",
+    "        start_date (str): Starting date for the data\n",
+    "        days (int): Number of days to generate\n",
+    "    \"\"\"\n",
+    "    # Create date range for daily measurements\n",
+    "    dates = pd.date_range(start=start_date, periods=days, freq='D')\n",
+    "    \n",
+    "    # Time array for generating patterns\n",
+    "    t = np.arange(days)\n",
+    "    \n",
+    "    # Seasonal effect (yearly cycle)\n",
+    "    # - Amplitude of 5 represents typical seasonal BP variation\n",
+    "    # - 2π/365 gives us one complete cycle per year\n",
+    "    seasonal_effect = 5 * np.sin(2 * np.pi * t / 365)\n",
+    "    \n",
+    "    # Weekly pattern (work week stress)\n",
+    "    # - Amplitude of 3 for weekly BP fluctuation\n",
+    "    # - 2π/7 gives us one complete cycle per week\n",
+    "    weekly_effect = 3 * np.sin(2 * np.pi * t / 7)\n",
+    "    \n",
+    "    # Blood Pressure Generation\n",
+    "    # Systolic (120 typical baseline)\n",
+    "    # - Stronger influence from seasonal & weekly patterns\n",
+    "    # - Random variation (σ=3) for daily fluctuations\n",
+    "    systolic = 120 + seasonal_effect + weekly_effect + np.random.normal(0, 3, days)\n",
+    "    \n",
+    "    # Diastolic (80 typical baseline)\n",
+    "    # - Less affected by external patterns (multiplied by 0.5)\n",
+    "    # - Smaller random variation (σ=2)\n",
+    "    diastolic = 80 + seasonal_effect * 0.5 + weekly_effect * 0.5 + np.random.normal(0, 2, days)\n",
+    "    \n",
+    "    # Stress Level Generation (0-100 scale)\n",
+    "    # - Heavily influenced by weekly pattern (work stress)\n",
+    "    # - Larger random variation (σ=5) for daily life events\n",
+    "    # - Clipped to valid range [0,100]\n",
+    "    stress = 50 + weekly_effect + np.random.normal(0, 5, days)\n",
+    "    stress = np.clip(stress, 0, 100)\n",
+    "    \n",
+    "    # Heart Rate Generation\n",
+    "    # - Baseline of 70 bpm\n",
+    "    # - Correlates with stress (0.3 coefficient)\n",
+    "    # - Weekly pattern influence\n",
+    "    # - Moderate random variation (σ=3)\n",
+    "    heart_rate = 70 + 0.3 * stress + weekly_effect + np.random.normal(0, 3, days)\n",
+    "    \n",
+    "    return pd.DataFrame({\n",
+    "        'ds': dates,\n",
+    "        'systolic': systolic,\n",
+    "        'diastolic': diastolic,\n",
+    "        'stress_level': stress,\n",
+    "        'heart_rate': heart_rate\n",
+    "    })"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "source": [
+    "def create_metric_timeframes(df):\n",
+    "    \"\"\"Create TimeFrame objects for each health metric.\n",
+    "    \n",
+    "    Why separate TimeFrames?\n",
+    "    - Each metric might need different forecasting horizons\n",
+    "    - Allows parallel processing of different metrics\n",
+    "    - Can apply different temporal transformations per metric\n",
+    "    \"\"\"\n",
+    "    metrics = ['systolic', 'diastolic', 'stress_level', 'heart_rate']\n",
+    "    timeframes = {}\n",
+    "    \n",
+    "    for metric in metrics:\n",
+    "        # Using pandas backend for simplicity\n",
+    "        # Could switch to Modin/Polars for larger datasets\n",
+    "        timeframes[metric] = TimeFrame(\n",
+    "            df=df,\n",
+    "            time_col='ds',  # datetime column\n",
+    "            target_col=metric,  # metric to forecast\n",
+    "            backend='pd'\n",
+    "        )\n",
+    "    \n",
+    "    return timeframes"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "source": [
+    "def prepare_forecasting_data(timeframe, mode='machine_learning', sequence_length=7):\n",
+    "    \"\"\"Prepare data for forecasting using TemporalTargetShifter.\n",
+    "    \n",
+    "    Two modes supported:\n",
+    "    1. Machine Learning ('machine_learning') mode:\n",
+    "       - One-step-ahead prediction\n",
+    "       - Useful for immediate forecasts (next day)\n",
+    "       - Better for interpretable models (regression, etc.)\n",
+    "    \n",
+    "    2. Deep Learning ('deep_learning') mode:\n",
+    "       - Sequence-to-sequence prediction\n",
+    "       - Captures longer temporal patterns\n",
+    "       - Better for complex patterns (LSTM, etc.)\n",
+    "       - sequence_length=7 for weekly patterns\n",
+    "    \"\"\"\n",
+    "    shifter = TemporalTargetShifter(\n",
+    "        n_lags=1,  # How many steps to look ahead\n",
+    "        mode=mode,\n",
+    "        sequence_length=sequence_length if mode == 'deep_learning' else None,\n",
+    "        verbose=True\n",
+    "    )\n",
+    "    \n",
+    "    return shifter.fit_transform(timeframe)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "source": [
+    "def create_temporal_splits(timeframe, num_partitions=3):\n",
+    "    \"\"\"Create temporal train/test splits using sliding window.\n",
+    "    \n",
+    "    Why sliding window?\n",
+    "    - Maintains temporal ordering (crucial for time series)\n",
+    "    - Multiple partitions to assess model stability\n",
+    "    - Each partition moves forward in time\n",
+    "    - 70/30 split preserves enough history for training\n",
+    "    \n",
+    "    Why num_partitions=3?\n",
+    "    - Tests model on different time periods\n",
+    "    - Captures seasonal variations\n",
+    "    - Balance between validation and data usage\n",
+    "    \"\"\"\n",
+    "    partitioner = SlidingWindowPartitioner(\n",
+    "        tf=timeframe,\n",
+    "        num_partitions=num_partitions,  # Number of temporal splits\n",
+    "        train_pct=0.7,  # 70% for training\n",
+    "        test_pct=0.3  # 30% for testing\n",
+    "    )\n",
+    "    \n",
+    "    return list(partitioner.fit_transform())"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "source": [
+    "if __name__ == \"__main__\":\n",
+    "    # Step 1: Generate synthetic health data\n",
+    "    print_divider()\n",
+    "    print(\"Generating synthetic health data...\")\n",
+    "    health_df = generate_health_data()\n",
+    "    print(\"Preview of generated health data:\")\n",
+    "    print(health_df.head())\n",
+    "    print_divider()\n",
+    "    \n",
+    "    # Step 2: Create TimeFrames for each metric\n",
+    "    print(\"Initializing TimeFrames for each health metric...\")\n",
+    "    metric_timeframes = create_metric_timeframes(health_df)\n",
+    "    \n",
+    "    # Step 3: Demonstrate both ML and DL approaches\n",
+    "    print(\"\\nPreparing data for different forecasting approaches:\")\n",
+    "    for metric in ['heart_rate', 'stress_level']:\n",
+    "        print(f\"\\nProcessing {metric}:\")\n",
+    "        \n",
+    "        # ML mode (one-step-ahead)\n",
+    "        print(\"\\nMachine Learning mode (one-step-ahead):\")\n",
+    "        ml_data = prepare_forecasting_data(metric_timeframes[metric], mode='machine_learning')\n",
+    "        print(ml_data.head())\n",
+    "        \n",
+    "        # DL mode (sequence)\n",
+    "        print(\"\\nDeep Learning mode (sequence-based):\")\n",
+    "        dl_data = prepare_forecasting_data(metric_timeframes[metric], mode='deep_learning')\n",
+    "        print(dl_data.head())\n",
+    "        \n",
+    "        print_divider()\n",
+    "    \n",
+    "    # Step 4: Create and demonstrate temporal splits\n",
+    "    print(\"\\nCreating temporal splits for validation:\")\n",
+    "    heart_rate_splits = create_temporal_splits(metric_timeframes['heart_rate'])\n",
+    "    \n",
+    "    for i, partition in enumerate(heart_rate_splits):  # each item maps a partition label to its splits\n",
+    "        print(f\"\\nPartition {i+1}:\")\n",
+    "        print(f\"Train shape: {next(iter(partition.values()))['train'].shape}\")\n",
+    "        print(f\"Test shape: {next(iter(partition.values()))['test'].shape}\")\n",
+    "    \n",
+    "    print_divider()"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.8.0"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}

From a98e112ccbf9ac6121e753b15f1d8a33062e2bd2 Mon Sep 17 00:00:00 2001
From: sylo
Date: Tue, 12 Nov 2024 19:44:48 -0500
Subject: [PATCH 2/2] docs: notebook tutorial for downstream temporal x AI workflows

new notebook to show potential use case on health data

TODO: will expand downstream visualisations once backend is fixed
---
 tutorial_notebooks/3_health_monitoring_analysis.ipynb | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/tutorial_notebooks/3_health_monitoring_analysis.ipynb b/tutorial_notebooks/3_health_monitoring_analysis.ipynb
index d033e21..d56ce3f 100644
--- a/tutorial_notebooks/3_health_monitoring_analysis.ipynb
+++ b/tutorial_notebooks/3_health_monitoring_analysis.ipynb
@@ -8,7 +8,7 @@
     "\n",
     "## Overview\n",
     "\n",
-    "This tutorial demonstrates how to analyze temporal health data using the **TemporalScope** framework. We'll work with multiple health metrics to showcase both machine learning and deep learning approaches to temporal analysis.\n",
+    "This tutorial demonstrates how to analyze temporal biological data using the **TemporalScope** framework. We'll work with multiple health metrics to showcase both machine learning and deep learning approaches to temporal analysis.\n",
     "\n",
     "### Summary\n",
     "\n",
@@ -51,6 +51,7 @@
    "cell_type": "code",
    "execution_count": null,
    "metadata": {},
+   "outputs": [],
    "source": [
     "import pandas as pd\n",
     "import numpy as np\n",
@@ -66,6 +67,7 @@
    "cell_type": "code",
    "execution_count": null,
    "metadata": {},
+   "outputs": [],
    "source": [
     "def generate_health_data(start_date: str = '2023-01-01', days: int = 365):\n",
     "    \"\"\"Generate synthetic health monitoring data.\n",
@@ -128,6 +130,7 @@
    "cell_type": "code",
    "execution_count": null,
    "metadata": {},
+   "outputs": [],
    "source": [
     "def create_metric_timeframes(df):\n",
     "    \"\"\"Create TimeFrame objects for each health metric.\n",
@@ -157,6 +160,7 @@
    "cell_type": "code",
    "execution_count": null,
    "metadata": {},
+   "outputs": [],
    "source": [
     "def prepare_forecasting_data(timeframe, mode='machine_learning', sequence_length=7):\n",
     "    \"\"\"Prepare data for forecasting using TemporalTargetShifter.\n",
@@ -187,6 +191,7 @@
    "cell_type": "code",
    "execution_count": null,
    "metadata": {},
+   "outputs": [],
    "source": [
     "def create_temporal_splits(timeframe, num_partitions=3):\n",
     "    \"\"\"Create temporal train/test splits using sliding window.\n",
@@ -216,6 +221,7 @@
    "cell_type": "code",
    "execution_count": null,
    "metadata": {},
+   "outputs": [],
    "source": [
     "if __name__ == \"__main__\":\n",
     "    # Step 1: Generate synthetic health data\n",