diff --git a/notebooks/Analyze_user_behavior_across_different_lending_protocols.ipynb b/notebooks/Analyze_user_behavior_across_different_lending_protocols.ipynb new file mode 100644 index 00000000..2fa882db --- /dev/null +++ b/notebooks/Analyze_user_behavior_across_different_lending_protocols.ipynb @@ -0,0 +1,545 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "899660e4-3fb6-4106-a69f-4f321c01adea", + "metadata": {}, + "source": [ + "## Introduction" + ] + }, + { + "cell_type": "markdown", + "id": "026285ad-97b0-448a-883b-b9fb341f5807", + "metadata": {}, + "source": [ + "This study aims to analyze user interactions with various lending protocols, providing insights into their borrowing and lending patterns. The aim of the study is achieved by loading loan data from different sources and analysing user behaviour across the lending protocols. " + ] + }, + { + "cell_type": "markdown", + "id": "bf946978-9501-4e90-9e0f-7605959dc8bf", + "metadata": {}, + "source": [ + "## Objective\n", + "The primary objective of this study is to analyze user behavior across multiple lending protocols. We will achieve this by examining the data on loans, including user information, protocol details, collateral, and debt amounts. Our analysis will focus on answering key questions related to user engagement with different protocols, such as the number of users providing liquidity or borrowing on one or multiple protocols and the distribution of staked/borrowed capital across these protocols." + ] + }, + { + "cell_type": "markdown", + "id": "c0737f59-88be-4520-825e-639f2ad20d88", + "metadata": {}, + "source": [ + "## Methodology\n", + "\n", + "To conduct this analysis, we applied the following structured approach:\n", + "1. Data Loading: Create a data data load function that allows for easy switching between google storage and sql database, ensuring flexibility in data sourcing.\n", + "2. Data Visualization: Visualizing user behaviour across the lending protocols allow us to answer:\n", + " - The number of users providing liquidity or borrowing just one protocol versus multiple protocols.\n", + " - The distribution of borrowed capital across different lending protocols.\n", + "3. Venn Diagram Creation: Provides an overlap of user participation across different lending protocols, providing clear visual representation of multi-protocol engagement.\n" + ] + }, + { + "cell_type": "markdown", + "id": "be041630-1802-4b9b-b805-93a9d145f0db", + "metadata": {}, + "source": [ + "## Expected Outcomes\n", + "\n", + "This study will not only shed light on current user engagement patterns but also pave the way for future research and development in decentralized lending and borrowing platforms." + ] + }, + { + "cell_type": "markdown", + "id": "26184230-4eaf-4711-a644-32a273981129", + "metadata": {}, + "source": [ + "### Importing Libraries" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "3bcbd644-a92c-4cb3-84ed-64eb71dd0cff", + "metadata": {}, + "outputs": [], + "source": [ + "# importing necessary libraries\n", + "import pandas as pd\n", + "import matplotlib.pyplot as plt\n", + "import seaborn as sns\n", + "from matplotlib_venn import venn3\n", + "from sqlalchemy import create_engine\n", + "import gcsfs" + ] + }, + { + "cell_type": "markdown", + "id": "58bf0c47-95f4-4556-8b9d-fc780670f5ce", + "metadata": {}, + "source": [ + "### Loading the Data" + ] + }, + { + "cell_type": "markdown", + "id": "99f508d9-4c05-4058-9974-ca6f6398ae88", + "metadata": {}, + "source": [ + "#### From Postgress" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "fb9b9d7d-1abc-498b-8358-7c12cb01b0f4", + "metadata": {}, + "outputs": [], + "source": [ + "# from sqlalchemy import create_engine\n", + "\n", + "# # List of protocols (table names in the PostgreSQL database)\n", + "# protocols = [\"zklend\", \"nostra_alpha\", \"nostra_mainnet\", \"hashstack_v0\", \"hashstack_v1\"]\n", + "\n", + "# # Database connection string\n", + "# db_connection_string = 'postgresql://username:password@hostname:port/database'\n", + "\n", + "# # Load data from PostgreSQL\n", + "# postgres_df_list = []\n", + "# engine = create_engine(db_connection_string)\n", + "\n", + "# for protocol in protocols:\n", + "# df = pd.read_sql_table(protocol, con=engine)\n", + "# df['Protocol'] = protocol\n", + "# postgres_df_list.append(df)\n", + "\n", + "# # Combine all PostgreSQL DataFrames into one\n", + "# df_loans_postgres = pd.concat(postgres_df_list, ignore_index=True)a" + ] + }, + { + "cell_type": "markdown", + "id": "c5820bcd-27a7-4a72-b2c9-8b81dda59110", + "metadata": {}, + "source": [ + "#### From GCS" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "bc9bd276-f6c0-4903-8f18-85e580de6f2d", + "metadata": {}, + "outputs": [], + "source": [ + "# Dictionary of Parquet URLs\n", + "parquet_urls = {\n", + " \"zklend\": \"https://storage.googleapis.com/derisk-persistent-state/zklend_data/loans.parquet\",\n", + " \"nostra_alpha\": \"https://storage.googleapis.com/derisk-persistent-state/nostra_alpha_data/loans.parquet\",\n", + " \"nostra_mainnet\": \"https://storage.googleapis.com/derisk-persistent-state/nostra_mainnet_data/loans.parquet\",\n", + " \"hashstack_v0\": \"https://storage.googleapis.com/derisk-persistent-state/hashstack_v0_data/loans.parquet\",\n", + " \"hashstack_v1\": \"https://storage.googleapis.com/derisk-persistent-state/hashstack_v1_data/loans.parquet\",\n", + "}\n", + "\n", + "# Load data from GCS\n", + "gcs_df_list = []\n", + "for protocol, url in parquet_urls.items():\n", + " fs = gcsfs.GCSFileSystem()\n", + " gcs_path = url.replace('https://storage.googleapis.com/', '')\n", + " with fs.open(gcs_path, 'rb') as f:\n", + " df = pd.read_parquet(f, engine='pyarrow')\n", + " df['Protocol'] = protocol\n", + " gcs_df_list.append(df)\n", + "\n", + "# Combine all GCS DataFrames into one\n", + "df_loans = pd.concat(gcs_df_list, ignore_index=True)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6a9975f0-b5aa-49b4-bd47-b560d7c590e8", + "metadata": {}, + "outputs": [], + "source": [ + "df_loans.head()" + ] + }, + { + "cell_type": "markdown", + "id": "1f0db2bb-ca64-4e30-b3d5-9e864289a944", + "metadata": {}, + "source": [ + "### Determine User Activity\n", + "#### Users Providing Liquidity and their Protocols" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c6a72339-26b1-49e6-943f-4bea5ba8b3a3", + "metadata": {}, + "outputs": [], + "source": [ + "# the distribution of protocols among users\n", + "df_loans['Protocol'].value_counts()" + ] + }, + { + "cell_type": "markdown", + "id": "b1d0cfee-cd8d-462a-a063-ae38030b501c", + "metadata": {}, + "source": [ + "### Subset the DataFrame for users who provide liquidity" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "084931be-14e4-4182-91dd-fa5701265967", + "metadata": { + "scrolled": true + }, + "outputs": [], + "source": [ + "from collections import defaultdict, Counter\n", + "\n", + "liquidity_data = df_loans[df_loans['Collateral (USD)'] > 0]\n", + "\n", + "# Initialize a dictionary to store users and their associated protocols for liquidity\n", + "user_protocols_liquidity = defaultdict(set)\n", + "\n", + "# Populate the dictionary\n", + "for _, row in liquidity_data.iterrows():\n", + " user = row['User']\n", + " protocol = row['Protocol']\n", + " user_protocols_liquidity[user].add(protocol)\n", + "\n", + "# Count the number of protocols each user lends on\n", + "user_protocol_counts_liquidity = Counter([len(protocols) for protocols in user_protocols_liquidity.values()])\n", + "\n", + "# Convert the counter to a DataFrame for better readability\n", + "protocol_count_df_liquidity = pd.DataFrame.from_dict(user_protocol_counts_liquidity, orient='index').reset_index()\n", + "protocol_count_df_liquidity.columns = ['Number of Protocols', 'Number of Users']\n", + "\n", + "# Sort the DataFrame by the number of protocols\n", + "protocol_count_df_liquidity = protocol_count_df_liquidity.sort_values(by='Number of Protocols')" + ] + }, + { + "cell_type": "markdown", + "id": "db697ca0-30fe-4d21-a5fc-b01c5db05ad9", + "metadata": {}, + "source": [ + "##### Users Providing Liquidity Across the Top 3 Protocols" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "2853f77c-143b-4d6a-b584-20a515fa7d09", + "metadata": {}, + "outputs": [], + "source": [ + "## Helper funcitons:\n", + "# Function to get unique users per protocol\n", + "def get_unique_users(df, value_column):\n", + " protocol_users = defaultdict(set)\n", + " for protocol in df['Protocol'].unique():\n", + " users = set(df[df['Protocol'] == protocol]['User'])\n", + " protocol_users[protocol].update(users)\n", + " return protocol_users\n", + " \n", + "# Helper function to plot Venn diagram\n", + "def plot_venn_diagram(user_sets, title):\n", + " plt.figure(figsize=(10, 8))\n", + " venn3(subsets=(user_sets[0], user_sets[1], user_sets[2]), \n", + " set_labels=('zklend', 'nostra_mainnet', 'nostra_alpha'))\n", + " plt.title(title)\n", + " plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9af460f2-dc1a-427c-a564-944ef18499e6", + "metadata": {}, + "outputs": [], + "source": [ + "# Get unique users providing liquidity\n", + "liquidity_df = df_loans[df_loans['Collateral (USD)'] > 0]\n", + "liquidity_protocol_users = get_unique_users(liquidity_df, 'Collateral (USD)')\n", + "\n", + "\n", + "# Prepare sets for Venn diagrams (top 3 protocols by user count)\n", + "top_protocols = ['zklend', 'nostra_mainnet', 'nostra_alpha']\n", + "liquidity_user_sets = [liquidity_protocol_users[protocol] for protocol in top_protocols]\n", + "\n", + "\n", + "# Plot Venn diagrams\n", + "plot_venn_diagram(liquidity_user_sets, 'Users Providing Liquidity Across Top 3 Protocols')\n", + "# plot_venn_diagram(debt_user_sets, 'Users Borrowing Across Top 3 Protocols')" + ] + }, + { + "cell_type": "markdown", + "id": "08c23e4b-d5f0-4c9e-bf78-05ee72c667ae", + "metadata": {}, + "source": [ + "#### Users Borrowing Behavior and their Protocols" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "id": "ccdd4123-9a4e-4def-93a9-d8d21b637962", + "metadata": { + "scrolled": true + }, + "outputs": [], + "source": [ + "# Subset the DataFrame for users who have debt\n", + "debt_data = df_loans[df_loans['Debt (USD)'] > 0]\n", + "\n", + "# Initialize a dictionary to store users and their associated protocols for debt\n", + "user_protocols_debt = defaultdict(set)\n", + "\n", + "# Populate the dictionary\n", + "for _, row in debt_data.iterrows():\n", + " user = row['User']\n", + " protocol = row['Protocol']\n", + " user_protocols_debt[user].add(protocol)\n", + "\n", + "# Count the number of protocols each user borrows on\n", + "user_protocol_counts_debt = Counter([len(protocols) for protocols in user_protocols_debt.values()])\n", + "\n", + "# Convert the counter to a DataFrame for better readability\n", + "protocol_count_df_debt = pd.DataFrame.from_dict(user_protocol_counts_debt, orient='index').reset_index()\n", + "protocol_count_df_debt.columns = ['Number of Protocols', 'Number of Users']\n", + "\n", + "# Sort the DataFrame by the number of protocols\n", + "protocol_count_df_debt = protocol_count_df_debt.sort_values(by='Number of Protocols')\n", + "\n", + "# Print the result for debt\n", + "# print(\"Users borrowing:\")\n", + "# print(protocol_count_df_debt)" + ] + }, + { + "cell_type": "markdown", + "id": "2bd81043-e6fc-44cf-b6d3-57c18656abfb", + "metadata": {}, + "source": [ + "##### Users Borrowing Across the Top 3 Protocols" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0895eb29-a63d-4fcd-998d-0f77ae444fce", + "metadata": {}, + "outputs": [], + "source": [ + "# Get unique users having debt\n", + "debt_df = df_loans[df_loans['Debt (USD)'] > 0]\n", + "debt_protocol_users = get_unique_users(debt_df, 'Debt (USD)')\n", + "\n", + "\n", + "# Prepare sets for Venn diagrams (top 3 protocols by user count)\n", + "top_protocols = ['zklend', 'nostra_mainnet', 'nostra_alpha']\n", + "debt_user_sets = [debt_protocol_users[protocol] for protocol in top_protocols]\n", + "\n", + "# Plot Venn diagrams\n", + "plot_venn_diagram(debt_user_sets, 'Users Borrowing Across Top 3 Protocols')" + ] + }, + { + "cell_type": "markdown", + "id": "5b1906e6-2ccf-42c5-ab43-7f7e47b925ef", + "metadata": {}, + "source": [ + "#### Distribution of stacked/borrowed capital across Protocols" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0c4a2a57-bd8e-44e0-ab0a-c969a311cb2a", + "metadata": {}, + "outputs": [], + "source": [ + "import seaborn as sns\n", + "\n", + "# Function to calculate total capital per token across protocols\n", + "def calculate_capital(df, column_name):\n", + " capital_per_protocol = df.groupby('Protocol')[column_name].sum()\n", + " return capital_per_protocol\n", + "\n", + "# Function to plot bar chart for token capital across protocols\n", + "def plot_capital(capital, title):\n", + " plt.figure(figsize=(10, 6))\n", + " sns.barplot(x=capital.index, y=capital.values)\n", + " plt.xlabel('Protocol')\n", + " plt.ylabel('Total Capital (USD)')\n", + " plt.title(title)\n", + " plt.xticks(rotation=45)\n", + " plt.show()\n", + "\n", + "# Calculate total staked capital per token\n", + "staked_capital = calculate_capital(liquidity_df, 'Collateral (USD)')\n", + "plot_capital(staked_capital, 'Total Staked Capital per Token Across Protocols')\n", + "\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "id": "e6974c13-63b4-4722-b5a5-215c46c1c335", + "metadata": {}, + "source": [ + "#### Total capital borrowed per token" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c66e6f79-aeb8-41e0-aa01-a17ee535d50f", + "metadata": {}, + "outputs": [], + "source": [ + "# Calculate total borrowed capital per token\n", + "borrowed_capital = calculate_capital(debt_df, 'Debt (USD)')\n", + "plot_capital(borrowed_capital, 'Total Borrowed Capital Across Protocols')" + ] + }, + { + "cell_type": "markdown", + "id": "05355d6c-0595-41d4-b509-09834bd84744", + "metadata": {}, + "source": [ + "#### To Analyze the amounts stacked on a per token basis across the protocols" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "id": "74401d69-fff8-4c41-a5ce-6f7e4b1800c5", + "metadata": {}, + "outputs": [], + "source": [ + "import re\n", + "# List of tokens\n", + "tokens = [\"ETH\", \"wBTC\", \"USDC\", \"DAI\", \"USDT\", \"wstETH\", \"LORDS\", \"STRK\", \"UNO\", \"ZEND\"]\n", + "\n", + "def parse_token_amounts(column, protocol_column, tokens):\n", + " token_amounts = defaultdict(lambda: defaultdict(float))\n", + " for entry, protocol in zip(column, protocol_column):\n", + " for token in tokens:\n", + " match = re.search(f'{token}: ([0-9.]+)', entry)\n", + " if match:\n", + " token_amounts[protocol][token] += float(match.group(1))\n", + " return token_amounts\n", + "\n", + "# Extract token amounts for collateral and debt\n", + "collateral_amounts = parse_token_amounts(df_loans['Collateral'], df_loans['Protocol'], tokens)\n", + "debt_amounts = parse_token_amounts(df_loans['Debt'], df_loans['Protocol'], tokens)" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "id": "7ac76044-ee76-4807-b497-ad1541ec45a2", + "metadata": {}, + "outputs": [], + "source": [ + "# agregating the data\n", + "# Convert the aggregated data to DataFrame for better readability\n", + "collateral_list = [(protocol, token, amount) for protocol, tokens in collateral_amounts.items() for token, amount in tokens.items()]\n", + "collateral_df = pd.DataFrame(collateral_list, columns=['Protocol', 'Token', 'Total Collateral (USD)'])\n", + "\n", + "debt_list = [(protocol, token, amount) for protocol, tokens in debt_amounts.items() for token, amount in tokens.items()]\n", + "debt_df = pd.DataFrame(debt_list, columns=['Protocol', 'Token', 'Total Debt (USD)'])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "521fbd91-b7d6-4cfd-99aa-a203e63f3daa", + "metadata": {}, + "outputs": [], + "source": [ + "collateral_df.groupby(['Protocol','Token'])['Total Collateral (USD)'].sum()" + ] + }, + { + "cell_type": "markdown", + "id": "6d78664c-e8bc-4b00-997a-f31de512ec84", + "metadata": {}, + "source": [ + "#### Data Visualization" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d654007a-8ef0-4c33-ab35-50f65b030cfd", + "metadata": {}, + "outputs": [], + "source": [ + "# Visualization\n", + "import matplotlib.pyplot as plt\n", + "import seaborn as sns\n", + "\n", + "# Plotting collateral amounts\n", + "plt.figure(figsize=(12, 8))\n", + "sns.barplot(data=collateral_df, x='Protocol', y='Total Collateral (USD)', hue='Token')\n", + "plt.xlabel('Tokens')\n", + "plt.ylabel('Total Collateral (USD)')\n", + "plt.title('Total Collateral per Token and Protocol')\n", + "plt.xticks(rotation=45)\n", + "plt.legend(title='Protocol')\n", + "plt.show()\n", + "\n", + "# Plotting debt amounts\n", + "plt.figure(figsize=(12, 8))\n", + "sns.barplot(data=debt_df, x='Protocol', y='Total Debt (USD)', hue='Token')\n", + "plt.xlabel('Tokens')\n", + "plt.ylabel('Total Debt (USD)')\n", + "plt.title('Total Debt per Token and Protocol')\n", + "plt.xticks(rotation=45)\n", + "plt.legend(title='Protocol')\n", + "plt.show()\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "28bd7cf4-a32b-44a6-9111-684bf7d43335", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.12" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +}