From 2c8949b0bb86dd109a8747453880ebcadc59477a Mon Sep 17 00:00:00 2001 From: Phil Lu <23727507+allt0ld@users.noreply.github.com> Date: Mon, 14 Oct 2024 12:35:14 -0400 Subject: [PATCH] Sim Pipeline Redesign --- Makefile | 2 +- curvesim/__init__.py | 10 +- curvesim/constants.py | 18 + curvesim/metrics/__init__.py | 2 +- curvesim/metrics/state_log/log.py | 54 +- curvesim/network/coingecko.py | 20 +- curvesim/network/curve_prices.py | 195 +++++++- curvesim/network/subgraph.py | 6 +- curvesim/pipelines/__init__.py | 11 +- curvesim/pipelines/common/__init__.py | 2 + curvesim/pipelines/common/get_pool_data.py | 2 +- curvesim/pipelines/config.gin | 77 ++- curvesim/pipelines/simple/__init__.py | 8 +- curvesim/pipelines/simple/trader.py | 9 +- curvesim/pipelines/simulation.py | 352 ++++++++++++-- .../pipelines/vol_limited_arb/__init__.py | 5 +- curvesim/pipelines/vol_limited_arb/trader.py | 19 +- curvesim/pool/__init__.py | 8 +- curvesim/pool/sim_interface/cryptoswap.py | 2 +- curvesim/pool_data/__init__.py | 2 +- curvesim/pool_data/metadata/cryptoswap.py | 2 +- .../pool_data/metadata/sim_market_config.py | 460 ++++++++++++++++++ curvesim/pool_data/metadata/stableswap.py | 2 +- curvesim/pool_data/queries/metadata.py | 61 ++- curvesim/price_data/__init__.py | 10 +- curvesim/price_data/data_sources/__init__.py | 1 + curvesim/price_data/data_sources/coingecko.py | 3 +- curvesim/price_data/data_sources/local.py | 17 +- curvesim/sim/__init__.py | 1 + curvesim/templates/data_source.py | 47 +- curvesim/templates/log.py | 4 +- curvesim/templates/reference_market.py | 50 +- curvesim/templates/strategy.py | 19 +- curvesim/templates/time_sequence.py | 6 +- curvesim/templates/trader.py | 39 +- curvesim/utils/address.py | 2 +- test/integration/test_get_pool_metadata.py | 1 + test/integration/test_subgraph.py | 1 + test/pool_metadata.py | 1 + 39 files changed, 1306 insertions(+), 225 deletions(-) create mode 100644 curvesim/pool_data/metadata/sim_market_config.py diff --git a/Makefile b/Makefile index ed22ea332..107360c74 100644 --- a/Makefile +++ b/Makefile @@ -89,7 +89,7 @@ format: black: @echo "$(REVERSE)Running$(RESET) $(BOLD)black$(RESET)..." @black --version - @black . + @black --extend-exclude "/(lib|bin)" . @echo "$(REVERSE)Running$(RESET) $(BOLD)isort$(RESET)..." @isort --version-number @isort curvesim diff --git a/curvesim/__init__.py b/curvesim/__init__.py index 10170ee3b..b7570f766 100644 --- a/curvesim/__init__.py +++ b/curvesim/__init__.py @@ -1,7 +1,7 @@ """Package to simulate Curve pool.""" -__all__ = ["autosim", "bonding_curve", "order_book", "__version__", "__version_info__"] +# __all__ = ["autosim", "bonding_curve", "order_book", "__version__", "__version_info__"] -from ._order_book import order_book -from .sim import autosim -from .tools import bonding_curve -from .version import __version__, __version_info__ +# from ._order_book import order_book +# from .sim import autosim +# from .tools import bonding_curve +# from .version import __version__, __version_info__ diff --git a/curvesim/constants.py b/curvesim/constants.py index f02f68439..6389bd3bd 100644 --- a/curvesim/constants.py +++ b/curvesim/constants.py @@ -35,6 +35,8 @@ class Chain(StrEnum): AVALANCHE = "avalanche" MATIC = "matic" XDAI = "xdai" + BASE = "base" + FRAXTAL = "fraxtal" class Env(StrEnum): @@ -42,3 +44,19 @@ class Env(StrEnum): PROD = "prod" STAGING = "staging" + + +class CurvePreset(StrEnum): + """Names for presets within Curve.fi's pool creation UI.""" + # Stableswap + FIAT_REDEEMDABLE_STABLECOINS = "fiat redeemable stablecoins" + CRYPTO_COLLATERALIZED_STABLECOINS = "crypto collateralized stablecoins" + STABLESWAP_LIQUID_RESTAKING_TOKENS = "stableswap liquid restaking tokens" + # Cryptoswap + CRYPTO = "crypto" + FOREX = "forex" + LIQUID_STAKING_DERIVATIVES = "liquid staking derivatives" + CRYPTOSWAP_LIQUID_RESTAKING_TOKENS = "cryptoswap liquid restaking tokens" + # Tricrypto + TRICRYPTO = "tricrypto" # USD-wrapped BTC-ETH + THREE_COIN_VOLATILE = "three coin volatile" # USD-ETH-any volatile token diff --git a/curvesim/metrics/__init__.py b/curvesim/metrics/__init__.py index a54076295..c3f7f3a94 100644 --- a/curvesim/metrics/__init__.py +++ b/curvesim/metrics/__init__.py @@ -12,7 +12,7 @@ 2. :class:`.StateLog`: Logger that records simulation/pool state throughout a simulation and computes - metrics at the end of each simulation run. + metrics at the end of each simulation run. TODO: update 3. :class:`.SimResults`: Container for simulation results with methods to plot or return recorded diff --git a/curvesim/metrics/state_log/log.py b/curvesim/metrics/state_log/log.py index 614f89712..a65d508fb 100644 --- a/curvesim/metrics/state_log/log.py +++ b/curvesim/metrics/state_log/log.py @@ -2,9 +2,10 @@ Module to house the `StateLog`, a generic class to record changing pool states during simulations. """ -from pandas import DataFrame, concat +import gin + +from pandas import DataFrame -from curvesim.metrics.base import PoolMetric from curvesim.templates import Log from curvesim.utils import override @@ -12,31 +13,41 @@ from .pool_state import get_pool_state +@gin.register class StateLog(Log): """ Logger that records simulation/pool state throughout each simulation run and - computes metrics at the end of each run. + computes metrics at the end of each run. TODO: CHANGE LATER """ __slots__ = [ - "metrics", - "pool", "state_per_run", "state_per_trade", ] - def __init__(self, pool, metrics): - self.pool = pool - self.metrics = prepare_metrics(metrics, pool) + def __init__(self, pool): + """ + TODO + pool : SimPool + """ + # TODO: config the func with gin? self.state_per_run = get_pool_parameters(pool) self.state_per_trade = [] @override def update(self, **kwargs): """Records pool state and any keyword arguments provided.""" + state: dict = {} + + if "pool" in kwargs: + pool = kwargs.pop("pool") # Simpool type + state["pool_state"] = get_pool_state(pool) + + state.update(kwargs) - self.state_per_trade.append({"pool_state": get_pool_state(self.pool), **kwargs}) + self.state_per_trade.append(state) + @override def get_logs(self): """Returns the accumulated log data.""" @@ -49,28 +60,3 @@ def get_logs(self): "pool_parameters": DataFrame(self.state_per_run, index=[0]), **state_per_trade, } - - @override - def compute_metrics(self): - """Computes metrics from the accumulated log data.""" - - state_logs = self.get_logs() - metric_data = [metric.compute(state_logs) for metric in self.metrics] - data_per_trade, summary_data = tuple(zip(*metric_data)) # transpose tuple list - - return ( - state_logs["pool_parameters"], - concat(data_per_trade, axis=1), - concat(summary_data, axis=1), - ) - - -def prepare_metrics(metrics, pool): - """ - Applies any neccessary preparations to the input metrics. - Currently, only updates the pool object for PoolMetrics. - """ - for metric in metrics: - if isinstance(metric, PoolMetric): - metric.set_pool(pool) - return metrics diff --git a/curvesim/network/coingecko.py b/curvesim/network/coingecko.py index f02472e21..02b9271f6 100644 --- a/curvesim/network/coingecko.py +++ b/curvesim/network/coingecko.py @@ -1,14 +1,22 @@ """ Network connector for Coingecko. """ +import os + # pylint: disable=redefined-outer-name import pandas as pd +from curvesim.utils import get_env_var + from .http import HTTP from .utils import sync URL = "https://api.coingecko.com/api/v3/" +# Creating a free API key on Coingecko enables increased response speed and rate limits +# https://www.coingecko.com/en/api/pricing +API_KEY = get_env_var("COINGECKO_API_KEY", default=None) + PLATFORMS = { "mainnet": "ethereum", "arbitrum": "arbitrum-one", @@ -24,6 +32,7 @@ async def _get_prices(coin_id, vs_currency, start, end): url = URL + f"coins/{coin_id}/market_chart/range" p = {"vs_currency": vs_currency, "from": start, "to": end} + p = _add_api_key_param(p) r = await HTTP.get(url, params=p) @@ -46,14 +55,23 @@ async def coin_id_from_address(address, chain): address = address.lower() chain = PLATFORMS[chain.lower()] url = URL + f"coins/{chain}/contract/{address}" + p = _add_api_key_param({}) - r = await HTTP.get(url) + r = await HTTP.get(url, params=p) coin_id = r["id"] return coin_id +def _add_api_key_param(query_params: dict) -> dict: + api_key_param = "x_cg_demo_api_key" + if (API_KEY != None) and (api_key_param not in query_params): + query_params.update({api_key_param: API_KEY}) + + return query_params + + # Sync get_prices_sync = sync(get_prices) coin_id_from_address_sync = sync(coin_id_from_address) diff --git a/curvesim/network/curve_prices.py b/curvesim/network/curve_prices.py index f598f0617..7c3f7f072 100644 --- a/curvesim/network/curve_prices.py +++ b/curvesim/network/curve_prices.py @@ -1,21 +1,28 @@ """ Network connector for Curve Prices API. """ - -from typing import List +from time import time +from typing import List, Dict, Optional from eth_utils import to_checksum_address from pandas import DataFrame, to_datetime +from curvesim.logging import get_logger from curvesim.exceptions import ApiResultError, CurvesimValueError from .http import HTTP from .utils import sync +logger = get_logger(__name__) + URL = "https://prices.curve.fi/v1/" -CHAIN_ALIASES = {"mainnet": "ethereum"} +CHAIN_ALIASES = { + "mainnet": "ethereum", + "matic": "polygon", +} +REVERSE_CHAIN_ALIASES = {alias: chain for chain, alias in CHAIN_ALIASES.items()} async def _get_pool_pair_volume( pool_address, @@ -26,7 +33,7 @@ async def _get_pool_pair_volume( *, chain="ethereum", interval="day", -): +) -> List[Dict]: chain = _chain_from_alias(chain) pool_address = to_checksum_address(pool_address) main_token_address = to_checksum_address(main_token_address) @@ -42,16 +49,15 @@ async def _get_pool_pair_volume( } r = await HTTP.get(url, params=params) - try: - data = r["data"] - except KeyError as e: + data: List[Dict] = r["data"] + if data == []: raise ApiResultError( "No historical volume returned for\n" f"Pool: '{pool_address}', Chain: '{chain}',\n" f"Tokens: (main: {main_token_address}, " f"reference: {reference_token_address}),\n" f"Timestamps: (start: {start_ts}, end: {end_ts})" - ) from e + ) return data @@ -114,16 +120,179 @@ async def get_pool_pair_volume( return df +async def _pool_metadata(address, chain) -> Dict: + """""" + chain = _chain_from_alias(chain) + address = to_checksum_address(address) + url = URL + f"pools/{chain}/{address}/metadata" + + r = await HTTP.get(url) + data: dict = r + + # TODO - see _get_pool_pair_volume + if data["vyper_version"] == None or data["deployment_tx"] == None or data["deployment_block"] == None: + raise ApiResultError() + + return data + + +async def _pool_parameters(address, chain, start_ts, end_ts) -> List[Dict]: + """""" + chain = _chain_from_alias(chain) + address = to_checksum_address(address) + url = URL + f"snapshots/{chain}/{address}" + params = {"start": start_ts, "end": end_ts} + + r = await HTTP.get(url, params=params) + + data: List[Dict] = r["data"] + if data == []: + # TODO + raise ApiResultError( + "pass\n" + f"Timestamps: (start: {start_ts}, end: {end_ts})" + ) + + return data + + +async def _pool_balances(address, chain, start_ts, end_ts, unit="day") -> List[Dict]: + """""" + chain = _chain_from_alias(chain) + address = to_checksum_address(address) + url = URL + f"snapshots/{chain}/{address}/tvl" + params = {"start": start_ts, "end": end_ts, "unit": unit} + + r = await HTTP.get(url, params=params) + + data: list[dict] = r["data"] + if data == []: + # TODO + raise ApiResultError( + "pass\n" + f"Timestamps: (start: {start_ts}, end: {end_ts})" + ) + + return data + + +async def pool_snapshot(address: str, chain: str = "ethereum", end_ts: Optional[int] = None) -> Dict: + """""" + if end_ts == None: + end_ts = int(time()) + + snapshot_start = end_ts - (24 * 60 * 60) + snapshot_end = end_ts + + pool_metadata: Dict = await _pool_metadata(address, chain) + params_snapshot: List[Dict] = await _pool_parameters(address, chain, snapshot_start, snapshot_end) + balances_snapshot: List[Dict] = await _pool_balances(address, chain, snapshot_start, snapshot_end) + + latest_snapshot: Dict = params_snapshot[0] + latest_balances: Dict = balances_snapshot[0] + + if pool_metadata["metapool"]: + # metapools currently only contain one token paired with their basepool's LP token (2 total) + # however, curve-prices lumps metapool tokens with basepool tokens + coins_in_pool: list[dict] = pool_metadata["coins"][:2] + basepool = await pool_snapshot(pool_metadata["base_pool"], chain=chain, end_ts=end_ts) + else: + coins_in_pool: list[dict] = pool_metadata["coins"] + basepool = None + + coins: Dict[str, list] = {"names": [], "addresses": [], "decimals": []} + + for info_dict in coins_in_pool: + coins["names"].append(info_dict["symbol"]) + coins["addresses"].append(to_checksum_address(info_dict["address"])) + coins["decimals"].append(info_dict["decimals"]) + + normalized_reserves: list[int] = [] + unnormalized_reserves: list[int] = [] + + for balance, decimals in zip(latest_balances["balances"], coins["decimals"]): + normalized_reserves.append(int(balance * 10**18)) + unnormalized_reserves.append(int(balance * 10**decimals)) + + data = { + "name": pool_metadata["name"], + "address": to_checksum_address(address), + "chain": chain if not chain in REVERSE_CHAIN_ALIASES else REVERSE_CHAIN_ALIASES[chain], + "symbol": pool_metadata["name"], + "pool_type": pool_metadata["pool_type"], + "params": {}, + "coins": coins, + "reserves": { + "by_coin": normalized_reserves, + "unnormalized_by_coin": unnormalized_reserves, + }, + "basepool": basepool, + "timestamp": end_ts, + } + + if pool_type_to_amm[data["pool_type"]] == "stableswap": + fee_mul = latest_snapshot["offpeg_fee_multiplier"] + + params = { + "A": int(latest_snapshot["a"]), + "fee": int(latest_snapshot["fee"]), + "fee_mul": int(fee_mul) if fee_mul != None else None, + "admin_fee": int(latest_snapshot["admin_fee"]), + "virtual_price": int(latest_snapshot["virtual_price"]), + } + data["params"].update(params) + + elif pool_type_to_amm[data["pool_type"]] == "cryptoswap": + token_price_base: float = latest_balances["token_prices"][0] + last_prices: list[int] = [int(usd * 10**18 / token_price_base) for usd in latest_balances["token_prices"][1:]] + last_prices_timestamp: int = int(latest_balances["timestamp"]) + + params = { + "A": int(latest_snapshot["a"]), + "gamma": int(latest_snapshot["gamma"]), + "fee_gamma": int(latest_snapshot["fee_gamma"]), + "mid_fee": int(latest_snapshot["mid_fee"]), + "out_fee": int(latest_snapshot["out_fee"]), + "allowed_extra_profit": int(latest_snapshot["allowed_extra_profit"]), + "adjustment_step": int(latest_snapshot["adjustment_step"]), + "ma_half_time": int(latest_snapshot["ma_half_time"]), + "price_scale": [int(p) for p in latest_snapshot["price_scale"]], + "price_oracle": [int(p) for p in latest_snapshot["price_oracle"]], + "last_prices": last_prices, + "last_prices_timestamp": last_prices_timestamp, + "admin_fee": int(latest_snapshot["admin_fee"]), + "xcp_profit": int(latest_snapshot["xcp_profit"]), + "xcp_profit_a": int(latest_snapshot["xcp_profit_a"]), + "virtual_price": int(latest_snapshot["virtual_price"]), + } + data["params"].update(params) + + else: + raise ApiResultError( + f"Pulling snapshots for non-Stableswap or non-Cryptoswap pools is not yet supported. Pool type: \"{data['pool_type']}\"" + ) + + logger.debug("Pool snapshot: %s", str(data)) + + return data + + def _chain_from_alias(chain): if chain in CHAIN_ALIASES: # pylint: disable=consider-using-get chain = CHAIN_ALIASES[chain] - if chain != "ethereum": - raise CurvesimValueError( - "Curve Prices API currently only supports Ethereum chain." - ) - return chain +pool_type_to_amm: Dict[str, Optional[str]] = { + "main": "stableswap", + "crypto": "cryptoswap", + "factory": "stableswap", + "factory_crypto": "cryptoswap", + "crvusd": None, + "factory_tricrypto": "cryptoswap", + "stableswapng": "stableswap", + "twocryptong": "cryptoswap", +} +pool_snapshot_sync = sync(pool_snapshot) # TODO export? get_pool_pair_volume_sync = sync(get_pool_pair_volume) diff --git a/curvesim/network/subgraph.py b/curvesim/network/subgraph.py index 35bd50fde..efb081414 100644 --- a/curvesim/network/subgraph.py +++ b/curvesim/network/subgraph.py @@ -1,7 +1,9 @@ """ -Network connector for subgraphs -""" +Network connector for subgraphs. +2024/09 - THE SUBGRAPH IS DEPRECATED +""" +# TODO: deprecate all from datetime import datetime, timedelta, timezone from decimal import Decimal diff --git a/curvesim/pipelines/__init__.py b/curvesim/pipelines/__init__.py index f0fa68185..4eac4f988 100644 --- a/curvesim/pipelines/__init__.py +++ b/curvesim/pipelines/__init__.py @@ -26,7 +26,7 @@ logger = get_logger(__name__) -def run_pipeline(param_sampler, price_sampler, strategy, ncpu=4): +def run_pipeline(param_sampler, price_sampler, strategy, metrics, ncpu=4): """ Core function for running pipelines. @@ -45,6 +45,9 @@ def run_pipeline(param_sampler, price_sampler, strategy, ncpu=4): strategy: callable A function dictating what happens at each timestep. + metrics: List[metrics objects] + TODO: update + ncpu : int, default=4 Number of cores to use. @@ -57,7 +60,7 @@ def run_pipeline(param_sampler, price_sampler, strategy, ncpu=4): if ncpu > 1: with multiprocessing_logging_queue() as logging_queue: strategy_args_list = [ - (pool, params, price_sampler) for pool, params in param_sampler + (pool, params, price_sampler, metrics) for pool, params in param_sampler ] wrapped_args_list = [ @@ -74,8 +77,8 @@ def run_pipeline(param_sampler, price_sampler, strategy, ncpu=4): else: results = [] for pool, params in param_sampler: - metrics = strategy(pool, params, price_sampler) - results.append(metrics) + computed_metrics = strategy(pool, params, price_sampler, metrics) + results.append(computed_metrics) results = tuple(zip(*results)) return results diff --git a/curvesim/pipelines/common/__init__.py b/curvesim/pipelines/common/__init__.py index 33b1329c0..bfc0fc5ff 100644 --- a/curvesim/pipelines/common/__init__.py +++ b/curvesim/pipelines/common/__init__.py @@ -2,6 +2,7 @@ Contains variables and functions common to the arbitrage pipelines. """ __all__ = ["DEFAULT_METRICS", "get_arb_trades", "get_asset_data", "get_pool_data"] +import gin from scipy.optimize import root_scalar @@ -21,6 +22,7 @@ Metrics.PoolVolume, Metrics.ArbMetrics, ] +gin.constant("curvesim.pipelines.common.DEFAULT_METRICS", DEFAULT_METRICS) def get_arb_trades(pool, prices): diff --git a/curvesim/pipelines/common/get_pool_data.py b/curvesim/pipelines/common/get_pool_data.py index a3fe8c94e..55e2683ab 100644 --- a/curvesim/pipelines/common/get_pool_data.py +++ b/curvesim/pipelines/common/get_pool_data.py @@ -10,7 +10,7 @@ from curvesim.pool_data.metadata import PoolMetaDataInterface -def get_pool_data(metadata_or_address, chain, env, pool_ts): +def get_pool_data(metadata_or_address, chain, env, pool_ts): # TODO remove env """ Gets sim pool and (if needed) pool metadata. """ diff --git a/curvesim/pipelines/config.gin b/curvesim/pipelines/config.gin index 885567128..b86f16d19 100644 --- a/curvesim/pipelines/config.gin +++ b/curvesim/pipelines/config.gin @@ -1,35 +1,70 @@ -from curvesim.pipelines.common import DEFAULT_METRICS +import curvesim.pipelines.common +import curvesim.pipelines.simulation +import curvesim.pipelines.simple.trader +import curvesim.pipelines.vol_limited_arb.trader +import curvesim.metrics.state_log +import curvesim.templates.time_sequence +import curvesim.templates.reference_market +import curvesim.pool_data.metadata.sim_market_config +# clean up imports if can make improvements to import paths -# ----- DATA (TimeSequence/ReferenceMarket) ----- # -FrequencyTimePeriod.start = -FrequencyTimePeriod.end = -FrequencyTimePeriod.freq = +# put this file at the base of the directory? (for easier finding for users) -SimpleReferenceMarket.sim_asset_series_list = +# {scope (instance) name}/{configurable reference}.{attribute name} +# THE BELOW SYNTAX IS ONLY SUPPORTED FOR SimMarketConfig -# ----- SIM MARKET ----- # -sim_market_factory = +data = {'name': 'Curve.fi DAI/USDC/USDT', 'address': '0xbEbc44782C7dB0a1A60Cb6fe97d0b483032FF1C7', 'chain': 'mainnet', 'symbol': '3Crv', 'version': 1, 'pool_type': 'REGISTRY_V1', 'params': {'A': 2000, 'fee': 1000000, 'fee_mul': None, 'admin_fee': 5000000000, 'virtual_price': 1025499623208090719}, 'coins': {'names': ['DAI', 'USDC', 'USDT'], 'addresses': ['0x6B175474E89094C44Da98b954EedeAC495271d0F', '0xA0b86991c6218b36c1d19D4a2e9Eb0cE3606eB48', '0xdAC17F958D2ee523a2206206994597C13D831ec7'], 'decimals': [18, 6, 6]}, 'reserves': {'by_coin': [171485829393046867353492287, 175414686134396000000000000, 88973989934190000000000000], 'unnormalized_by_coin': [171485829393046867353492287, 175414686134396, 88973989934190]}, 'basepool': None, 'timestamp': 1677628800} -sim_market_parameters = { - initial_parameters = { +pool/SimMarketConfig.key_metadata = %data +pool/SimMarketConfig.overrides = {"A": [100, 200],} - }, - run_parameters = { - }, -} +#datab = {'name': 'cvxeth', 'address': '0xB576491F1E6e5E62f1d8F26062Ee822B40B0E0d4', 'chain': 'mainnet', 'symbol': 'cvxeth', 'pool_type': 'crypto', 'params': {'A': 400000, 'gamma': 145000000000000, 'fee_gamma': 230000000000000, 'mid_fee': 26000000, 'out_fee': 45000000, 'allowed_extra_profit': 2000000000000, 'adjustment_step': 146000000000000, 'ma_half_time': 600, 'price_scale': [890717172798033], 'price_oracle': [912781912171728], 'last_prices': [911650717716804], 'last_prices_timestamp': 1725840000, 'admin_fee': 5000000000, 'xcp_profit': 1115427318126171392, 'xcp_profit_a': 1115427318126171392, 'virtual_price': 1057723648237714816}, 'coins': {'names': ['WETH', 'CVX'], 'addresses': ['0xC02aaA39b223FE8D0A0e5C4F27eAD9083C756Cc2', '0x4e3FBD56CD56c3e72c1403e103b45Db9da5B9D2B'], 'decimals': [18, 18]}, 'reserves': {'by_coin': [1639768592332589891584, 1753351707501938467667968], 'unnormalized_by_coin': [1639768592332589891584, 1753351707501938467667968]}, 'basepool': None, 'timestamp': 1725915137} + +datab = {'name': 'USD0/USDC', 'address': '0x14100f81e33C33Ecc7CDac70181Fb45B6E78569F', 'chain': 'mainnet', 'symbol': 'USD0/USDC', 'pool_type': 'stableswapng', 'params': {'A': 250, 'fee': 4000000, 'fee_mul': 20000000000, 'admin_fee': 5000000000, 'virtual_price': 1010404616597260544}, 'coins': {'names': ['USD0', 'USDC'], 'addresses': ['0x73A15FeD60Bf67631dC6cd7Bc5B6e8da8190aCF5', '0xA0b86991c6218b36c1d19D4a2e9Eb0cE3606eB48'], 'decimals': [18, 6]}, 'reserves': {'by_coin': [5526802210101440083394560, 6484456300067744800309248], 'unnormalized_by_coin': [5526802210101440083394560, 6484456300067]}, 'basepool': None, 'timestamp': 1727994703} + +poolb/SimMarketConfig.key_metadata = %datab +#poolb/SimMarketConfig.overrides = {"A": [1000,], "fee": [2000000,], "fee_mul": [50000000000,]} +datac = {'name': 'USD0/USDC', 'address': '0x14100f81e33C33Ecc7CDac70181Fb45B6E78569F', 'chain': 'mainnet', 'symbol': 'USD0/USDC', 'pool_type': 'stableswapng', 'params': {'A': 1000, 'fee': 2000000, 'fee_mul': 50000000000, 'admin_fee': 5000000000, 'virtual_price': 1010404616597260544}, 'coins': {'names': ['USD0', 'USDC'], 'addresses': ['0x73A15FeD60Bf67631dC6cd7Bc5B6e8da8190aCF5', '0xA0b86991c6218b36c1d19D4a2e9Eb0cE3606eB48'], 'decimals': [18, 6]}, 'reserves': {'by_coin': [5526802210101440083394560, 6484456300067744800309248], 'unnormalized_by_coin': [5526802210101440083394560, 6484456300067]}, 'basepool': None, 'timestamp': 1727994703} + +poolc/SimMarketConfig.key_metadata = %datac # ----- SIMULATION CONTEXT ----- # +time_sequence = "DateTimeSequence.from_range(start='2024-05-01', end='2024-08-07', freq='1h')" +SimulationContext.time_sequence = %time_sequence +#SimulationContext.reference_market = @SimpleReferenceMarket() +#SimulationContext.sim_market_factory = sim_market_factory # REMOVE +#SimulationContext.sim_market_parameters = sim_market_parameters # REMOVE +SimulationContext.trader_class = "VolumeLimitedArbitrageur" #@VolumeLimitedArbitrageur() +SimulationContext.log_class = "StateLog" #@StateLog +SimulationContext.metric_classes = %curvesim.pipelines.common.DEFAULT_METRICS # "DEFAULT_METRICS"? +# simulation_layout? +SimulationContext.simulation_runs = { + "Protocol X Token Y/Token Z Pool": { + "markets": [ + @pool/SimMarketConfig(), + ], + "vol_mult": None, + }, + "Sim run #2": { + "markets": [ + @poolb/SimMarketConfig(), + ], + "vol_mult": None, + }, + "USDC/USD0 change": { + "markets" : [ + @poolc/SimMarketConfig(), + ], + "vol_mult": None, + } +} +#SimulationContext.vol_mult = None # remove -SimulationContext.time_sequence = @FrequencyTimePeriod() -SimulationContext.reference_market = @SimpleReferenceMarket() -SimulationContext.sim_market_factory = sim_market_factory -SimulationContext.sim_market_parameters = sim_market_parameters -SimulationContext.trader_class = @VolumeLimitedArbitrageur -SimulationContext.log_class = @StateLog -SimulationContext.metric_classes = DEFAULT_METRICS +# annotate these and import their modules above: sim_market_factory = curvesim.pipelines.simulation.config_to_sim_market? (don't register?), trader_class = VolumeLimitedArbitrageur and SimpleArbitrageur, log_class = StateLog, metric_classes = curvesim.pipelines.common.DEFAULT_METRICS, time_sequence = curvesim.templates.time_sequence TimeSequence and DateTimeSequence +# not added yet: ReferenceMarket, diff --git a/curvesim/pipelines/simple/__init__.py b/curvesim/pipelines/simple/__init__.py index 9ddce1f67..613875e37 100644 --- a/curvesim/pipelines/simple/__init__.py +++ b/curvesim/pipelines/simple/__init__.py @@ -84,9 +84,9 @@ def pipeline( # pylint: disable=too-many-locals param_sampler = ParameterizedPoolIterator(pool, variable_params, fixed_params) price_sampler = PriceVolume(asset_data) - _metrics = init_metrics(DEFAULT_METRICS, pool=pool) - strategy = SimpleStrategy(_metrics) + metrics = init_metrics(DEFAULT_METRICS, pool=pool) + strategy = SimpleStrategy(metrics) - output = run_pipeline(param_sampler, price_sampler, strategy, ncpu=ncpu) - results = make_results(*output, _metrics) + output = run_pipeline(param_sampler, price_sampler, strategy, metrics, ncpu=ncpu) # might be deprecated - center pipeline() around a SimContext object? + results = make_results(*output, metrics) return results diff --git a/curvesim/pipelines/simple/trader.py b/curvesim/pipelines/simple/trader.py index 710402fcf..d4fccd985 100644 --- a/curvesim/pipelines/simple/trader.py +++ b/curvesim/pipelines/simple/trader.py @@ -1,3 +1,5 @@ +import gin + from curvesim.logging import get_logger from curvesim.templates.trader import Trade, Trader @@ -6,13 +8,14 @@ logger = get_logger(__name__) +@gin.register class SimpleArbitrageur(Trader): """ Computes, executes, and reports out arbitrage trades. """ # pylint: disable-next=arguments-differ,too-many-locals - def compute_trades(self, prices): + def compute_trades(self, pool, prices): # maybe add * param at end- volmult just gets ignored here """ Compute trades to arbitrage the pool, as follows: 1. For each coin pair i and j, calculate size of coin i @@ -23,6 +26,9 @@ def compute_trades(self, prices): Parameters ---------- + pool : :class:`~curvesim.pipelines.templates.SimPool` + The pool to arbitrage. + prices : pandas.Series Current market prices from the price_sampler. @@ -34,7 +40,6 @@ def compute_trades(self, prices): additional_data: dict Dict of additional data to be passed to the state log as part of trade_data. """ - pool = self.pool trades = get_arb_trades(pool, prices) max_profit = 0 diff --git a/curvesim/pipelines/simulation.py b/curvesim/pipelines/simulation.py index 7fa5f99f0..951feeb45 100644 --- a/curvesim/pipelines/simulation.py +++ b/curvesim/pipelines/simulation.py @@ -1,9 +1,33 @@ import os +from copy import deepcopy +from datetime import datetime, timedelta, timezone +from typing import Any, Union, Tuple, List import gin -from curvesim.metrics.results import SimResults, make_results +from pandas import concat + +from curvesim.metrics import SimResults, make_results, init_metrics from curvesim.pipelines import run_pipeline +from curvesim.pipelines.common import get_asset_data +from curvesim.iterators.price_samplers import PriceVolume +from curvesim.templates import SimPool, TimeSequence, DateTimeSequence +from curvesim.pool import get_sim_pool +from curvesim.pool_data import get_metadata, get_pool_volume +from curvesim.pool_data.metadata import PoolMetaData, PoolMetaDataInterface +#from curvesim.pool_data.metadata.sim_market_config import SimMarketConfig +from curvesim.utils import is_address +from curvesim.constants import CurvePreset +from curvesim.logging import get_logger + +from curvesim.iterators.param_samplers import ParameterizedPoolIterator + +from curvesim.pipelines.simple.trader import SimpleArbitrageur # import these two from common module +from curvesim.pipelines.vol_limited_arb.trader import VolumeLimitedArbitrageur # remove import of this from vol_limited_arb.__init__ +from curvesim.metrics.state_log import StateLog + + +logger = get_logger(__name__) @gin.configurable @@ -26,65 +50,185 @@ class SimulationContext: def __init__( self, time_sequence, - reference_market, - sim_market_factory, - sim_market_parameters, + #data_source, + # reference_market, + # sim_market_factory, + # sim_market_parameters, trader_class, log_class, metric_classes, + simulation_runs, + #vol_mult, ): """ all constructor args will be Gin injected """ - self.time_sequence = time_sequence - self.reference_market = reference_market - self.sim_market_factory = sim_market_factory - self.sim_market_parameters = sim_market_parameters - self.trader_class = trader_class - self.log_class = log_class - self.metric_classes = metric_classes - - def executor(self, sim_market): + # self.time_sequence = time_sequence + # self.reference_market = reference_market + # self.sim_market_factory = sim_market_factory + # self.sim_market_parameters = sim_market_parameters + self.time_sequence = eval(time_sequence) if time_sequence else make_default_time_sequence() # force/allow list instead? + #self.time_sequence = make_default_time_sequence() + # pass in string name of trader/log class and this instantiates here + self.trader_class = eval(trader_class) + self.log_class = eval(log_class) + self.metric_classes = metric_classes # eval? + + # need data source param + self.data_source = "coingecko" # force/allow list like for time_sequence? + # only support coingecko for now? local needs some cleaning up + # curve prices ApiDataSource class? + + # where to store the meta types per run? + + # experimental + #assert isinstance(simulation_runs, dict) + self.simulation_runs = simulation_runs + + # TODO: make time_sequence always a list? + # time_sequence = eval(time_sequence) if time_sequence else make_default_time_sequence() + # if isinstance(time_sequence, TimeSequence): + # self.time_sequence = time_sequence + # elif isinstance(time_sequence, list) and all([isinstance(ts, TimeSequence) for ts in time_sequence]): + # if len(time_sequence) == len(self.simulation_runs.keys()): + # self.time_sequence = time_sequence + # else: + # raise ValueError + # else: + # raise TypeError + + # data_source = eval(data_source) if data_source else "coingecko" + # if isinstance(data_source, str): + # self.data_source = data_source + # elif isinstance(data_source, list) and all([isinstance(src, str) for src in data_source]): + # if len(data_source) == len(self.simulation_runs.keys()): + # self.data_source = data_source + # else: + # raise ValueError + # else: + # raise TypeError + + + # what's next for our api needs? discuss with Naga + # use curve prices api instead of subgraph for snapshot (get_pool_data) + + + # NOT RECOMMENDED TO CONFIGURE + # ---------------------------- + self.vol_mult = None + + + def _get_trader_inputs(self, sample) -> Tuple: + """ + Process the price sample into appropriate inputs for the + trader instance. + + Parameters + + Returns + + Raises + """ + if self.trader_class is SimpleArbitrageur: + return (sample.prices) + elif self.trader_class is VolumeLimitedArbitrageur: + prices = sample.prices + volumes = sample.volumes + vol_mult = self.vol_mult + + volume_limits = {key: volumes[key] * vol_mult[key] for key in volumes} + reversed_limits = {(j, i): lim * prices[(i, j)] for (i, j), lim in volume_limits.items()} + all_limits = {**volume_limits, **reversed_limits} + normalized_vol_limits = {key: int(val * 10**18) for key, val in all_limits.items()} + + return (prices, normalized_vol_limits) + else: + pass + #raise CurvesimTypeError("TODO") # TODO + + + def executor(self, sim_market, parameters, price_volume, metrics): """ Executes a trading strategy for the given sim market and time sequence. + + Returns + ------- + + tuple of dataframes """ # These all use Gin injection, completely separating # any need to consider constructor dependencies from # this logic. + + # applied once per run - referencemarket and timesequence are supposed to be agnostic of trading strategy (eg volume-limited) trader = self.trader_class() - log = self.log_class() - sim_market.prepare_for_run() + log = self.log_class(sim_market) # needs pool and metrics, but maybe remove pool (at minimum) later + + # (if have multiple pools in one sim) + # every run, you probably only care about the metrics recorded on one pool (in multipool run, why assume a trader will route their trades through the specific pools loaded?) + + parameter_changes = str(parameters) if parameters else "no parameter changes" + logger.info("[%s] Simulating with %s", sim_market.symbol, parameter_changes) + + sim_market.prepare_for_run(price_volume.prices) # needs pricesampler.prices (DF) + + price_sampler = iter(price_volume) for timestep in self.time_sequence: - sim_market.prepare_for_trades(timestep) - sample = self.reference_market.prices(timestep) - trade_data = trader.process_time_sample(sample, sim_market) + sim_market.prepare_for_trades(timestep) # move pool timestamp + #sample = self.reference_market.prices(timestep) # TODO: ReferenceMarket (wrapper around PriceVolume) + # we assume that the PriceVolume object's timestamps have been aligned with that of self.time_sequence + price_volume_sample = next(price_sampler) # iterate over PriceVolume obj itself above? (for now) + # use price_sampler timestamp index or not? or price sample timestamp? above works only if timesequence and sampler timestamps align + + trader_args = self._get_trader_inputs(price_volume_sample) + + #trade_data = trader.process_time_sample(sample, sim_market) # this is computing trades with prices at current timestamp + trade_data = trader.process_time_sample(sim_market, *trader_args) + + # update after log.update( - price_sample=sample, + pool=sim_market, + price_sample=price_volume_sample, trade_data=trade_data, - sim_market=sim_market, + #sim_market=sim_market, ) - return log.compute_metrics() + run_logs = log.get_logs() + + return compute_metrics(run_logs, metrics) # change later + @property - def configured_sim_markets(self): - sim_market_parameters = self.sim_market_parameters - sim_market_factory = self.sim_market_factory + def configured_sim_markets(self): # rename configured_sim_runs? + #sim_market_factory = self.sim_market_factory + + for sim_name, sim_info_dict in self.simulation_runs.items(): + # for now, only one market per run will be supported + for sim_market_config in sim_info_dict["markets"]: + + pool = config_to_sim_market(sim_market_config) + variable_params = deepcopy(sim_market_config.overrides) + metadata_template = config_to_metadata(sim_market_config) + + if sim_market_config.include_template: + #metadata_template = config_to_metadata(sim_market_config) + template_params = metadata_template.init_kwargs() - initial_params = sim_market_parameters.initial_parameters - all_run_params = sim_market_parameters.run_parameters + for param_name, value_list in variable_params.items(): + include = template_params[param_name] # only template params that have corresponding overrides need to be included in variable_params + new_list = [include] + value_list + variable_params[param_name] = new_list - yield sim_market_factory.create(**initial_params) + vol_mult = sim_info_dict["vol_mult"] - for run_params in all_run_params: - init_kwargs = initial_params.copy() - init_kwargs.update(run_params) - sim_market = sim_market_factory.create(**init_kwargs) - yield sim_market + yield (ParameterizedPoolIterator(pool, variable_params=variable_params), metadata_template, vol_mult) + # [yield ParameterizedPoolIterator for each simmarket in this sim] + # for multiple, create empty list in loop above and append pools and then yield at end of loop above? - def run_simulation(self, ncpu=None) -> SimResults: + + def run_simulation(self, ncpu=None) -> List[SimResults]: """ Parameters ---------- @@ -102,10 +246,140 @@ def run_simulation(self, ncpu=None) -> SimResults: cpu_count = os.cpu_count() ncpu = cpu_count if cpu_count is not None else 1 - configured_sim_markets = self.configured_sim_markets - initial_sim_market = next(configured_sim_markets) - output = run_pipeline(configured_sim_markets, self.executor, ncpu) + every_result: List[SimResults] = [] + + sim_counter = 0 + + for configured_sim_markets, sim_market_metadata, vol_mult in self.configured_sim_markets: + template_sim_market = configured_sim_markets.pool_template + + metrics = init_metrics(self.metric_classes, pool=template_sim_market) + + # TODO: make time_sequence always a list? + if isinstance(self.time_sequence, list): + time_sequence = self.time_sequence[sim_counter] + else: + time_sequence = self.time_sequence + + if isinstance(self.data_source, list): + data_source = self.data_source[sim_counter] + else: + data_source = self.data_source + + asset_data, _ = get_asset_data(sim_market_metadata, time_sequence, data_source) + price_volume = PriceVolume(asset_data) + + if vol_mult is None: + pool_volume = get_pool_volume( + sim_market_metadata, time_sequence[0], time_sequence[-1] + ) + # TODO: mark these two as DataFrames properly + vol_mult = pool_volume.sum() / price_volume.volumes.sum() + logger.info("Volume Multipliers:\n%s", vol_mult.to_string()) + vol_mult = vol_mult.to_dict() + + self.vol_mult = vol_mult + + output = run_pipeline(configured_sim_markets, price_volume, self.executor, metrics, ncpu=ncpu) + + results = make_results(*output, metrics) + every_result.append(results) + + logger.info("done lol") # TODO: one run finished, onto the next one - use name from simulation_runs? + + sim_counter += 1 + + return every_result + + +@gin.register # unregister this? +def config_to_metadata(sim_market_config: SimMarketConfig) -> PoolMetaDataInterface: + """ + returns default PoolMetaDataInterface (no override) based on key_metadata supplied for instantiation + """ + metadata = sim_market_config.key_metadata + + if is_address(metadata): # TODO reflect simmarketconfig changes + address = metadata + chain = sim_market_config.chain + end_ts = sim_market_config.end_ts + metadata_interface = get_metadata(address, chain=chain, end_ts=end_ts) + + if isinstance(metadata, dict): # dict in "subgraph" style (actually a curvesim-defined format) + if "preset" in metadata: + metadata_dict = create_preset_metadata(metadata) # returns dict in our defined format + metadata_interface = PoolMetaData(metadata_dict) + else: + metadata_interface = PoolMetaData(metadata) + + if isinstance(metadata, PoolMetaDataInterface): + metadata_interface = metadata + + return metadata_interface + + +@gin.register # unregister this? +def config_to_sim_market(sim_market_config: SimMarketConfig) -> SimPool: + """ + Factory function for simmarkets + """ + metadata_interface = config_to_metadata(sim_market_config) + chain = sim_market_config.chain + balanced = sim_market_config.balanced + balanced_base = sim_market_config.balanced_base + end_ts = sim_market_config.end_ts + + sim_market = get_sim_pool(metadata_interface, chain=chain, balanced=balanced, balanced_base=balanced_base, end_ts=end_ts) + + return sim_market + + + +def compute_metrics(state_logs, metrics) -> tuple: + """ + Computes metrics from the accumulated log data. + + make sure state_logs and metrics._pool are referring to the same pool + + Parameters TODO: write + ---------- + state_logs : dict + from statelog or just log + + metrics : List[Metric] + already initialized + + Returns + ------- + + tuple of dataframes + """ + data_per_run = state_logs["pool_parameters"] + metric_data = [metric.compute(state_logs) for metric in metrics] + data_per_trade, summary_data = tuple(zip(*metric_data)) # transpose tuple list + + data_per_trade = concat(data_per_trade, axis=1) + summary_data = concat(summary_data, axis=1) + + return ( + data_per_run, + data_per_trade, + summary_data, + ) + + +def make_default_time_sequence() -> DateTimeSequence: + t_end = datetime.now(timezone.utc) - timedelta(days=1) + t_end = t_end.replace(hour=23, minute=0, second=0, microsecond=0) + t_start = t_end - timedelta(days=60) + timedelta(hours=1) + time_sequence = DateTimeSequence.from_range(start=t_start, end=t_end, freq="1h") + + return time_sequence - metrics = [Metric(pool=initial_sim_market) for Metric in self.metric_classes] - results = make_results(*output, metrics) - return results +# allow creation of PoolMetaDataBase child classes in .gin? +# helper function: simulation_runs_setting(# of pipelines) +# rigourously define (sim) run, pipeline, simulation, etc. for docs +# one day simulate param changes within a run? +# option to parallelize sims? (after reading all data necessary for all sims) + # append info for each run to list in for loop, schedule coroutines? +# make pkg min required version 3.10 \ No newline at end of file diff --git a/curvesim/pipelines/vol_limited_arb/__init__.py b/curvesim/pipelines/vol_limited_arb/__init__.py index 86bf1d45e..8a87f1272 100644 --- a/curvesim/pipelines/vol_limited_arb/__init__.py +++ b/curvesim/pipelines/vol_limited_arb/__init__.py @@ -13,6 +13,7 @@ from .. import run_pipeline from ..common import DEFAULT_METRICS, get_asset_data, get_pool_data from .strategy import VolumeLimitedStrategy +from .trader import VolumeLimitedArbitrageur # import trade class to top level? logger = get_logger(__name__) @@ -115,9 +116,9 @@ def pipeline( metrics = metrics or DEFAULT_METRICS metrics = init_metrics(metrics, pool=pool) - strategy = VolumeLimitedStrategy(metrics, vol_mult) + strategy = VolumeLimitedStrategy(metrics, vol_mult) # might be deprecated - center pipeline() around a SimContext object? - output = run_pipeline(param_sampler, price_sampler, strategy, ncpu=ncpu) + output = run_pipeline(param_sampler, price_sampler, strategy, metrics, ncpu=ncpu) results = make_results(*output, metrics) return results diff --git a/curvesim/pipelines/vol_limited_arb/trader.py b/curvesim/pipelines/vol_limited_arb/trader.py index 46be9c097..7d0741fb5 100644 --- a/curvesim/pipelines/vol_limited_arb/trader.py +++ b/curvesim/pipelines/vol_limited_arb/trader.py @@ -1,5 +1,7 @@ from pprint import pformat +import gin + from numpy import isnan from scipy.optimize import least_squares @@ -11,17 +13,21 @@ logger = get_logger(__name__) +@gin.register class VolumeLimitedArbitrageur(Trader): """ Computes, executes, and reports out arbitrage trades. """ - def compute_trades(self, prices, volume_limits): # pylint: disable=arguments-differ + def compute_trades(self, pool, prices, volume_limits): # pylint: disable=arguments-differ # add * args at end - those get ignored """ Computes trades to optimally arbitrage the pool, constrained by volume limits. Parameters ---------- + pool : :class:`~curvesim.pipelines.templates.SimPool` + The pool to arbitrage. + prices : dict Current market prices from the price_sampler. @@ -37,9 +43,9 @@ def compute_trades(self, prices, volume_limits): # pylint: disable=arguments-di additional_data: dict Dict of additional data to be passed to the state log as part of trade_data. """ - + # make volume_limits volumes + volume_multiples instead and move VolumeLimitedStrategy compute volume limits + get trader inputs to this class trades, errors, _ = multipair_optimal_arbitrage( - self.pool, prices, volume_limits + pool, prices, volume_limits ) return trades, {"price_errors": errors} @@ -58,7 +64,7 @@ def multipair_optimal_arbitrage( # noqa: C901 pylint: disable=too-many-locals prices : dict Current market prices from the price_sampler. - volume_limits : dict + limits : dict Current volume limits for each trading pair. Returns @@ -143,7 +149,10 @@ def _apply_volume_limits(arb_trades, limits, pool): excluded_trades = [] for trade in arb_trades: pair = trade.coin_in, trade.coin_out - limited_amount_in = min(limits[pair], trade.amount_in) + try: + limited_amount_in = min(limits[pair], trade.amount_in) + except KeyError: + limited_amount_in = min(limits[pair[::-1]], trade.amount_in) lim_trade = trade.replace_amount_in(limited_amount_in) if lim_trade.amount_in > pool.get_min_trade_size(lim_trade.coin_in): diff --git a/curvesim/pool/__init__.py b/curvesim/pool/__init__.py index 677283d1f..d5c556cf0 100644 --- a/curvesim/pool/__init__.py +++ b/curvesim/pool/__init__.py @@ -144,7 +144,7 @@ def get_pool( *, normalize=False, end_ts=None, - env="prod", + env="prod", # TODO deprecate ): """ Factory function for creating a pool based on metadata pulled from on-chain. @@ -180,7 +180,7 @@ def get_pool( raise CurvesimValueError("`end_ts` has no effect unless pool address is used.") if isinstance(pool_metadata, str): - pool_metadata = get_metadata(pool_metadata, chain=chain, env=env, end_ts=end_ts) + pool_metadata = get_metadata(pool_metadata, chain=chain, end_ts=end_ts) elif isinstance(pool_metadata, dict): pool_metadata = PoolMetaData(pool_metadata) @@ -211,7 +211,7 @@ def get_sim_pool( balanced_base=True, custom_kwargs=None, end_ts=None, - env="prod", + env="prod", # TODO deprecate ): """ Factory function for creating a sim pool based on metadata pulled from on-chain. @@ -259,7 +259,7 @@ def get_sim_pool( raise CurvesimValueError("`end_ts` has no effect unless pool address is used.") if isinstance(pool_metadata, str): - pool_metadata = get_metadata(pool_metadata, chain=chain, env=env, end_ts=end_ts) + pool_metadata = get_metadata(pool_metadata, chain=chain, end_ts=end_ts) elif isinstance(pool_metadata, dict): pool_metadata = PoolMetaData(pool_metadata) diff --git a/curvesim/pool/sim_interface/cryptoswap.py b/curvesim/pool/sim_interface/cryptoswap.py index ef3bc68a0..015fad079 100644 --- a/curvesim/pool/sim_interface/cryptoswap.py +++ b/curvesim/pool/sim_interface/cryptoswap.py @@ -179,7 +179,7 @@ def prepare_for_run(self, prices): Parameters ---------- - timestamp : pandas.DataFrame + prices : pandas.DataFrame The price time_series, price_sampler.prices. """ xcp = self._get_xcp(self.D) diff --git a/curvesim/pool_data/__init__.py b/curvesim/pool_data/__init__.py index b6acb0fff..048c0a464 100644 --- a/curvesim/pool_data/__init__.py +++ b/curvesim/pool_data/__init__.py @@ -2,7 +2,7 @@ Tools for fetching pool metadata and volume. Currently supports stableswap pools, metapools, rebasing (RAI) metapools, -and 2-token cryptopools. +and 2 or 3-token cryptopools. """ __all__ = ["get_metadata", "get_pool_assets", "get_pool_volume"] diff --git a/curvesim/pool_data/metadata/cryptoswap.py b/curvesim/pool_data/metadata/cryptoswap.py index 4bfdfc496..0190ce255 100644 --- a/curvesim/pool_data/metadata/cryptoswap.py +++ b/curvesim/pool_data/metadata/cryptoswap.py @@ -23,9 +23,9 @@ def init_kwargs(self, normalize=True): "price_scale": data["params"]["price_scale"], "admin_fee": data["params"]["admin_fee"], "ma_half_time": data["params"]["ma_half_time"], - "virtual_price": data["reserves"]["virtual_price"], "xcp_profit": data["params"]["xcp_profit"], "xcp_profit_a": data["params"]["xcp_profit_a"], + "virtual_price": data["params"]["virtual_price"], } n = kwargs["n"] diff --git a/curvesim/pool_data/metadata/sim_market_config.py b/curvesim/pool_data/metadata/sim_market_config.py new file mode 100644 index 000000000..994d4e6c0 --- /dev/null +++ b/curvesim/pool_data/metadata/sim_market_config.py @@ -0,0 +1,460 @@ +from typing import Union, Optional, Any, Dict +from datetime import datetime # TODO remove? +from time import time +from copy import deepcopy + +import gin + +from curvesim.utils import dataclass +from curvesim.constants import Chain, CurvePreset +from curvesim.exceptions import CurvesimTypeError, CurvesimValueError +from curvesim.utils import Address, is_address, to_address +from curvesim.pipelines.common.get_pool_data import _parse_timestamp # TODO don't need +#from curvesim.pipelines.common import get_pool_data + +#from . import PoolMetaDataInterface + + +# TODO: consider moving everything to curvesim.pipelines.simulation.py or curvesim.pipelines or curvesim.pipelines.common +@gin.register +class SimMarketConfig: + """ + class for config, one per simmarket (regardless of # of simmarket per run) (so same simmarket config is reusable) + treat it as read-only after instantiation + """ + def __init__( + self, + key_metadata: Union[str, dict, PoolMetaDataInterface], + overrides: dict[str, list[Any]] = {}, + include_template: bool = True, + chain: Union[str, Chain] = "mainnet", + balanced: bool = True, + balanced_base: bool = True, + end_ts: Union[int, datetime, None] = None, #TODO rethink necessity + ): + """ + Parameters + ---------- + key_metadata : Union[str, dict, PoolMetaDataInterface] + address, preset name, meta dict, meta class + + overrides : dict[str, list[Any]], optional + TODO + should be like pool param iterator's variable_params: + dict[str, list[Any]] = {"value1": [...], "value2" : [...], etc.} + + if doing overrides the parameterizedpooliterator way, no need to specify # of runs; # of runs = product of number of overrides for each varaible param + overrides should be unique + + include_template: bool, optional + + chain + + balanced + + + balanced_base + + + end_ts + + TODO: pool_ts param like in pipeline()? + """ + if not isinstance(key_metadata, (str, dict, PoolMetaDataInterface)): + raise CurvesimTypeError(f"{key_metadata} TODO") + + if isinstance( + key_metadata, str + ): # address and strenum will resolve to str type + if is_address(key_metadata, checksum=False): + key_metadata = to_address(key_metadata) + else: + try: + key_metadata = CurvePreset(key_metadata) + except ValueError: + raise CurvesimValueError( + f"TODO {key_metadata} - str but not valid address or preset name" + ) + + if isinstance(overrides, dict): + not_str = list(filter(lambda key: not isinstance(key, str), overrides.keys())) + if not_str != []: + raise CurvesimValueError(f"need str keys {not_str}") + + # just tell user not to use duplicates in any list + # want a fixed value (diff from template value)? - provide exactly one value - will be used for all sims + else: + raise CurvesimTypeError(f"overrides needs dict {overrides}") + + self.key_metadata: Union[Address, CurvePreset, dict, PoolMetaDataInterface] = key_metadata # sim context will handle parsing diff key metadata types + self.overrides: dict[str, list[Any]] = overrides + self.include_template: bool = include_template + self.chain: Chain = Chain(chain) + self.balanced: bool = balanced + self.balanced_base: bool = balanced_base + self.end_ts: Optional[int] = _parse_timestamp(end_ts) + + +# if not isinstance(metadata, (str, dict, PoolMetaDataInterface)): +# raise CurvesimTypeError + +# if isinstance(metadata, str): +# if is_address(metadata, checksum=False): +# metadata = {"address": to_address(metadata)} +# else: +# raise CurvesimValueError + +# if isinstance(metadata, dict): +# if "address" in metadata: +# metadata["address"] = to_address(metadata["address"]) + +# if "address" in metadata and all([key in dict_id_1 for key in metadata]): +# config_base = deepcopy(metadata) +# _a(config_base) +# elif all([key in dict_id_2 for key in metadata]) and all([key in metadata for key in dict_id_2]): +# config_base = deepcopy(metadata) +# _b(metadata) +# elif "preset" in metadata and all([key in dict_id_3 for key in metadata]): +# config_base = deepcopy(metadata) +# _c(config_base) +# else: +# raise CurvesimValueError + +# if isinstance(metadata, PoolMetaDataInterface): +# config_base = metadata + +# self._config_base = config_base + +# call config_to_metadata and config_to_sim_market right here? (cached after) + +# take in any of the below types for key_metadata in init, create "full" value for its type and store in attr _config_base, move configtometadata and configtosimmarket to this class? and expose poolmetadatabase and simmarket as cached properties using those functions (which take include_template and overrides into account) + + +# keep overrides and include_template above as init params +address = str # this maps to dict below + +dict_id_1 = { + "address": str, + "chain": str, + "balanced": bool, + "balanced_base": bool, + "timestamp": int, +} # chain, balanced, balanced_base, timestamp not required +# unique id: any subset of this dict containing "address" + +def _a(dict_type: dict) -> None: + if not ("address" in dict_type) or not (isinstance(dict_type["address"], str)): + raise CurvesimValueError(f"") # TODO + + optional_keys = ["chain", "balanced", "balanced_base", "timestamp"] + defaults = {"chain": "mainnet", "balanced": True, "balanced_base": True, "timestamp": int(time())} + + for key in defaults: + if dict_type.get(key) == None: + dict_type.update((key, defaults[key])) + else: + value = dict_type[key] + expected_type = dict_id_1[key] + if not isinstance(value, expected_type): + raise CurvesimValueError(f"SimMarketConfig key_metadata {key}:{value} - expected type {expected_type}") # TODO + + + +dict_id_2 = { + "name": str, + "address": str, + "chain": str, + "symbol": str, + "pool_type": str, + "params": dict, + "coins": dict, # {"names": list, "addresses": list, "decimals": list,} + "reserves": dict, # {"by_coin": list, "unnormalized_by_coin": list, } + "basepool": Optional[dict], + "timestamp": int, +} # everything required (to avoid network call) +# unique id: contains exactly these config_base = function(metadata) + +def _b(dict_type: dict) -> None: + if not(all([key in dict_id_2 for key in dict_type]) and all([key in dict_type for key in dict_id_2])): + raise CurvesimValueError(f"") # TODO + + for key in dict_type: + value = dict_type[key] + expected_type = dict_id_2[key] + if not isinstance(value, expected_type): + raise CurvesimValueError(f"") # TODO + +dict_id_3 = { + "name": Optional[str], + "address": Optional[str], + "chain": Optional[str], + "symbol": Optional[str], + "coins": Optional[dict], # {"names": list, "addresses": list, "decimals": list, "prices": list,} + "tvl": Optional[float], + "preset": str, + "basepool": Optional[str], # deployed contracts only, if used + "timestamp": Optional[int], +} +# unique id: contains "preset", error raising will handle the finer details + +# offline function +def _c(dict_type: dict) -> None: + try: + preset: str = dict_type["preset"].lower() + preset_check: CurvePreset = CurvePreset(preset) + dict_type["preset"] = preset.lower() + except ValueError as e: # not a CurvePreset + raise CurvesimValueError(f"") from e # TODO + except KeyError as e: # "preset" not in dict_type + raise CurvesimValueError(f"") from e # TODO + + for key in dict_type: + value = dict_type[key] + expected_type = dict_id_3[key] + if not isinstance(value, expected_type): + raise CurvesimValueError(f"") # TODO + + knowable_defaults = { + "name": None, + "address": None, + "chain": "mainnet", + "symbol": None, + "coins": { + "names": None, + "addresses": None, + "decimals": None, + "prices": None, + }, + "tvl": None, + "preset": None, + "basepool": None, + "timestamp": int(time()), + } + + for key in knowable_defaults: + if not key in dict_type: + value = knowable_defaults[key] + dict_type.update((key, value)) + + if key == "coins": + for param in knowable_defaults["coins"]: + if not param in dict_type["coins"]: + dict_type["coins"].update((param, knowable_defaults["coins"][param])) + + deployed_pool: bool = dict_type["address"] != None + + tvl_given: bool = dict_type["tvl"] != None + + deployed_coins: bool = isinstance(dict_type["coins"]["addresses"], list) and tvl_given + + no_contracts: bool = isinstance(dict_type["coins"]["names"], list) and isinstance(dict_type["coins"]["prices"], list) + no_contracts = no_contracts and tvl_given + + if deployed_pool: + dict_type.update(("use internet", "deployed pool")) + elif deployed_coins: + dict_type.update(("use internet", "deployed coins")) + elif no_contracts: + dict_type.update(("use internet", "nothing")) + else: + raise CurvesimValueError(f"") # TODO + +# create_preset_metadata() - preset_to_curvesim_dict +# preset - required + # infer pool type - explicit mapping from preset name + # infer stableswap/cryptoswap by mapping from preset name +# tvl - optional - assume balanced - precisions and pricing data matter +# chain - optional - default is "mainnet"/"ethereum" +# user-defined pool name and symbol - optional (defaults for either if not provided ONLY IF coin names are known - we know coin names either from coin addresses or user supply) +# coins - optional + # list of addresses of existing tokens (take note of the order) - optional + # optional since protocols may want to add their non-existent tokens (or new tokens with no pricing data/ liquidity for price reference) + # no support for erc4626, custom oracle, rebasing, etc. tokens + # list of usd prices - optional param (but no default value) + # default network call being curve prices will suffice for coins that are in Curve's system (if not supplied by user) + # consider switching to coingecko if curve prices fails - if both fail (prices not provided by user but addresses are), raise network exception + # if no coin addresses and no usd prices and no pool address, raise exception + # precision - optional param inside coins dict (if neither precisions nor addresses are provided, default value is 18 for all) + # get from curve prices +# basepool - optional addr - format properly in to_metadata? +# start timestamp - optional (default is now - 60 days) +# .gin file data type: dict with "preset_name" as the unique trait +# function input type: dict of 1 of 3 types +# put function here to make simmarketconfigs having preset dicts reusable +# can keep CurvePreset around though, just to keep track of the preset names (don't give to SimMarketConfig) + +# Note: timestamp and chain are already given default values in _c() +# (online call) (existing pool) - tier 1 + # from_address(address, chain, end_ts=timestamp) (get_pool_data if doesn't work) + # ignore everything else + # apply preset params to curvesim_dict["params"] - however the unfilled (None) ones from this file can be ignored + # apply tvl? - curvesim_dict["reserves"] overwrite? +# (online call) (existing coins, pool maybe nonexistent) - tier 2 + # if basepool, only accept 1 coin in coins (other is lp token of basepool) IF STABLESWAP PRESET + # fetch whole curvesim dict at timestamp for the basepool from curve_prices + # fetch coin prices and names, precisions at timestamp from coins[addresses] - use curve prices for coins in Curve's system, coingecko (or etherscan?) for when curve prices fails + # ignore coins[names, decimals, prices] + # coins[addresses] and chain - if basepool applies, need lp token price from different curve prices endpoint? + # calculate curvesim_dict["reserves"] based on tvl and above fetched coin info + # use name and symbol in curvesim dict if included + # construct curvesim dict from above and insert into it all params for preset +# (no call) (coins and pool nonexistent) - tier 3 + # construct default name and symbol from coins[names] (in order) + # address can be None, chain as "mainnet" default or chain is fine + # calculate curvesim_dict["reserves"] based on tvl and coins[prices] and coins[decimals] (if supplied) + # ignore basepool + # apply preset to curvesim_dict["params"], add timestamp to curvesim_dict + # (unrelated) user needs to supply a CsvDataSource (or other type for ReferenceMkt) for the coins (prices for the whole sim); query by SimAsset (not necessarily OnChainAssetPair) base/quote symbols + + + +# returns dict - just adds predefined pool params (and determines pool type) on top of user-supplied info + # for every pool type, there are used params and unused params (ie unnecessary to python pool init) - fill params (both types) fully based on pool type + +# inside .gin, the dict["preset"] should be mapped to a str, not a CurvePreset + +# unfilled params +params_not_preset = { + "stableswap": { + "admin_fee": None, + # curve.fi preset includes maExpTime, but our stableswap pool.py does not + }, + + "cryptoswap": { + "price_scale": None, + "price_oracle": None, + "last_prices": None, + "last_prices_timestamp": None, + "admin_fee": None, + "xcp_profit": None, + "xcp_profit_a": None, + }, +} + +# invariant type +preset_to_invariant = { + "fiat redeemable stablecoins": "stableswap", + "crypto collateralized stablecoins": "stableswap", + "stableswap liquid restaking tokens": "stableswap", + "crypto": "cryptoswap", + "forex": "cryptoswap", + "liquid staking derivatives": "cryptoswap", + "cryptoswap liquid restaking tokens": "cryptoswap", + "tricrypto": "cryptoswap", + "three coin volatile": "cryptoswap", +} + +# params +{ + "fiat redeemable stablecoins": { + "pool_type": "factory", + "params": { + "A": 200, + "fee": int(0.0004 * 10**10), + "fee_mul": 2 * 10**10, + "ma_exp_time": 600, + }.update(params_not_preset[preset_to_invariant["fiat redeemable stablecoins"]]), + }, + "crypto collateralized stablecoins": { + "pool_type": "factory", + "params": { + "A": 100, + "fee": int(0.0004 * 10**10), + "fee_mul": 2 * 10**10, + "ma_exp_time": 600, + }.update(params_not_preset[preset_to_invariant["crypto collateralized stablecoins"]]), + }, + "stableswap liquid restaking tokens": { + "pool_type": "factory", + "params": { + "A": 500, + "fee": int(0.0001 * 10**10), + "fee_mul": 5 * 10**10, + "ma_exp_time": 600, + }.update(params_not_preset[preset_to_invariant["stableswap liquid restaking tokens"]]), + }, + "crypto": { + "pool_type": "factory_crypto", + "params": { + "A": 400000, + "gamma": int(0.000145 * 10**18), + "fee_gamma": int(0.00023 * 10**18), + "mid_fee": int(0.0026 * 10**10), + "out_fee": int(0.0045 * 10**10), + "allowed_extra_profit": int(0.000002 * 10**18), + "adjustment_step": int(0.000146 * 10**18), + "ma_half_time": 600, + }.update(params_not_preset[preset_to_invariant["crypto"]]), + }, + "forex": { + "pool_type": "factory_crypto", + "params": { + "A": 20000000, + "gamma": int(0.001 * 10**18), + "fee_gamma": int(0.005 * 10**18), + "mid_fee": int(0.0005 * 10**10), + "out_fee": int(0.0045 * 10**10), + "allowed_extra_profit": int(0.00000001 * 10**18), + "adjustment_step": int(0.0000055 * 10**18), + "ma_half_time": 600, + }.update(params_not_preset[preset_to_invariant["forex"]]), + }, + "liquid staking derivatives": { + "pool_type": "factory_crypto", + "params": { + "A": 40000000, + "gamma": int(0.002 * 10**18), + "fee_gamma": int(0.3 * 10**18), + "mid_fee": int(0.0003 * 10**10), + "out_fee": int(0.0045 * 10**10), + "allowed_extra_profit": int(0.00000001 * 10**18), + "adjustment_step": int(0.0000055 * 10**18), + "ma_half_time": 600, + }.update(params_not_preset[preset_to_invariant["liquid staking derivatives"]]), + }, + "cryptoswap liquid restaking tokens": { + "pool_type": "factory_crypto", + "params": { + "A": 20000000, + "gamma": int(0.02 * 10**18), + "fee_gamma": int(0.03 * 10**18), + "mid_fee": int(0.00005 * 10**10), + "out_fee": int(0.0008 * 10**10), + "allowed_extra_profit": int(0.00000001 * 10**18), + "adjustment_step": int(0.0000055 * 10**18), + "ma_half_time": 600, + }.update(params_not_preset[preset_to_invariant["cryptoswap liquid restaking tokens"]]), + }, + "tricrypto": { + "pool_type": "factory_tricrypto", + "params": { + "A": 540000, + "gamma": int(0.0000805 * 10**18), + "fee_gamma": int(0.0004 * 10**18), + "mid_fee": int(0.0001 * 10**10), + "out_fee": int(0.014 * 10**10), + "allowed_extra_profit": int(0.0000000001 * 10**18), + "adjustment_step": int(0.0000001 * 10**18), + "ma_half_time": 600, + }.update(params_not_preset[preset_to_invariant["tricrypto"]]), + }, + "three coin volatile": { + "pool_type": "factory_tricrypto", + "params": { + "A": 2700000, + "gamma": int(0.0000013 * 10**18), + "fee_gamma": int(0.00035 * 10**18), + "mid_fee": int(0.0002999999 * 10**10), + "out_fee": int(0.008 * 10**10), + "allowed_extra_profit": int(0.0000001 * 10**18), + "adjustment_step": int(0.0000001 * 10**18), + "ma_half_time": 600, + }.update(params_not_preset[preset_to_invariant["three coin volatile"]]), + }, +} + + +# how to figure which dict type is which, how will simcontext figure which is which? +# simcontext: compare keys with templates above (if dict), if poolmetadatainterface, you already know + +# rethink location of this file: pool/sim_interface instead? \ No newline at end of file diff --git a/curvesim/pool_data/metadata/stableswap.py b/curvesim/pool_data/metadata/stableswap.py index df57ea2c9..f9e6ea3d2 100644 --- a/curvesim/pool_data/metadata/stableswap.py +++ b/curvesim/pool_data/metadata/stableswap.py @@ -16,7 +16,7 @@ def process_to_kwargs(data, normalize): "fee": data["params"]["fee"], "fee_mul": data["params"]["fee_mul"], "admin_fee": data["params"]["admin_fee"], - "virtual_price": data["reserves"]["virtual_price"], + "virtual_price": data["params"]["virtual_price"], } if normalize: diff --git a/curvesim/pool_data/queries/metadata.py b/curvesim/pool_data/queries/metadata.py index eacc249e6..8cb955485 100644 --- a/curvesim/pool_data/queries/metadata.py +++ b/curvesim/pool_data/queries/metadata.py @@ -3,14 +3,15 @@ """ from typing import Optional, Union -from curvesim.constants import Chain, Env -from curvesim.network.subgraph import pool_snapshot_sync, symbol_address_sync +from curvesim.constants import Chain +from curvesim.network.curve_prices import pool_snapshot_sync +# from curvesim.network.subgraph import pool_snapshot_sync, symbol_address_sync # TODO deprecate all? from curvesim.network.web3 import underlying_coin_info_sync -from curvesim.pool_data.metadata import PoolMetaData +from curvesim.pool_data.metadata import PoolMetaData, PoolMetaDataInterface from curvesim.utils import Address, get_event_loop, to_address -def from_address(address, chain, env="prod", end_ts=None): +def from_address(address: str, chain: str, end_ts: Optional[int] = None) -> dict: """ Parameters ---------- @@ -18,50 +19,49 @@ def from_address(address, chain, env="prod", end_ts=None): Address prefixed with '0x' chain: str Chain name - env: str - Environment name for subgraph: 'prod' or 'staging' + end_ts: Optional[int] + TODO Returns ------- Pool snapshot dictionary in the format returned by - :func:`curvesim.network.subgraph.pool_snapshot`. + :func:`curvesim.network.curve_prices.pool_snapshot`. """ loop = get_event_loop() - data = pool_snapshot_sync(address, chain, env=env, end_ts=end_ts, event_loop=loop) + data = pool_snapshot_sync(address, chain=chain, end_ts=end_ts, event_loop=loop) # Get underlying token addresses - if data["pool_type"] == "LENDING": - u_addrs, u_decimals = underlying_coin_info_sync( - data["coins"]["addresses"], event_loop=loop - ) + # if data["pool_type"] == "LENDING": # TODO deprecate? + # u_addrs, u_decimals = underlying_coin_info_sync( + # data["coins"]["addresses"], event_loop=loop + # ) - m = data.pop("coins") - names = [n[1:] for n in m["names"]] + # m = data.pop("coins") + # names = [n[1:] for n in m["names"]] - data["coins"] = { - "names": names, - "addresses": u_addrs, - "decimals": u_decimals, - "wrapper": m, - } + # data["coins"] = { + # "names": names, + # "addresses": u_addrs, + # "decimals": u_decimals, + # "wrapper": m, + # } return data -def from_symbol(symbol, chain, env): - address = symbol_address_sync(symbol, chain) +# def from_symbol(symbol, chain, env): # TODO deprecated? +# address = symbol_address_sync(symbol, chain) - data = from_address(address, chain, env) +# data = from_address(address, chain, env) - return data +# return data def get_metadata( address: Union[str, Address], chain: Union[str, Chain] = Chain.MAINNET, - env: Union[str, Env] = Env.PROD, end_ts: Optional[int] = None, -): +) -> PoolMetaDataInterface: """ Pulls pool state and metadata from daily snapshot. @@ -73,9 +73,9 @@ def get_metadata( chain : str, Chain Chain/layer2 identifier, e.g. “mainnet”, “arbitrum”, “optimism". - end_ts : int, optional - Datetime cutoff, given as Unix timestamp, to pull last snapshot before. - The default value is current datetime, which will pull the most recent snapshot. + end_ts : Optional[int] + Unix timestamp cutoff: the most recent snapshot before it will be pulled. + The default value is the current datetime, which will pull the most recent snapshot. Returns ------- @@ -84,9 +84,8 @@ def get_metadata( """ address = to_address(address) chain = Chain(chain) - env = Env(env) - metadata_dict = from_address(address, chain, env=env, end_ts=end_ts) + metadata_dict = from_address(address, chain, end_ts=end_ts) metadata = PoolMetaData(metadata_dict) return metadata diff --git a/curvesim/price_data/__init__.py b/curvesim/price_data/__init__.py index f3f2a4455..a44c59f76 100644 --- a/curvesim/price_data/__init__.py +++ b/curvesim/price_data/__init__.py @@ -58,9 +58,17 @@ def get_price_data( def _instantiate_data_source(data_source): if isinstance(data_source, str): - data_source_instance = DataSourceEnum[data_source.upper()].value() + if data_source.upper() == "LOCAL": + raise Exception(f"") # TODO + try: + data_source_instance = DataSourceEnum[data_source.upper()].value() + except KeyError: + filepaths = data_source + data_source_instance = DataSourceEnum["LOCAL"].value(filepaths) elif isinstance(data_source, DataSourceEnum): + if data_source == DataSourceEnum.LOCAL: + raise Exception(f"") # TODO data_source_instance = data_source.value() elif isinstance(data_source, DataSource): diff --git a/curvesim/price_data/data_sources/__init__.py b/curvesim/price_data/data_sources/__init__.py index 0d02a239b..591ea0d5f 100644 --- a/curvesim/price_data/data_sources/__init__.py +++ b/curvesim/price_data/data_sources/__init__.py @@ -13,3 +13,4 @@ class DataSourceEnum(Enum): COINGECKO = CoinGeckoPriceVolumeSource LOCAL = CsvDataSource + #CurvePriceVolumeSource? diff --git a/curvesim/price_data/data_sources/coingecko.py b/curvesim/price_data/data_sources/coingecko.py index 660e8fd7c..77f6b67ef 100644 --- a/curvesim/price_data/data_sources/coingecko.py +++ b/curvesim/price_data/data_sources/coingecko.py @@ -60,6 +60,7 @@ def query( _data = _reindex_to_time_sequence(_data, time_sequence, asset.id) data.append(_data) + # data will have length 2 by now # divide prices: (usd/base) / (usd/quote) = quote/base # sum volumes and convert to base: usd / (usd/base) = base base_data, quote_data = data @@ -127,7 +128,7 @@ def _reindex_to_time_sequence(df, time_sequence, asset_id): ) if any(nan_count > 0): - df_reindexed["price"] = df_reindexed["price"].ffill() + df_reindexed["price"] = df_reindexed["price"].ffill().bfill() df_reindexed["volume"] = df_reindexed["volume"].fillna(0) return df_reindexed diff --git a/curvesim/price_data/data_sources/local.py b/curvesim/price_data/data_sources/local.py index ffdb3e4b0..50d14b1e3 100644 --- a/curvesim/price_data/data_sources/local.py +++ b/curvesim/price_data/data_sources/local.py @@ -50,7 +50,22 @@ def _reindex_to_time_sequence(df, time_sequence, asset_id): ) if any(nan_count > 0): - df_reindexed["price"] = df_reindexed["price"].ffill() + df_reindexed["price"] = df_reindexed["price"].ffill().bfill() df_reindexed["volume"] = df_reindexed["volume"].fillna(0) return df_reindexed + + +""" +Some problems: +no documentation on expected format for price-volume data inside file + +no interface to rename csv files (base-quote) based on an AssetPair's symbols (so can work with default read func) + or at least provide a macro/function for converting curvesim's expected naming convention to a user-supplied naming convention for specifying file name in the dir + +unzip zipped csv file directory? + +no dedicated dir - let users use wherever + +save price volume data to csv (by supplying file path) on demand? - no, this is not datasource functionality +""" \ No newline at end of file diff --git a/curvesim/sim/__init__.py b/curvesim/sim/__init__.py index 8d2094e1d..0114e8f0d 100644 --- a/curvesim/sim/__init__.py +++ b/curvesim/sim/__init__.py @@ -19,6 +19,7 @@ logger = get_logger(__name__) +# TODO: deprecate def autosim( pool=None, chain="mainnet", diff --git a/curvesim/templates/data_source.py b/curvesim/templates/data_source.py index 7f97f47f0..bd482e06d 100644 --- a/curvesim/templates/data_source.py +++ b/curvesim/templates/data_source.py @@ -3,9 +3,10 @@ """ from abc import ABC, abstractmethod +from glob import glob from os import extsep -from os.path import join -from typing import Callable, Optional +from os.path import join, expanduser +from typing import Callable, Optional, List from pandas import DataFrame @@ -51,20 +52,47 @@ class FileDataSource(DataSource): DataSource that pulls data from local files. """ - def __init__(self, directory: str = "", read_function: Optional[Callable] = None): + def __init__(self, files: str, read_function: Optional[Callable] = None): """ Fetches asset data for a particular range of times. Parameters ---------- - directory: str, default="" + directory: str, default="" TODO Directory to pull data from. + comma-separated str read_function: Callable, optional Optional custom function to read data file. """ - self.directory = directory + + """ + how to instantiate in get_price_data? + map this type of str to DataSourceEnum.LOCAL = CsvDataSource somewhere + if is str and is not in datasourceenum: + datasourceenum.local(files=str) -> inside FileDataSource, str.split by ",", apply expanduser, apply glob.glob, instantiate self.files as the glob result list (files type = List[str or pathlib.Path?], changed from dir = ""), create mapping from the names in files + + if is DataSourceEnum.LOCAL, filedatasource files arg default will be empty str (list?)? -> instantiate files as the result of searching all .csv in current dir -> the mapping (FileDataSource) will just be: result of fnmatching or globbing for files in files containing "{base}-{quote}" in the name (HOW TO KNOW THE SIMASSETS?), else (if empty) raise error + OR JUST RAISE ERROR in get_price_data + """ + if files == "": + raise Exception("No empty") + + paths: List[str] = files.split(",") + paths = [expanduser(path) for path in paths] + full_paths: List[str] = [] + for path in paths: + matches: List[str] = glob(path) + if matches == []: + raise OSError(f"supplied path doesn't exist {path}") + if not matches[0].endswith(f"{extsep}{self.file_extension}"): + raise Exception("wrong file type") + full_paths.append(matches[0]) + + self.filepaths: List[str] = full_paths + + #self.directory = directory self.read_function = read_function or self._default_read_function def query( @@ -72,7 +100,7 @@ def query( ) -> DataFrame: """ Fetches asset data for a particular range of times. - Uses filepath: {directory}/{base_symbol}-{quote_symbol}.{file_extension} + Uses filepath: {base_symbol}-{quote_symbol}.{file_extension} TODO Parameters ---------- @@ -86,8 +114,11 @@ def query( ------- pandas.DataFrame """ - filename = sim_asset.base.symbol + "-" + sim_asset.quote.symbol - filepath = join(self.directory, filename + extsep + self.file_extension) + search_term = f"{sim_asset.base.symbol}-{sim_asset.quote.symbol}" + try: + filepath: str = list(filter(lambda filename: search_term in filename, self.filepaths)) [0] + except IndexError: + raise Exception(f"no file for {search_term}") df = self.read_function(filepath, sim_asset, time_sequence) return df diff --git a/curvesim/templates/log.py b/curvesim/templates/log.py index 9cea44923..adc2b6b19 100644 --- a/curvesim/templates/log.py +++ b/curvesim/templates/log.py @@ -7,5 +7,5 @@ def update(self, **kwargs): """Updates log data with event data.""" @abstractmethod - def compute_metrics(self): - """Computes metrics from the accumulated log data.""" + def get_logs(self): + """Returns the accumulated log data.""" diff --git a/curvesim/templates/reference_market.py b/curvesim/templates/reference_market.py index 7e3a470c1..470956bb1 100644 --- a/curvesim/templates/reference_market.py +++ b/curvesim/templates/reference_market.py @@ -1,21 +1,19 @@ from abc import ABC, abstractmethod -from typing import List, NewType +from typing import NewType, Dict, Any, List from typing_extensions import Self from curvesim.logging import get_logger +from curvesim.utils import override +from curvesim.iterators.price_samplers import PriceVolume, PriceVolumeSample -logger = get_logger(__name__) - - -TimeStep = NewType("TimeStep", int) +from .price_samplers import PriceSample +from .sim_asset import SimAsset, OnChainAssetPair +logger = get_logger(__name__) -class SimAsset: - id: str - def __init__(self, _id: str) -> Self: - self.id = _id +# TimeStep = NewType("TimeStep", int) class ReferenceMarket(ABC): @@ -24,7 +22,7 @@ class ReferenceMarket(ABC): """ @abstractmethod - def prices(self, sim_assets: List[SimAsset], timestep: TimeStep) -> List[float]: + def quotes(self, asset_pairs: List[OnChainAssetPair], timestamp: Any) -> PriceSample: """ This particular signature supposes: - an infinite-depth external venue, i.e. we can trade at any size @@ -32,3 +30,35 @@ def prices(self, sim_assets: List[SimAsset], timestep: TimeStep) -> List[float]: - the "orderbook" is symmetric, i.e. trade direction doesn't matter. """ raise NotImplementedError + + +class PriceVolumeReference(ReferenceMarket): + """ + + """ + def __init__(price_volume: PriceVolume): + self.price_volume: PriceVolume = price_volume + + @override + def quotes(self, asset_pairs: List[OnChainAssetPair], timestamp: Any) -> PriceVolumeSample: + pass + + # PriceVolume doesn't have any restrictions on the timestamp index's type + # so pass in any timestamp type, and if we can't find it, that's your fault + +""" +- for now, could be a wrapper around pricevolume attr (or at least its .data df attr), basically simcontext.executor needs something to give price data for each timestamp in a timesequence +- prices signature should take in an AssetPair instead + - needs to return a PriceSample (PriceVolumeSample) +- doesn't have data at a supplied timestamp - raise error +- abstract the timestamp? (ie local and coingecko may have different timesequence types) + - assumptions rely on where you sourced the data from (should data sources have an interface for their timesequence type or read function sig. of whatever takes in timesequence? how do we know which data source was instantiated for the market in question?) + - target type should be what? + +- pass referencemarket class in gin config like with log and trader + - new referencemarket every run + - simcontext needs to abstract the instantiation (data-passing) process for arbitrary referencemarket types + - for the base class, what assumptions should we make (on the data and its structure)? what should we leave open? + - data structure is dependent on outside operations, make assumption based on underlying data container + +""" diff --git a/curvesim/templates/strategy.py b/curvesim/templates/strategy.py index 9677975dd..88875aac1 100644 --- a/curvesim/templates/strategy.py +++ b/curvesim/templates/strategy.py @@ -6,9 +6,11 @@ from .log import Log from .trader import Trader -logger = get_logger(__name__) +#from curvesim.pipelines.simulation import compute_metrics +logger = get_logger(__name__) +# TODO: remove this and all child classes class Strategy(ABC): """ A Strategy defines the trading approach used during each step of a simulation. @@ -33,6 +35,7 @@ class Strategy(ABC): trader_class: Optional[Type[Trader]] = None log_class: Optional[Type[Log]] = None + # TODO: no need for metrics param anymore (child classes too) def __init__(self, metrics): """ Parameters @@ -42,7 +45,7 @@ def __init__(self, metrics): """ self.metrics = metrics - def __call__(self, pool, parameters, price_sampler): + def __call__(self, pool, parameters, price_sampler, metrics): """ Computes and executes trades at each timestep. @@ -66,8 +69,8 @@ def __call__(self, pool, parameters, price_sampler): """ # pylint: disable=not-callable - trader = self.trader_class(pool) - log = self.log_class(pool, self.metrics) + trader = self.trader_class() + log = self.log_class(pool) parameters = parameters or "no parameter changes" logger.info("[%s] Simulating with %s", pool.symbol, parameters) @@ -77,10 +80,12 @@ def __call__(self, pool, parameters, price_sampler): for sample in price_sampler: pool.prepare_for_trades(sample.timestamp) trader_args = self._get_trader_inputs(sample) - trade_data = trader.process_time_sample(*trader_args) - log.update(price_sample=sample, trade_data=trade_data) + trade_data = trader.process_time_sample(pool, *trader_args) + log.update(pool=pool, price_sample=sample, trade_data=trade_data) + + run_logs = log.get_logs() - return log.compute_metrics() + return compute_metrics(run_logs, metrics) @abstractmethod def _get_trader_inputs(self, sample): diff --git a/curvesim/templates/time_sequence.py b/curvesim/templates/time_sequence.py index 71ce34bc5..6bb43cf7a 100644 --- a/curvesim/templates/time_sequence.py +++ b/curvesim/templates/time_sequence.py @@ -1,9 +1,9 @@ """Interfaces for TimeSequences, used to track time within simulations.""" - - from datetime import datetime, timezone from typing import Generic, Iterable, Optional, TypeVar, Union +import gin + from pandas import DateOffset, date_range from pandas.tseries.frequencies import to_offset @@ -12,6 +12,7 @@ T = TypeVar("T") +@gin.register class TimeSequence(Generic[T]): """ Generic class for time-like sequences. @@ -38,6 +39,7 @@ def __repr__(self): return f"<{self.__class__.__name__} start={self[0]} end={self[-1]}>" +@gin.register class DateTimeSequence(TimeSequence[datetime]): """ TimeSequence composed of datetimes. diff --git a/curvesim/templates/trader.py b/curvesim/templates/trader.py index f6ee16f40..a081202f6 100644 --- a/curvesim/templates/trader.py +++ b/curvesim/templates/trader.py @@ -2,10 +2,10 @@ from dataclasses import fields from typing import Union -from curvesim.logging import get_logger +from curvesim.logging import get_logger # why is this here? from curvesim.utils import dataclass -logger = get_logger(__name__) +logger = get_logger(__name__) # why is this here? @dataclass(frozen=True, slots=True) @@ -60,22 +60,16 @@ class Trader(ABC): """ Computes, executes, and reports out arbitrage trades. """ - - def __init__(self, pool): + @abstractmethod + def compute_trades(self, pool, *args): """ + Computes trades to execute on the pool. + Parameters ---------- - pool : + pool : :class:`~curvesim.pipelines.templates.SimPool` Simulation interface to a subclass of :class:`.Pool`. - """ - self.pool = pool - - @abstractmethod - def compute_trades(self, *args): - """ - Computes trades to execute on the pool. - Returns ------- trades : list of :class:`Trade` objects @@ -86,12 +80,15 @@ def compute_trades(self, *args): """ raise NotImplementedError - def do_trades(self, trades): + def do_trades(self, pool, trades): """ Executes a series of trades. Parameters ---------- + pool : :class:`~curvesim.pipelines.templates.SimPool` + Simulation interface to a subclass of :class:`.Pool`. + trades : list of :class:`Trade` objects Trades to execute. @@ -104,20 +101,26 @@ def do_trades(self, trades): trade_results = [] for trade in trades: - dy, fee = self.pool.trade(trade.coin_in, trade.coin_out, trade.amount_in) + dy, fee = pool.trade(trade.coin_in, trade.coin_out, trade.amount_in) trade_results.append(TradeResult.from_trade(trade, amount_out=dy, fee=fee)) return trade_results - def process_time_sample(self, *args): + def process_time_sample(self, pool, *args): # this may need to take in individual pools """ Process given tick data by computing and executing trades. + Parameters + ---------- + pool : :class:`~curvesim.pipelines.templates.SimPool` + Simulation interface to a subclass of :class:`.Pool`. + + TODO: alter below The input args must be properly formed and fed by the parent `Strategy` object housing the trader class via its :meth:`~curvesim.pipelines.templates.Strategy._get_trader_inputs`. """ - trades, additional_data = self.compute_trades(*args) - trade_results = self.do_trades(trades) + trades, additional_data = self.compute_trades(pool, *args) + trade_results = self.do_trades(pool, trades) return {"trades": trade_results, **additional_data} diff --git a/curvesim/utils/address.py b/curvesim/utils/address.py index e9a73f9e1..7a2ba8c0c 100644 --- a/curvesim/utils/address.py +++ b/curvesim/utils/address.py @@ -47,4 +47,4 @@ def is_address(address_string: str, checksum=True) -> bool: if checksum: # pylint: disable=no-else-return return is_checksum_address(address_string) else: - return is_normalized_address(address_string) + return is_normalized_address(address_string) or is_checksum_address(address_string) diff --git a/test/integration/test_get_pool_metadata.py b/test/integration/test_get_pool_metadata.py index b8c10de71..c90089481 100644 --- a/test/integration/test_get_pool_metadata.py +++ b/test/integration/test_get_pool_metadata.py @@ -9,6 +9,7 @@ # for p in properties: # stored_properties[p] = getattr(metadata, p) +# TODO: update to reflect switch to curve-prices def _test_pool_metadata(address, chain, stored_properties, stored_dict): metadata = get_metadata(address, chain) diff --git a/test/integration/test_subgraph.py b/test/integration/test_subgraph.py index 401386ba7..d6d9c4346 100644 --- a/test/integration/test_subgraph.py +++ b/test/integration/test_subgraph.py @@ -1,3 +1,4 @@ +# TODO: deprecated import pytest from curvesim.exceptions import SubgraphError diff --git a/test/pool_metadata.py b/test/pool_metadata.py index a25405b5e..1e6785b6a 100644 --- a/test/pool_metadata.py +++ b/test/pool_metadata.py @@ -1,5 +1,6 @@ import json +# TODO: update to reflect switch to curve-prices POOL_TEST_METADATA_JSON = """ { "name": "Curve.fi DAI/USDC/USDT",