From 993cf8ea968d42aee7fb1d2292dd0cdebf299fa3 Mon Sep 17 00:00:00 2001 From: Karol Chojnowski <43881785+kchojn@users.noreply.github.com> Date: Thu, 23 Sep 2021 14:16:20 +0200 Subject: [PATCH 01/10] Guess functions and events from external source(4byte, db) (#41) --- ethtx/decoders/abi/abc.py | 19 +-- ethtx/decoders/abi/calls.py | 43 +++++-- ethtx/decoders/abi/decoder.py | 21 ++-- ethtx/decoders/abi/events.py | 33 ++++- ethtx/decoders/abi/helpers/__init__.py | 0 ethtx/decoders/abi/helpers/utils.py | 88 +++++++++++++ ethtx/decoders/abi/transfers.py | 6 +- ethtx/ethtx.py | 3 +- ethtx/models/semantics_model.py | 24 ++++ ethtx/providers/__init__.py | 15 +++ ethtx/providers/etherscan_provider.py | 1 - .../semantic_providers/semantics_database.py | 39 +++--- .../semantics_repository.py | 65 ++++++++-- ethtx/providers/signature_provider.py | 117 ++++++++++++++++++ tests/model_test.py | 2 +- 15 files changed, 401 insertions(+), 75 deletions(-) create mode 100644 ethtx/decoders/abi/helpers/__init__.py create mode 100644 ethtx/decoders/abi/helpers/utils.py create mode 100644 ethtx/providers/signature_provider.py diff --git a/ethtx/decoders/abi/abc.py b/ethtx/decoders/abi/abc.py index a3fa1a77..26b512ed 100644 --- a/ethtx/decoders/abi/abc.py +++ b/ethtx/decoders/abi/abc.py @@ -14,7 +14,14 @@ from typing import Optional, Any, List, Dict from ethtx.models.decoded_model import DecodedCall, DecodedTransfer -from ethtx.models.objects_model import Block, Transaction, Call, Event, TransactionMetadata, BlockMetadata +from ethtx.models.objects_model import ( + Block, + Transaction, + Call, + Event, + TransactionMetadata, + BlockMetadata, +) from ethtx.providers.semantic_providers.semantics_repository import SemanticsRepository @@ -57,7 +64,7 @@ def decode_calls( block: BlockMetadata, transaction: TransactionMetadata, delegations: Dict[str, set], - token_proxies: Dict[str, dict] + token_proxies: Dict[str, dict], ) -> ABISubmoduleAbc.decode: ... @@ -74,16 +81,12 @@ def decode_events( @abstractmethod def decode_transfers( - self, - call: DecodedCall, - events: [Event], - token_proxies: Dict[str, dict] + self, call: DecodedCall, events: [Event], token_proxies: Dict[str, dict] ) -> ABISubmoduleAbc.decode: ... @abstractmethod def decode_balances( - self, - transfers: List[DecodedTransfer] + self, transfers: List[DecodedTransfer] ) -> ABISubmoduleAbc.decode: ... diff --git a/ethtx/decoders/abi/calls.py b/ethtx/decoders/abi/calls.py index 4434e9f1..90d30a8a 100644 --- a/ethtx/decoders/abi/calls.py +++ b/ethtx/decoders/abi/calls.py @@ -9,20 +9,21 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - +import logging from typing import Optional, Dict from ethtx.models.decoded_model import DecodedCall from ethtx.models.objects_model import Call, TransactionMetadata, BlockMetadata -from ethtx.utils.measurable import RecursionLimit - +from ethtx.semantics.solidity.precompiles import precompiles from ethtx.semantics.standards.erc20 import ERC20_FUNCTIONS from ethtx.semantics.standards.erc721 import ERC721_FUNCTIONS -from ethtx.semantics.solidity.precompiles import precompiles - +from ethtx.utils.measurable import RecursionLimit from .abc import ABISubmoduleAbc +from .helpers.utils import decode_function_abi_with_external_source from ..decoders.parameters import decode_function_parameters, decode_graffiti_parameters +log = logging.getLogger(__name__) + RECURSION_LIMIT = 2000 @@ -119,14 +120,13 @@ def decode_call( function_input, function_output = [], [] elif self._repository.check_is_contract(chain_id, call.to_address): - standard = self._repository.get_standard(chain_id, call.to_address) function_abi = self._repository.get_function_abi( chain_id, call.to_address, function_signature ) - function_signature = call.call_data[:10] if call.call_data else '' + function_signature = call.call_data[:10] if call.call_data else "" if not function_abi: if standard == "ERC20": @@ -146,9 +146,31 @@ def decode_call( break function_name = function_abi.name if function_abi else function_signature + function_input, function_output = decode_function_parameters( call.call_data, call.return_value, function_abi, call.status ) + + if function_name.startswith("0x") and len(function_signature) > 2: + functions_abi_provider = decode_function_abi_with_external_source( + signature=function_signature, repository=self._repository + ) + for function_abi_provider in functions_abi_provider: + try: + function_abi = function_abi_provider + function_name = function_abi.name + function_input, function_output = decode_function_parameters( + call.call_data, call.return_value, function_abi, call.status + ) + except Exception as e: + log.info( + "Skipping getting function from external source and trying to get next. Error: %s", + e, + ) + continue + else: + break + if ( not call.status and function_output @@ -161,8 +183,11 @@ def decode_call( function_semantics = precompiles[int(call.to_address, 16)] function_name = function_semantics.name function_input, function_output = decode_function_parameters( - call.call_data, call.return_value, function_semantics, call.status, - strip_signature=False + call.call_data, + call.return_value, + function_semantics, + call.status, + strip_signature=False, ) else: function_name = "fallback" diff --git a/ethtx/decoders/abi/decoder.py b/ethtx/decoders/abi/decoder.py index b0af3dd5..b69bdab6 100644 --- a/ethtx/decoders/abi/decoder.py +++ b/ethtx/decoders/abi/decoder.py @@ -51,9 +51,6 @@ def decode_transaction( try: with ExecutionTimer(f"ABI decoding for " + transaction.metadata.tx_hash): - log.info( - "ABI decoding for %s / %s.", transaction.metadata.tx_hash, chain_id - ) full_decoded_transaction = self._decode_transaction( block.metadata, transaction, chain_id, delegations, token_proxies ) @@ -84,7 +81,7 @@ def decode_calls( transaction=transaction, delegations=delegations, token_proxies=token_proxies, - chain_id=chain_id or self._default_chain + chain_id=chain_id or self._default_chain, ) def decode_call( @@ -122,7 +119,7 @@ def decode_events( transaction=transaction, delegations=delegations or {}, token_proxies=token_proxies or {}, - chain_id=chain_id or self._default_chain + chain_id=chain_id or self._default_chain, ) def decode_event( @@ -142,7 +139,7 @@ def decode_event( transaction=transaction, delegations=delegations or {}, token_proxies=token_proxies or {}, - chain_id=chain_id or self._default_chain + chain_id=chain_id or self._default_chain, ) def decode_transfers( @@ -154,11 +151,7 @@ def decode_transfers( ): return ABITransfersDecoder( repository=self._repository, chain_id=chain_id or self._default_chain - ).decode( - call=call, - events=events, - token_proxies=token_proxies or {}, - ) + ).decode(call=call, events=events, token_proxies=token_proxies or {}) def decode_balances(self, transfers: List[DecodedTransfer]): return ABIBalancesDecoder( @@ -192,7 +185,7 @@ def _decode_transaction( transaction.metadata, delegations, token_proxies, - chain_id + chain_id, ) except Exception: log.exception( @@ -209,7 +202,7 @@ def _decode_transaction( transaction.metadata, delegations, token_proxies, - chain_id + chain_id, ) except Exception: log.exception( @@ -224,7 +217,7 @@ def _decode_transaction( full_decoded_transaction.calls, full_decoded_transaction.events, token_proxies, - chain_id + chain_id, ) except Exception: log.exception( diff --git a/ethtx/decoders/abi/events.py b/ethtx/decoders/abi/events.py index 5924d4b1..d76928e5 100644 --- a/ethtx/decoders/abi/events.py +++ b/ethtx/decoders/abi/events.py @@ -17,6 +17,7 @@ from ethtx.semantics.standards.erc20 import ERC20_EVENTS from ethtx.semantics.standards.erc721 import ERC721_EVENTS from .abc import ABISubmoduleAbc +from .helpers.utils import decode_event_abi_name_with_external_source from ..decoders.parameters import decode_event_parameters @@ -75,13 +76,31 @@ def decode_event( if event_signature in ERC20_EVENTS: # try standard ERC20 events - if len([parameter for parameter in ERC20_EVENTS[event_signature].parameters if parameter.indexed]) == \ - len([topic for topic in event.topics if topic]) - 1: + if ( + len( + [ + parameter + for parameter in ERC20_EVENTS[event_signature].parameters + if parameter.indexed + ] + ) + == len([topic for topic in event.topics if topic]) - 1 + ): event_abi = ERC20_EVENTS[event_signature] elif event_signature in ERC721_EVENTS: # try standard ERC721 events - if len([parameter for parameter in ERC721_EVENTS[event_signature].parameters if parameter.indexed]) == \ - len([topic for topic in event.topics if topic]) - 1: + if ( + len( + [ + parameter + for parameter in ERC721_EVENTS[ + event_signature + ].parameters + if parameter.indexed + ] + ) + == len([topic for topic in event.topics if topic]) - 1 + ): event_abi = ERC721_EVENTS[event_signature] if not event_abi: @@ -106,10 +125,16 @@ def decode_event( chain_id, event.contract, token_proxies ) event_name = event_abi.name if event_abi else event_signature + parameters = decode_event_parameters( event.log_data, event.topics, event_abi, anonymous ) + if event_name.startswith("0x") and len(event_name) > 2: + event_name = decode_event_abi_name_with_external_source( + signature=event_signature + ) + return DecodedEvent( chain_id=chain_id, tx_hash=transaction.tx_hash, diff --git a/ethtx/decoders/abi/helpers/__init__.py b/ethtx/decoders/abi/helpers/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/ethtx/decoders/abi/helpers/utils.py b/ethtx/decoders/abi/helpers/utils.py new file mode 100644 index 00000000..78e29d4c --- /dev/null +++ b/ethtx/decoders/abi/helpers/utils.py @@ -0,0 +1,88 @@ +# Copyright 2021 DAI Foundation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at: http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import logging +from typing import Iterator, Optional + +from ethtx.models.semantics_model import ( + FunctionSemantics, + ParameterSemantics, + Signature, + SignatureArg, +) +from ethtx.providers import FourByteProvider +from ethtx.providers.semantic_providers.semantics_repository import SemanticsRepository +from ethtx.providers.signature_provider import SignatureProvider + +log = logging.getLogger(__name__) + + +def decode_function_abi_with_external_source( + signature: str, + repository: SemanticsRepository, + _provider: Optional[SignatureProvider] = FourByteProvider, +) -> Iterator[FunctionSemantics]: + + function = repository.get_most_common_signature(signature_hash=signature) + if function: + log.info( + "Successfully guessed function from SemanticsRepository - %s.", + function.json(), + ) + function_semantics = FunctionSemantics( + signature, + function.name, + [ParameterSemantics(arg["name"], arg["type"], []) for arg in function.args], + [], + ) + yield function_semantics + + functions = _provider.get_function(signature=signature) + for func in functions: + if not func: + yield + + function_semantics = FunctionSemantics( + signature, + func.get("name"), + [ + ParameterSemantics(f"arg{i}", arg, []) + for i, arg in enumerate(func.get("args")) + ], + [], + ) + + repository.process_signatures( + signature=Signature( + signature_hash=signature, + name=func.get("name"), + args=[ + SignatureArg(name=param.parameter_name, type=param.parameter_type) + for param in function_semantics.inputs + ], + ) + ) + yield function_semantics + + +def decode_event_abi_name_with_external_source( + signature: str, _provider: Optional[SignatureProvider] = FourByteProvider +) -> str: + events = _provider.get_event(signature=signature) + + for event in events: + + if not event: + return signature + + return event.get("name", signature) + + return signature diff --git a/ethtx/decoders/abi/transfers.py b/ethtx/decoders/abi/transfers.py index ac6b92df..dbedfbdb 100644 --- a/ethtx/decoders/abi/transfers.py +++ b/ethtx/decoders/abi/transfers.py @@ -23,11 +23,7 @@ class ABITransfersDecoder(ABISubmoduleAbc): """Abi Transfers Decoder.""" - def decode( - self, - call: DecodedCall, - events, token_proxies - ) -> List: + def decode(self, call: DecodedCall, events, token_proxies) -> List: """Decode transfers.""" transfers = [] diff --git a/ethtx/ethtx.py b/ethtx/ethtx.py index 1edf3858..0d1627c2 100644 --- a/ethtx/ethtx.py +++ b/ethtx/ethtx.py @@ -19,13 +19,12 @@ from .decoders.decoder_service import DecoderService from .decoders.semantic.decoder import SemanticDecoder from .models.objects_model import Call -from .providers.etherscan_provider import EtherscanProvider +from .providers import EtherscanProvider, Web3Provider from .providers.semantic_providers.semantics_database import ( MongoSemanticsDatabase, ISemanticsDatabase, ) from .providers.semantic_providers.semantics_repository import SemanticsRepository -from .providers.web3_provider import Web3Provider from .utils.validators import assert_tx_hash diff --git a/ethtx/models/semantics_model.py b/ethtx/models/semantics_model.py index e741e655..576809de 100644 --- a/ethtx/models/semantics_model.py +++ b/ethtx/models/semantics_model.py @@ -94,6 +94,30 @@ def __init__( self.outputs = outputs +class SignatureArg(JsonObject): + name: str + type: str + + def __init__(self, name: str, type: str): + self.name = name + self.type = type + + +class Signature(JsonObject): + signature_hash: str + name: str + args: List[SignatureArg] + count: int + + def __init__( + self, signature_hash: str, name: str, args: List[SignatureArg], count: int = 1 + ): + self.signature_hash = signature_hash + self.name = name + self.args = args + self.count = count + + class ERC20Semantics: name: str symbol: str diff --git a/ethtx/providers/__init__.py b/ethtx/providers/__init__.py index e69de29b..ce6ad912 100644 --- a/ethtx/providers/__init__.py +++ b/ethtx/providers/__init__.py @@ -0,0 +1,15 @@ +# Copyright 2021 DAI Foundation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at: http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from .etherscan_provider import EtherscanProvider +from .signature_provider import FourByteProvider +from .web3_provider import Web3Provider diff --git a/ethtx/providers/etherscan_provider.py b/ethtx/providers/etherscan_provider.py index a00c75ed..d4f7cb23 100644 --- a/ethtx/providers/etherscan_provider.py +++ b/ethtx/providers/etherscan_provider.py @@ -67,7 +67,6 @@ def _get_contract_abi(self, chain_id, contract_name) -> Dict: if resp.status_code == 200: break - if resp.status_code != 200: raise InvalidEtherscanReturnCodeException(resp.status_code, params) diff --git a/ethtx/providers/semantic_providers/semantics_database.py b/ethtx/providers/semantic_providers/semantics_database.py index 275d84c8..5d2af6e2 100644 --- a/ethtx/providers/semantic_providers/semantics_database.py +++ b/ethtx/providers/semantic_providers/semantics_database.py @@ -11,10 +11,13 @@ # limitations under the License. from abc import ABC -from typing import Dict, Optional +from typing import Dict, Optional, Any +from pymongo.cursor import Cursor from pymongo.database import Database as MongoDatabase +from ethtx.models.semantics_model import Signature + class ISemanticsDatabase(ABC): """Semantics Database. Represents raw interface required to be @@ -27,16 +30,16 @@ def get_address_semantics(self, chain_id: str, address: str) -> Optional[Dict]: def get_contract_semantics(self, code_hash: str) -> Optional[Dict]: ... - def get_signature_semantics(self, signature_hash: str) -> Optional[Dict]: + def get_signature_semantics(self, signature_hash: str) -> Any: ... - def insert_contract(self, contract: dict, update_if_exist: bool = False): + def insert_contract(self, contract: dict, update_if_exist: bool = False) -> None: ... - def insert_address(self, address_data: dict, update_if_exist: bool = False): + def insert_address(self, address_data: dict, update_if_exist: bool = False) -> None: ... - def insert_signature(self, signature, update_if_exist: bool = False): + def insert_signature(self, signature, update_if_exist: bool = False) -> None: ... @@ -60,15 +63,23 @@ def get_address_semantics(self, chain_id, address) -> Optional[Dict]: _id = f"{chain_id}-{address}" return self._addresses.find_one({"_id": _id}, {"_id": 0}) - def get_signature_semantics(self, signature_hash): - return self._signatures.find_one({"_id": signature_hash}, {"_id": 0}) + def get_signature_semantics(self, signature_hash: str) -> Cursor: + return self._signatures.find({"signature_hash": signature_hash}) + + def insert_signature(self, signature: dict, update_if_exist=False) -> None: + if update_if_exist: + self._signatures.replace_one( + {"_id": signature["_id"]}, signature, upsert=True + ) + else: + self._signatures.insert_one(signature) def get_contract_semantics(self, code_hash): """Contract hashes are always the same, no mather what chain we use, so there is no need to use chain_id""" return self._contracts.find_one({"_id": code_hash}, {"_id": 0}) - def insert_contract(self, contract, update_if_exist=False): + def insert_contract(self, contract, update_if_exist=False) -> None: contract_with_id = {"_id": contract["code_hash"], **contract} if update_if_exist: @@ -78,7 +89,7 @@ def insert_contract(self, contract, update_if_exist=False): else: self._contracts.insert_one(contract_with_id) - def insert_address(self, address, update_if_exist=False): + def insert_address(self, address, update_if_exist=False) -> None: address_with_id = { "_id": f"{address['chain_id']}-{address['address']}", **address, @@ -90,13 +101,3 @@ def insert_address(self, address, update_if_exist=False): ) else: self._addresses.insert_one(address_with_id) - - def insert_signature(self, signature, update_if_exist=False): - signature_with_id = {"_id": signature["hash"], **signature} - - if update_if_exist: - self._signatures.replace_one( - {"_id": signature_with_id["_id"]}, signature_with_id, upsert=True - ) - else: - self._signatures.insert_one(signature_with_id) diff --git a/ethtx/providers/semantic_providers/semantics_repository.py b/ethtx/providers/semantic_providers/semantics_repository.py index 1e0b2cc4..03030c77 100644 --- a/ethtx/providers/semantic_providers/semantics_repository.py +++ b/ethtx/providers/semantic_providers/semantics_repository.py @@ -11,7 +11,7 @@ # limitations under the License. from functools import lru_cache -from typing import Optional, List +from typing import Optional, List, Any, Dict from ethtx.decoders.decoders.semantics import decode_events_and_functions from ethtx.models.semantics_model import ( @@ -22,14 +22,14 @@ TransformationSemantics, FunctionSemantics, EventSemantics, + Signature, ) -from ethtx.providers.etherscan_provider import EtherscanProvider +from ethtx.providers import EtherscanProvider, Web3Provider from ethtx.providers.semantic_providers.semantics_database import ISemanticsDatabase -from ethtx.providers.web3_provider import Web3Provider from ethtx.semantics.protocols_router import amend_contract_semantics +from ethtx.semantics.solidity.precompiles import precompiles from ethtx.semantics.standards.erc20 import ERC20_FUNCTIONS, ERC20_EVENTS from ethtx.semantics.standards.erc721 import ERC721_FUNCTIONS, ERC721_EVENTS -from ethtx.semantics.solidity.precompiles import precompiles class SemanticsRepository: @@ -55,10 +55,9 @@ def end_record(self) -> List: return tmp_records def _read_stored_semantics(self, address: str, chain_id: str): - def decode_parameter(_parameter): components_semantics = [] - if 'component' in _parameter: + if "component" in _parameter: for component in _parameter["components"]: components_semantics.append(decode_parameter(component)) @@ -215,7 +214,9 @@ def get_semantics(self, chain_id: str, address: str) -> Optional[AddressSemantic else: # try to guess if the address is a toke - potential_erc20_semantics = provider.guess_erc20_token(address, chain_id) + potential_erc20_semantics = provider.guess_erc20_token( + address, chain_id + ) if potential_erc20_semantics: standard = "ERC20" erc20_semantics = ERC20Semantics( @@ -363,10 +364,10 @@ def get_constructor_abi(self, chain_id, address): def get_address_label(self, chain_id, address, token_proxies=None): if not address: - return '' + return "" if int(address, 16) in precompiles: - contract_label = 'Precompiled' + contract_label = "Precompiled" else: semantics = self.get_semantics(chain_id, address) if semantics.erc20: @@ -374,7 +375,9 @@ def get_address_label(self, chain_id, address, token_proxies=None): elif token_proxies and address in token_proxies: contract_label = token_proxies[address][1] + "_proxy" else: - contract_label = semantics.name if semantics and semantics.name else address + contract_label = ( + semantics.name if semantics and semantics.name else address + ) return contract_label @@ -423,7 +426,7 @@ def get_token_data(self, chain_id, address, token_proxies=None): token_symbol = "Unknown" token_decimals = 18 - return token_name, token_symbol, token_decimals, 'ERC20' + return token_name, token_symbol, token_decimals, "ERC20" def update_address(self, chain_id, address, contract): @@ -437,8 +440,46 @@ def update_semantics(self, semantics): if not semantics: return - address_semantics = semantics.json(False) + address_semantics = semantics.json(entire=False) contract_semantics = semantics.contract.json() self.database.insert_contract(contract_semantics, update_if_exist=True) self.database.insert_address(address_semantics, update_if_exist=True) + + def get_most_common_signature(self, signature_hash: str) -> Signature: + signatures = [ + sig + for sig in self.database.get_signature_semantics( + signature_hash=signature_hash + ) + ] + + if signatures: + most_common_signature = max(signatures, key=lambda x: x["count"]) + signature = Signature( + signature_hash=most_common_signature["signature_hash"], + name=most_common_signature["name"], + args=most_common_signature["args"], + count=most_common_signature["count"], + ) + most_common_signature["count"] += 1 + self.database.insert_signature(most_common_signature, update_if_exist=True) + + return signature + + def process_signatures(self, signature: Signature): + signatures = self.database.get_signature_semantics( + signature_hash=signature.signature_hash + ) + for sig in signatures: + if signature.signature_hash == sig["name"] and len(signature.args) == len( + sig["args"] + ): + if any(arg for arg in list(sig["args"][0].values()) if "arg" in arg): + for index, argument in enumerate(sig["args"]): + argument["name"] = signature.args[index].name + argument["type"] = signature.args[index].type + self.database.insert_signature(signature=sig, update_if_exist=True) + break + else: + self.database.insert_signature(signature=signature.json()) diff --git a/ethtx/providers/signature_provider.py b/ethtx/providers/signature_provider.py new file mode 100644 index 00000000..24dda514 --- /dev/null +++ b/ethtx/providers/signature_provider.py @@ -0,0 +1,117 @@ +# Copyright 2021 DAI Foundation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at: http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from abc import ABC, abstractmethod +from typing import Dict, List, Any, Iterator, TypedDict + +import requests + + +class SignatureReturnType(TypedDict): + name: str + args: List[str] + + +class SignatureProvider(ABC): + @abstractmethod + def list_function_signatures(self, filters: Dict): + ... + + @abstractmethod + def list_event_signatures(self, filters: Dict): + ... + + @abstractmethod + def get_function(self, signature: str): + ... + + @abstractmethod + def get_event(self, signature: str): + ... + + +class FourByteProvider(SignatureProvider): + API_URL: str = "https://www.4byte.directory/api/v1" + FUNCTION_ENDPOINT: str = "signatures" + EVENT_ENDPOINT: str = "event-signatures" + + def list_function_signatures(self, filters: Dict = None) -> List[Dict]: + return self._get_all(endpoint=self.FUNCTION_ENDPOINT, filters=filters) + + def list_event_signatures(self, filters: Dict = None) -> List[Dict]: + return self._get_all(endpoint=self.EVENT_ENDPOINT, filters=filters) + + def get_function(self, signature: str) -> Iterator[SignatureReturnType]: + if signature == "0x": + raise ValueError(f"Signature can not be: {signature}") + + data = self._get_all( + endpoint=self.FUNCTION_ENDPOINT, filters={"hex_signature": signature} + ) + + for function in reversed(data): + yield self._parse_text_signature_response(function) + + def get_event(self, signature: str) -> Iterator[SignatureReturnType]: + if signature == "0x": + raise ValueError(f"Signature can not be: {signature}") + + data = self._get_all( + endpoint=self.EVENT_ENDPOINT, filters={"hex_signature": signature} + ) + + for event in reversed(data): + yield self._parse_text_signature_response(event) + + def url(self, endpoint: str) -> str: + return f"{self.API_URL}/{endpoint}/" + + def _get_all(self, endpoint: str, filters: Dict = None) -> List[Dict]: + page = 1 + results = [] + + while True: + res = self._get(endpoint, page, filters) + next_url = res.get("next") + results.extend(res.get("results", [])) + + if not next_url: + break + page += 1 + + return results + + def _get( + self, endpoint: str, page: int = 0, filters: Dict = None + ) -> Dict[str, Any]: + if filters is None: + filters = {} + + if page: + filters["page"] = page + + return requests.get(self.url(endpoint), params=filters).json() + + @staticmethod + def _parse_text_signature_response(data: Dict) -> SignatureReturnType: + text_sig = data.get("text_signature", "") + name = text_sig.split("(")[0] if text_sig else "" + + types = ( + text_sig[text_sig.find("(") + 1 : text_sig.rfind(")")] if text_sig else "" + ) + if "(" in types: + types = types[types.find("(") + 1 : types.rfind(")")] + + return {"name": name, "args": types.split(",")} + + +FourByteProvider = FourByteProvider() diff --git a/tests/model_test.py b/tests/model_test.py index ef6d5b12..10ce3fdc 100644 --- a/tests/model_test.py +++ b/tests/model_test.py @@ -1,4 +1,4 @@ -from ethtx.providers.web3_provider import Web3Provider +from ethtx.providers import Web3Provider from .mocks.web3provider import MockWeb3Provider From f46378e0c3553d703a0c55fe568d5dd7e5ec434a Mon Sep 17 00:00:00 2001 From: Karol Chojnowski <43881785+kchojn@users.noreply.github.com> Date: Fri, 24 Sep 2021 12:06:05 +0200 Subject: [PATCH 02/10] Update inserting guessed function to db (#46) --- ethtx/decoders/abi/helpers/utils.py | 49 ++++++++++++++++------------- 1 file changed, 27 insertions(+), 22 deletions(-) diff --git a/ethtx/decoders/abi/helpers/utils.py b/ethtx/decoders/abi/helpers/utils.py index 78e29d4c..900845b9 100644 --- a/ethtx/decoders/abi/helpers/utils.py +++ b/ethtx/decoders/abi/helpers/utils.py @@ -44,33 +44,38 @@ def decode_function_abi_with_external_source( [], ) yield function_semantics + return functions = _provider.get_function(signature=signature) - for func in functions: - if not func: - yield + try: + for func in functions: + if not func: + yield - function_semantics = FunctionSemantics( - signature, - func.get("name"), - [ - ParameterSemantics(f"arg{i}", arg, []) - for i, arg in enumerate(func.get("args")) - ], - [], - ) - - repository.process_signatures( - signature=Signature( - signature_hash=signature, - name=func.get("name"), - args=[ - SignatureArg(name=param.parameter_name, type=param.parameter_type) - for param in function_semantics.inputs + function_semantics = FunctionSemantics( + signature, + func.get("name"), + [ + ParameterSemantics(f"arg{i}", arg, []) + for i, arg in enumerate(func.get("args")) ], + [], + ) + yield function_semantics + finally: + if "function_semantics" in locals(): + repository.process_signatures( + signature=Signature( + signature_hash=signature, + name=function_semantics.name, + args=[ + SignatureArg( + name=param.parameter_name, type=param.parameter_type + ) + for param in function_semantics.inputs + ], + ) ) - ) - yield function_semantics def decode_event_abi_name_with_external_source( From 01982f7152cb955ac044d6f875dfb6cddb1a6c76 Mon Sep 17 00:00:00 2001 From: Karol Chojnowski <43881785+kchojn@users.noreply.github.com> Date: Mon, 27 Sep 2021 13:45:44 +0200 Subject: [PATCH 03/10] Add web3 ens resolver, refactor the code (#47) --- ethtx/ethtx.py | 74 +++++++------ ethtx/exceptions.py | 9 +- ethtx/providers/__init__.py | 1 + ethtx/providers/ens_provider.py | 44 ++++++++ ethtx/providers/etherscan_provider.py | 104 +++++++++++------- .../semantics_repository.py | 17 ++- ethtx/providers/web3_provider.py | 14 ++- 7 files changed, 181 insertions(+), 82 deletions(-) create mode 100644 ethtx/providers/ens_provider.py diff --git a/ethtx/ethtx.py b/ethtx/ethtx.py index 0d1627c2..ce16ba74 100644 --- a/ethtx/ethtx.py +++ b/ethtx/ethtx.py @@ -57,15 +57,10 @@ class EthTxDecoders: semantic_decoder: SemanticDecoder abi_decoder: ABIDecoder - def __init__( - self, - semantic_decoder: SemanticDecoder, - abi_decoder: ABIDecoder, - decoder_service: DecoderService, - ): - self.semantic_decoder = semantic_decoder - self.abi_decoder = abi_decoder + def __init__(self, decoder_service: DecoderService): self._decoder_service = decoder_service + self.abi_decoder = decoder_service.abi_decoder + self.semantic_decoder = decoder_service.semantic_decoder def decode_transaction(self, tx_hash: str, chain_id: str = None): assert_tx_hash(tx_hash) @@ -78,48 +73,57 @@ def get_proxies(self, call_tree: Call): class EthTxProviders: web3provider: Web3Provider + etherscan_provider: EtherscanProvider - def __init__(self, web3provider: Web3Provider): + def __init__( + self, web3provider: Web3Provider, etherscan_provider: EtherscanProvider + ): self.web3provider = web3provider + self.etherscan_provider = etherscan_provider class EthTx: - @staticmethod - def initialize(config: EthTxConfig): - default_chain = config.default_chain - mongo_client: MongoClient = connect( - config.mongo_database, host=config.mongo_connection_string - ) - repository = MongoSemanticsDatabase(mongo_client.db) - web3provider = Web3Provider( - config.web3nodes, default_chain=config.default_chain - ) - etherscan = EtherscanProvider( - config.etherscan_api_key, - config.etherscan_urls, - default_chain_id=config.default_chain, - ) - - return EthTx(default_chain, web3provider, repository, etherscan) - - semantics: SemanticsRepository - def __init__( self, default_chain: str, + database: ISemanticsDatabase, web3provider: Web3Provider, - repository: ISemanticsDatabase, - etherscan: EtherscanProvider, + etherscan_provider: EtherscanProvider, ): self._default_chain = default_chain - self._semantics = SemanticsRepository(repository, etherscan, web3provider) + self._semantics_repository = SemanticsRepository( + database_connection=database, + etherscan_provider=etherscan_provider, + web3provider=web3provider, + ) + abi_decoder = ABIDecoder(self.semantics, self._default_chain) semantic_decoder = SemanticDecoder(self.semantics, self._default_chain) decoder_service = DecoderService( abi_decoder, semantic_decoder, web3provider, self._default_chain ) - self._decoders = EthTxDecoders(semantic_decoder, abi_decoder, decoder_service) - self._providers = EthTxProviders(web3provider) + self._decoders = EthTxDecoders(decoder_service=decoder_service) + self._providers = EthTxProviders( + web3provider=web3provider, etherscan_provider=etherscan_provider + ) + + @staticmethod + def initialize(config: EthTxConfig): + mongo_client: MongoClient = connect( + db=config.mongo_database, host=config.mongo_connection_string + ) + repository = MongoSemanticsDatabase(db=mongo_client.db) + + web3provider = Web3Provider( + nodes=config.web3nodes, default_chain=config.default_chain + ) + etherscan_provider = EtherscanProvider( + api_key=config.etherscan_api_key, + nodes=config.etherscan_urls, + default_chain_id=config.default_chain, + ) + + return EthTx(config.default_chain, repository, web3provider, etherscan_provider) @property def decoders(self) -> EthTxDecoders: @@ -129,7 +133,7 @@ def decoders(self) -> EthTxDecoders: @property def semantics(self) -> SemanticsRepository: """EthTx Semantics Repository.""" - return self._semantics + return self._semantics_repository @property def providers(self) -> EthTxProviders: diff --git a/ethtx/exceptions.py b/ethtx/exceptions.py index 82347a10..f58a623f 100644 --- a/ethtx/exceptions.py +++ b/ethtx/exceptions.py @@ -11,7 +11,12 @@ # limitations under the License. -__all__ = ["Web3ConnectionException", "ProcessingException", "InvalidTransactionHash"] +__all__ = [ + "Web3ConnectionException", + "ProcessingException", + "InvalidTransactionHash", + "InvalidEtherscanReturnCodeException", +] import json from typing import Dict @@ -37,8 +42,8 @@ class InvalidTransactionHash(Exception): def __init__(self, tx_hash): super().__init__("Invalid transaction hash provided: " + tx_hash) -class InvalidEtherscanReturnCodeException(Exception): +class InvalidEtherscanReturnCodeException(Exception): def __init__(self, returned_code: int, params: Dict = None): params_msg = " with params: " + json.dumps(params) if params else "" msg = f"Invalid status code for etherscan request: {returned_code} {params_msg}" diff --git a/ethtx/providers/__init__.py b/ethtx/providers/__init__.py index ce6ad912..1e7fb058 100644 --- a/ethtx/providers/__init__.py +++ b/ethtx/providers/__init__.py @@ -10,6 +10,7 @@ # See the License for the specific language governing permissions and # limitations under the License. +from .ens_provider import Web3ENSProvider from .etherscan_provider import EtherscanProvider from .signature_provider import FourByteProvider from .web3_provider import Web3Provider diff --git a/ethtx/providers/ens_provider.py b/ethtx/providers/ens_provider.py new file mode 100644 index 00000000..f2af4421 --- /dev/null +++ b/ethtx/providers/ens_provider.py @@ -0,0 +1,44 @@ +# Copyright 2021 DAI Foundation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at: http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from abc import ABC, abstractmethod +from typing import Callable, Any + +from ens import ENS +from eth_typing import ChecksumAddress +from web3 import Web3 + + +class ENSProviderABC(ABC): + @abstractmethod + def __init__(self, provider: Callable): + ... + + @abstractmethod + def name(self, address: Any): + ... + + @abstractmethod + def address(self, name: Any): + ... + + +class Web3ENSProvider(ENSProviderABC): + ns: ENS + + def __init__(self, provider: Web3): + self.ns = ENS.fromWeb3(provider) + + def name(self, address: ChecksumAddress) -> str: + return self.ns.name(address=address) + + def address(self, name: str) -> str: + return self.ns.address(name=name) diff --git a/ethtx/providers/etherscan_provider.py b/ethtx/providers/etherscan_provider.py index d4f7cb23..f889850c 100644 --- a/ethtx/providers/etherscan_provider.py +++ b/ethtx/providers/etherscan_provider.py @@ -9,11 +9,11 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - import json import logging +from collections import OrderedDict from functools import lru_cache -from typing import Dict, Optional +from typing import Dict, Optional, Tuple, Union, Any import requests from web3 import Web3 @@ -24,56 +24,41 @@ class EtherscanProvider: + MODULE = "module=" + ACTION = "&action=" + ADDRESS = "&address=" + API_KEY = "&apikey=" + api_key: str endpoints: Dict[str, str] default_chain: Optional[str] + http: requests.sessions.Session + + url_dict = OrderedDict([(MODULE, ""), (ADDRESS, ""), (ACTION, ""), (API_KEY, "")]) def __init__( - self, api_key, nodes: Dict[str, str], default_chain_id: Optional[str] = None + self, + api_key: str, + nodes: Dict[str, str], + default_chain_id: Optional[str] = None, ): self.api_key = api_key self.endpoints = nodes self.default_chain = default_chain_id - def _get_chain_id(self, chain_id): - _id = chain_id or self.default_chain - - if _id is None: - raise ProcessingException( - "chain_id must be provided as argument or constructor default" - ) - return _id + self.http = requests.session() + self.http.headers.update({"User-Agent": "API"}) - @lru_cache(maxsize=1024) - def _get_contract_abi(self, chain_id, contract_name) -> Dict: - # Etherscan connection parameters - params = dict( - module="contract", - action="getsourcecode", - address=contract_name, - apikey=self.api_key, + def build_url(self, chain_id: str, url_dict: OrderedDict) -> str: + return ( + self.endpoints[chain_id] + + "?" + + "".join([param + val if val else "" for param, val in url_dict.items()]) ) - chain_id = self._get_chain_id(chain_id) - headers = {"User-Agent": "API"} - - # TODO: etherscan sometimes returns HTTP 502 with no apparent reason, so it's a quick fix - # that should help, but number of tries should be taken from config in final solution I think - for _ in range(3): - resp = requests.get( - url=self.endpoints[chain_id], params=params, headers=headers - ) - - if resp.status_code == 200: - break - - if resp.status_code != 200: - raise InvalidEtherscanReturnCodeException(resp.status_code, params) - - return resp.json() - - def get_contract_abi(self, chain_id, contract_name): - + def get_contract_abi( + self, chain_id, contract_name + ) -> Tuple[Dict[str, Union[dict, Any]], bool]: decoded = False raw_abi = [] @@ -100,10 +85,49 @@ def get_contract_abi(self, chain_id, contract_name): return dict(name=contract_name, abi=abi), decoded + def _parse_url_dict(self, **params) -> OrderedDict: + self.url_dict[self.API_KEY] = self.api_key + url_dict = self.url_dict.copy() + url_dict[self.ACTION] = params["action"] + url_dict[self.MODULE] = params["module"] + url_dict[self.ADDRESS] = params["address"] + + return url_dict + + def _get_chain_id(self, chain_id) -> str: + _id = chain_id or self.default_chain + + if _id is None: + raise ProcessingException( + "chain_id must be provided as argument or constructor default" + ) + return _id + + @lru_cache(maxsize=1024) + def _get_contract_abi(self, chain_id, contract_name) -> Dict: + chain_id = self._get_chain_id(chain_id) + url_dict = self._parse_url_dict( + action="getsourcecode", module="contract", address=contract_name + ) + + url = self.build_url(chain_id=chain_id, url_dict=url_dict) + + # TODO: etherscan sometimes returns HTTP 502 with no apparent reason, so it's a quick fix + # that should help, but number of tries should be taken from config in final solution I think + for _ in range(3): + resp = self.http.get(url) + + if resp.status_code == 200: + break + + if resp.status_code != 200: + raise InvalidEtherscanReturnCodeException(resp.status_code, url_dict) + + return resp.json() + # helper function decoding contract ABI @staticmethod def _parse_abi(json_abi) -> Dict: - # helper function to recursively parse components def _parse_components(components): diff --git a/ethtx/providers/semantic_providers/semantics_repository.py b/ethtx/providers/semantic_providers/semantics_repository.py index 03030c77..2667b4c5 100644 --- a/ethtx/providers/semantic_providers/semantics_repository.py +++ b/ethtx/providers/semantic_providers/semantics_repository.py @@ -13,6 +13,8 @@ from functools import lru_cache from typing import Optional, List, Any, Dict +from ens import ENS + from ethtx.decoders.decoders.semantics import decode_events_and_functions from ethtx.models.semantics_model import ( AddressSemantics, @@ -148,10 +150,14 @@ def decode_parameter(_parameter): transformations, ) + name = address + if not raw_address_semantics["is_contract"]: + name = self._web3provider.ens.name(address) + address_semantics = AddressSemantics( chain_id, address, - raw_address_semantics["name"], + name, raw_address_semantics["is_contract"], contract_semantics, raw_address_semantics["standard"], @@ -246,8 +252,15 @@ def get_semantics(self, chain_id: str, address: str) -> Optional[AddressSemantic contract_semantics = ContractSemantics( ZERO_HASH, "EOA", dict(), dict(), dict() ) + ns_name = self._web3provider.ens.name(address) address_semantics = AddressSemantics( - chain_id, address, address, False, contract_semantics, None, None + chain_id, + address, + ns_name if ns_name else address, + False, + contract_semantics, + None, + None, ) self.update_semantics(address_semantics) diff --git a/ethtx/providers/web3_provider.py b/ethtx/providers/web3_provider.py index 445b7ad9..f3112925 100644 --- a/ethtx/providers/web3_provider.py +++ b/ethtx/providers/web3_provider.py @@ -15,6 +15,7 @@ from functools import lru_cache from typing import List, Dict, Optional +from ens import ENS from web3 import Web3 from web3.datastructures import AttributeDict from web3.middleware import geth_poa_middleware @@ -77,8 +78,11 @@ def connect_chain( raise -class NodeDataProvider: +def connect_ens(web3: Web3) -> ENS: + return ENS.fromWeb3(web3) + +class NodeDataProvider: default_chain: str def __init__(self, default_chain=None): @@ -105,8 +109,9 @@ def get_calls(self, tx_hash: str, chain_id: Optional[str] = None) -> Call: class Web3Provider(NodeDataProvider): chain: Web3 + ens: ENS - def __init__(self, nodes: Dict[str, str], default_chain=None): + def __init__(self, nodes: Dict[str, dict], default_chain=None): super().__init__(default_chain) self.nodes = nodes @@ -123,9 +128,12 @@ def _get_node_connection(self, chain_id: Optional[str] = None) -> Web3: "unknown chain_id, it must be defined in the EthTxConfig object" ) - return connect_chain( + web3 = connect_chain( http_hook=self.nodes[chain_id]["hook"], poa=self.nodes[chain_id]["poa"] ) + self.ens = connect_ens(web3) + + return web3 # get the raw block data from the node @lru_cache(maxsize=512) From a4f9c12a769e5b7c0eea535044187b91daa00e08 Mon Sep 17 00:00:00 2001 From: Karol Chojnowski <43881785+kchojn@users.noreply.github.com> Date: Tue, 28 Sep 2021 11:58:29 +0200 Subject: [PATCH 04/10] Refactor etherscan (#48) --- ethtx/providers/__init__.py | 2 +- ethtx/providers/etherscan/__init__.py | 13 ++ ethtx/providers/etherscan/client.py | 104 ++++++++++ ethtx/providers/etherscan/contracts.py | 182 ++++++++++++++++++ .../providers/etherscan/etherscan_provider.py | 40 ++++ .../semantics_repository.py | 2 +- 6 files changed, 341 insertions(+), 2 deletions(-) create mode 100644 ethtx/providers/etherscan/__init__.py create mode 100644 ethtx/providers/etherscan/client.py create mode 100644 ethtx/providers/etherscan/contracts.py create mode 100644 ethtx/providers/etherscan/etherscan_provider.py diff --git a/ethtx/providers/__init__.py b/ethtx/providers/__init__.py index 1e7fb058..8f332e18 100644 --- a/ethtx/providers/__init__.py +++ b/ethtx/providers/__init__.py @@ -11,6 +11,6 @@ # limitations under the License. from .ens_provider import Web3ENSProvider -from .etherscan_provider import EtherscanProvider +from .etherscan import EtherscanProvider from .signature_provider import FourByteProvider from .web3_provider import Web3Provider diff --git a/ethtx/providers/etherscan/__init__.py b/ethtx/providers/etherscan/__init__.py new file mode 100644 index 00000000..d1e6456e --- /dev/null +++ b/ethtx/providers/etherscan/__init__.py @@ -0,0 +1,13 @@ +# Copyright 2021 DAI Foundation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at: http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from .etherscan_provider import EtherscanProvider diff --git a/ethtx/providers/etherscan/client.py b/ethtx/providers/etherscan/client.py new file mode 100644 index 00000000..e81478e8 --- /dev/null +++ b/ethtx/providers/etherscan/client.py @@ -0,0 +1,104 @@ +# Copyright 2021 DAI Foundation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at: http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import logging +from collections import OrderedDict +from typing import Dict, Optional + +import requests + +from ethtx.exceptions import ProcessingException + +log = logging.getLogger(__name__) + + +class EtherscanClient: + MODULE = "module=" + ACTION = "&action=" + CONTRACT_ADDRESS = "&contractaddress=" + ADDRESS = "&address=" + OFFSET = "&offset=" + PAGE = "&page=" + SORT = "&sort=" + BLOCK_TYPE = "&blocktype=" + TO = "&to=" + VALUE = "&value=" + DATA = "&data=" + POSITION = "&position=" + HEX = "&hex=" + GAS_PRICE = "&gasPrice=" + GAS = "&gas=" + START_BLOCK = "&startblock=" + END_BLOCK = "&endblock=" + BLOCKNO = "&blockno=" + TXHASH = "&txhash=" + TAG = "&tag=" + BOOLEAN = "&boolean=" + INDEX = "&index=" + API_KEY = "&apikey=" + + url_dict: OrderedDict = {} + + def __init__( + self, + api_key: str, + nodes: Dict[str, str], + default_chain_id: Optional[str] = None, + ): + self.api_key = api_key + self.endpoints = nodes + self.default_chain = default_chain_id + + self.http = requests.session() + self.http.headers.update({"User-Agent": "API"}) + + self.url_dict = OrderedDict( + [ + (self.MODULE, ""), + (self.ADDRESS, ""), + (self.ACTION, ""), + (self.OFFSET, ""), + (self.PAGE, ""), + (self.SORT, ""), + (self.BLOCK_TYPE, ""), + (self.TO, ""), + (self.VALUE, ""), + (self.DATA, ""), + (self.POSITION, ""), + (self.HEX, ""), + (self.GAS_PRICE, ""), + (self.GAS, ""), + (self.START_BLOCK, ""), + (self.END_BLOCK, ""), + (self.BLOCKNO, ""), + (self.TXHASH, ""), + (self.TAG, ""), + (self.BOOLEAN, ""), + (self.INDEX, ""), + (self.API_KEY, api_key), + ] + ) + + def build_url(self, chain_id: str, url_dict: OrderedDict) -> str: + return ( + self.endpoints[chain_id] + + "?" + + "".join([param + val if val else "" for param, val in url_dict.items()]) + ) + + def _get_chain_id(self, chain_id) -> str: + _id = chain_id or self.default_chain + + if _id is None: + raise ProcessingException( + "chain_id must be provided as argument or constructor default" + ) + return _id diff --git a/ethtx/providers/etherscan/contracts.py b/ethtx/providers/etherscan/contracts.py new file mode 100644 index 00000000..cfdb67b2 --- /dev/null +++ b/ethtx/providers/etherscan/contracts.py @@ -0,0 +1,182 @@ +# Copyright 2021 DAI Foundation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at: http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import json +import logging +from functools import lru_cache +from typing import Dict, Tuple, Union, Any, Optional + +from web3 import Web3 + +from ethtx.exceptions import InvalidEtherscanReturnCodeException +from .client import EtherscanClient + +log = logging.getLogger(__name__) + + +class EtherscanContract(EtherscanClient): + def __init__( + self, + api_key: str, + nodes: Dict[str, str], + default_chain_id: Optional[str] = None, + ): + EtherscanClient.__init__( + self, api_key=api_key, nodes=nodes, default_chain_id=default_chain_id + ) + self.contract_dict = self.url_dict.copy() + self.contract_dict[self.MODULE] = "contract" + + def get_contract_abi( + self, chain_id, contract_name + ) -> Tuple[Dict[str, Union[dict, Any]], bool]: + decoded = False + raw_abi = [] + + try: + resp = self._get_contract_abi(chain_id, contract_name) + if resp["status"] == "1" and resp["message"] == "OK": + contract_name = resp["result"][0]["ContractName"] + if ( + len(resp["result"][0]["ABI"]) + and resp["result"][0]["ABI"] != "Contract source code not verified" + ): + raw_abi = json.loads(resp["result"][0]["ABI"]) + decoded = True + + except Exception as e: + log.exception( + "Etherscan connection failed while getting abi for %s on %s", + contract_name, + chain_id, + exc_info=e, + ) + + abi = self._parse_abi(raw_abi) + + return dict(name=contract_name, abi=abi), decoded + + @lru_cache(maxsize=1024) + def _get_contract_abi(self, chain_id, contract_name) -> Dict: + url_dict = self.contract_dict.copy() + url_dict[self.ACTION] = "getsourcecode" + url_dict[self.ADDRESS] = contract_name + url = self.build_url(chain_id=self._get_chain_id(chain_id), url_dict=url_dict) + + # TODO: etherscan sometimes returns HTTP 502 with no apparent reason, so it's a quick fix + # that should help, but number of tries should be taken from config in final solution I think + for _ in range(3): + resp = self.http.get(url) + + if resp.status_code == 200: + break + + if resp.status_code != 200: + raise InvalidEtherscanReturnCodeException(resp.status_code, url_dict) + + return resp.json() + + # helper function decoding contract ABI + @staticmethod + def _parse_abi(json_abi) -> Dict: + # helper function to recursively parse components + def _parse_components(components): + + comp_canonical = "(" + comp_inputs = list() + + for i, component in enumerate(components): + + argument = dict(name=component["name"], type=component["type"]) + + if component["type"][:5] == "tuple": + sub_canonical, sub_components = _parse_components( + component["components"] + ) + comp_canonical += sub_canonical + component["type"][5:] + argument["components"] = sub_components + else: + comp_canonical += component["type"] + sub_components = [] + + if i < len(components) - 1: + comp_canonical += "," + + if ( + component["type"] in ("string", "bytes") + or component["type"][-2:] == "[]" + ): + argument["dynamic"] = True + elif component["type"] == "tuple": + argument["dynamic"] = any(c["dynamic"] for c in sub_components) + else: + argument["dynamic"] = False + + if "indexed" in component: + argument["indexed"] = component["indexed"] + + comp_inputs.append(argument) + + comp_canonical += ")" + + return comp_canonical, comp_inputs + + functions = dict() + events = dict() + + for item in json_abi: + + if "type" in item: + + # parse contract functions + if item["type"] == "constructor": + _, inputs = _parse_components(item["inputs"]) + functions["constructor"] = dict( + signature="constructor", + name="constructor", + inputs=inputs, + outputs=[], + ) + + elif item["type"] == "fallback": + functions["fallback"] = {} + + elif item["type"] == "function": + canonical, inputs = _parse_components(item["inputs"]) + canonical = item["name"] + canonical + function_hash = Web3.sha3(text=canonical).hex() + signature = function_hash[0:10] + + _, outputs = _parse_components(item["outputs"]) + + functions[signature] = dict( + signature=signature, + name=item["name"], + inputs=inputs, + outputs=outputs, + ) + + # parse contract events + elif item["type"] == "event": + canonical, parameters = _parse_components(item["inputs"]) + canonical = item["name"] + canonical + event_hash = Web3.sha3(text=canonical).hex() + signature = event_hash + + events[signature] = dict( + signature=signature, + name=item["name"], + anonymous=item["anonymous"], + parameters=parameters, + ) + + return dict(functions=functions, events=events) diff --git a/ethtx/providers/etherscan/etherscan_provider.py b/ethtx/providers/etherscan/etherscan_provider.py new file mode 100644 index 00000000..30ed0efe --- /dev/null +++ b/ethtx/providers/etherscan/etherscan_provider.py @@ -0,0 +1,40 @@ +# Copyright 2021 DAI Foundation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at: http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from typing import Dict, Optional + +from .contracts import EtherscanContract + + +class EtherscanProvider: + api_key: str + endpoints: Dict[str, str] + default_chain: Optional[str] + + def __init__( + self, + api_key: str, + nodes: Dict[str, str], + default_chain_id: Optional[str] = None, + ): + self.api_key = api_key + self.endpoints = nodes + self.default_chain = default_chain_id + + self._contract = EtherscanContract( + api_key=self.api_key, + nodes=self.endpoints, + default_chain_id=self.default_chain, + ) + + @property + def contract(self) -> EtherscanContract: + return self._contract diff --git a/ethtx/providers/semantic_providers/semantics_repository.py b/ethtx/providers/semantic_providers/semantics_repository.py index 2667b4c5..7d1ccb49 100644 --- a/ethtx/providers/semantic_providers/semantics_repository.py +++ b/ethtx/providers/semantic_providers/semantics_repository.py @@ -186,7 +186,7 @@ def get_semantics(self, chain_id: str, address: str) -> Optional[AddressSemantic if code_hash != ZERO_HASH: # smart contract - raw_semantics, decoded = self.etherscan.get_contract_abi( + raw_semantics, decoded = self.etherscan.contract.get_contract_abi( chain_id, address ) if decoded and raw_semantics: From ed386b97bdbf4fdb7e325c61116dde5d69a4c3c9 Mon Sep 17 00:00:00 2001 From: Karol Chojnowski <43881785+kchojn@users.noreply.github.com> Date: Tue, 28 Sep 2021 12:03:57 +0200 Subject: [PATCH 05/10] Remove useless file (#49) --- ethtx/providers/etherscan_provider.py | 222 -------------------------- 1 file changed, 222 deletions(-) delete mode 100644 ethtx/providers/etherscan_provider.py diff --git a/ethtx/providers/etherscan_provider.py b/ethtx/providers/etherscan_provider.py deleted file mode 100644 index f889850c..00000000 --- a/ethtx/providers/etherscan_provider.py +++ /dev/null @@ -1,222 +0,0 @@ -# Copyright 2021 DAI Foundation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at: http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import json -import logging -from collections import OrderedDict -from functools import lru_cache -from typing import Dict, Optional, Tuple, Union, Any - -import requests -from web3 import Web3 - -from ethtx.exceptions import ProcessingException, InvalidEtherscanReturnCodeException - -log = logging.getLogger(__name__) - - -class EtherscanProvider: - MODULE = "module=" - ACTION = "&action=" - ADDRESS = "&address=" - API_KEY = "&apikey=" - - api_key: str - endpoints: Dict[str, str] - default_chain: Optional[str] - http: requests.sessions.Session - - url_dict = OrderedDict([(MODULE, ""), (ADDRESS, ""), (ACTION, ""), (API_KEY, "")]) - - def __init__( - self, - api_key: str, - nodes: Dict[str, str], - default_chain_id: Optional[str] = None, - ): - self.api_key = api_key - self.endpoints = nodes - self.default_chain = default_chain_id - - self.http = requests.session() - self.http.headers.update({"User-Agent": "API"}) - - def build_url(self, chain_id: str, url_dict: OrderedDict) -> str: - return ( - self.endpoints[chain_id] - + "?" - + "".join([param + val if val else "" for param, val in url_dict.items()]) - ) - - def get_contract_abi( - self, chain_id, contract_name - ) -> Tuple[Dict[str, Union[dict, Any]], bool]: - decoded = False - raw_abi = [] - - try: - resp = self._get_contract_abi(chain_id, contract_name) - if resp["status"] == "1" and resp["message"] == "OK": - contract_name = resp["result"][0]["ContractName"] - if ( - len(resp["result"][0]["ABI"]) - and resp["result"][0]["ABI"] != "Contract source code not verified" - ): - raw_abi = json.loads(resp["result"][0]["ABI"]) - decoded = True - - except Exception as e: - log.exception( - "Etherscan connection failed while getting abi for %s on %s", - contract_name, - chain_id, - exc_info=e, - ) - - abi = self._parse_abi(raw_abi) - - return dict(name=contract_name, abi=abi), decoded - - def _parse_url_dict(self, **params) -> OrderedDict: - self.url_dict[self.API_KEY] = self.api_key - url_dict = self.url_dict.copy() - url_dict[self.ACTION] = params["action"] - url_dict[self.MODULE] = params["module"] - url_dict[self.ADDRESS] = params["address"] - - return url_dict - - def _get_chain_id(self, chain_id) -> str: - _id = chain_id or self.default_chain - - if _id is None: - raise ProcessingException( - "chain_id must be provided as argument or constructor default" - ) - return _id - - @lru_cache(maxsize=1024) - def _get_contract_abi(self, chain_id, contract_name) -> Dict: - chain_id = self._get_chain_id(chain_id) - url_dict = self._parse_url_dict( - action="getsourcecode", module="contract", address=contract_name - ) - - url = self.build_url(chain_id=chain_id, url_dict=url_dict) - - # TODO: etherscan sometimes returns HTTP 502 with no apparent reason, so it's a quick fix - # that should help, but number of tries should be taken from config in final solution I think - for _ in range(3): - resp = self.http.get(url) - - if resp.status_code == 200: - break - - if resp.status_code != 200: - raise InvalidEtherscanReturnCodeException(resp.status_code, url_dict) - - return resp.json() - - # helper function decoding contract ABI - @staticmethod - def _parse_abi(json_abi) -> Dict: - # helper function to recursively parse components - def _parse_components(components): - - comp_canonical = "(" - comp_inputs = list() - - for i, component in enumerate(components): - - argument = dict(name=component["name"], type=component["type"]) - - if component["type"][:5] == "tuple": - sub_canonical, sub_components = _parse_components( - component["components"] - ) - comp_canonical += sub_canonical + component["type"][5:] - argument["components"] = sub_components - else: - comp_canonical += component["type"] - sub_components = [] - - if i < len(components) - 1: - comp_canonical += "," - - if ( - component["type"] in ("string", "bytes") - or component["type"][-2:] == "[]" - ): - argument["dynamic"] = True - elif component["type"] == "tuple": - argument["dynamic"] = any(c["dynamic"] for c in sub_components) - else: - argument["dynamic"] = False - - if "indexed" in component: - argument["indexed"] = component["indexed"] - - comp_inputs.append(argument) - - comp_canonical += ")" - - return comp_canonical, comp_inputs - - functions = dict() - events = dict() - - for item in json_abi: - - if "type" in item: - - # parse contract functions - if item["type"] == "constructor": - _, inputs = _parse_components(item["inputs"]) - functions["constructor"] = dict( - signature="constructor", - name="constructor", - inputs=inputs, - outputs=[], - ) - - elif item["type"] == "fallback": - functions["fallback"] = {} - - elif item["type"] == "function": - canonical, inputs = _parse_components(item["inputs"]) - canonical = item["name"] + canonical - function_hash = Web3.sha3(text=canonical).hex() - signature = function_hash[0:10] - - _, outputs = _parse_components(item["outputs"]) - - functions[signature] = dict( - signature=signature, - name=item["name"], - inputs=inputs, - outputs=outputs, - ) - - # parse contract events - elif item["type"] == "event": - canonical, parameters = _parse_components(item["inputs"]) - canonical = item["name"] + canonical - event_hash = Web3.sha3(text=canonical).hex() - signature = event_hash - - events[signature] = dict( - signature=signature, - name=item["name"], - anonymous=item["anonymous"], - parameters=parameters, - ) - - return dict(functions=functions, events=events) From b02c50d4c97f83df211e1d12a3d3f7d3c4bbd9f7 Mon Sep 17 00:00:00 2001 From: Karol Chojnowski Date: Tue, 28 Sep 2021 13:15:00 +0200 Subject: [PATCH 06/10] black --- ethtx/decoders/abi/abc.py | 6 +-- ethtx/decoders/abi/calls.py | 10 +++-- ethtx/decoders/abi/decoder.py | 10 ++--- ethtx/decoders/abi/events.py | 40 ++++++++++++++----- ethtx/decoders/abi/transfers.py | 14 +++---- ethtx/decoders/decoder_service.py | 51 ++++++++++-------------- ethtx/decoders/decoders/parameters.py | 41 +++++++++++-------- ethtx/decoders/semantic/abc.py | 4 +- ethtx/decoders/semantic/decoder.py | 8 ++-- ethtx/decoders/semantic/events.py | 15 +++---- ethtx/decoders/semantic/helpers/utils.py | 12 ++++-- 11 files changed, 116 insertions(+), 95 deletions(-) diff --git a/ethtx/decoders/abi/abc.py b/ethtx/decoders/abi/abc.py index 3fb06c02..c217b222 100644 --- a/ethtx/decoders/abi/abc.py +++ b/ethtx/decoders/abi/abc.py @@ -22,7 +22,6 @@ TransactionMetadata, BlockMetadata, ) -from ethtx.models.objects_model import Block, Transaction, Call, Event, TransactionMetadata, BlockMetadata from ethtx.providers.semantic_providers.semantics_repository import SemanticsRepository @@ -79,10 +78,7 @@ def decode_events( @abstractmethod def decode_transfers( - self, - call: DecodedCall, - events: [Event], - proxies: Dict[str, Proxy] + self, call: DecodedCall, events: [Event], proxies: Dict[str, Proxy] ) -> ABISubmoduleAbc.decode: ... diff --git a/ethtx/decoders/abi/calls.py b/ethtx/decoders/abi/calls.py index 6969861f..ce750a70 100644 --- a/ethtx/decoders/abi/calls.py +++ b/ethtx/decoders/abi/calls.py @@ -98,9 +98,7 @@ def decode_call( chain_id, call.from_address, proxies ) - to_name = self._repository.get_address_label( - chain_id, call.to_address, proxies - ) + to_name = self._repository.get_address_label(chain_id, call.to_address, proxies) if call.call_type == "selfdestruct": function_name = call.call_type @@ -126,7 +124,11 @@ def decode_call( if not function_abi and call.to_address in proxies: # try to find signature in delegate-called contracts for semantic in proxies[call.to_address].semantics: - function_abi = semantic.contract.functions[function_signature] if function_signature in semantic.contract.functions else None + function_abi = ( + semantic.contract.functions[function_signature] + if function_signature in semantic.contract.functions + else None + ) if function_abi: break diff --git a/ethtx/decoders/abi/decoder.py b/ethtx/decoders/abi/decoder.py index 76746e1d..757eae18 100644 --- a/ethtx/decoders/abi/decoder.py +++ b/ethtx/decoders/abi/decoder.py @@ -18,7 +18,7 @@ DecodedCall, DecodedEvent, DecodedTransfer, - Proxy + Proxy, ) from ethtx.models.objects_model import ( Block, @@ -175,11 +175,7 @@ def _decode_transaction( try: full_decoded_transaction.events = self.decode_events( - transaction.events, - block, - transaction.metadata, - proxies, - chain_id + transaction.events, block, transaction.metadata, proxies, chain_id ) except Exception: log.exception( @@ -210,7 +206,7 @@ def _decode_transaction( full_decoded_transaction.calls, full_decoded_transaction.events, proxies, - chain_id + chain_id, ) except Exception: log.exception( diff --git a/ethtx/decoders/abi/events.py b/ethtx/decoders/abi/events.py index d6c1feff..6212d5ec 100644 --- a/ethtx/decoders/abi/events.py +++ b/ethtx/decoders/abi/events.py @@ -36,18 +36,14 @@ def decode( if isinstance(events, list): return ( [ - self.decode_event( - event, block, transaction, proxies, chain_id - ) + self.decode_event(event, block, transaction, proxies, chain_id) for event in events ] if events else [] ) - return self.decode_event( - events, block, transaction, proxies, chain_id - ) + return self.decode_event(events, block, transaction, proxies, chain_id) def decode_event( self, @@ -84,19 +80,41 @@ def decode_event( if not event_abi and event.contract in proxies: # try to find signature in delegate-called contracts for semantic in proxies[event.contract].semantics: - event_abi = semantic.contract.events[event_signature] if event_signature in semantic.contract.events else None + event_abi = ( + semantic.contract.events[event_signature] + if event_signature in semantic.contract.events + else None + ) if event_abi: break if not event_abi and event_signature in ERC20_EVENTS: # try standard ERC20 events - if len([parameter for parameter in ERC20_EVENTS[event_signature].parameters if parameter.indexed]) == \ - len([topic for topic in event.topics if topic]) - 1: + if ( + len( + [ + parameter + for parameter in ERC20_EVENTS[event_signature].parameters + if parameter.indexed + ] + ) + == len([topic for topic in event.topics if topic]) - 1 + ): event_abi = ERC20_EVENTS[event_signature] elif event_signature in ERC721_EVENTS: # try standard ERC721 events - if len([parameter for parameter in ERC721_EVENTS[event_signature].parameters if parameter.indexed]) == \ - len([topic for topic in event.topics if topic]) - 1: + if ( + len( + [ + parameter + for parameter in ERC721_EVENTS[ + event_signature + ].parameters + if parameter.indexed + ] + ) + == len([topic for topic in event.topics if topic]) - 1 + ): event_abi = ERC721_EVENTS[event_signature] contract_name = self._repository.get_address_label( diff --git a/ethtx/decoders/abi/transfers.py b/ethtx/decoders/abi/transfers.py index 573bd2b0..3d8f5028 100644 --- a/ethtx/decoders/abi/transfers.py +++ b/ethtx/decoders/abi/transfers.py @@ -23,12 +23,7 @@ class ABITransfersDecoder(ABISubmoduleAbc): """Abi Transfers Decoder.""" - def decode( - self, - call: DecodedCall, - events, - proxies - ) -> List: + def decode(self, call: DecodedCall, events, proxies) -> List: """Decode transfers.""" transfers = [] @@ -71,7 +66,12 @@ def _transfers_calls(decoded_call): if standard == "ERC20" or event.contract.address in proxies: - _, token_symbol, token_decimals, _ = self._repository.get_token_data( + ( + _, + token_symbol, + token_decimals, + _, + ) = self._repository.get_token_data( event.chain_id, event.contract.address, proxies ) value = event.parameters[2].value / 10 ** token_decimals diff --git a/ethtx/decoders/decoder_service.py b/ethtx/decoders/decoder_service.py index 652f91ae..fa724269 100644 --- a/ethtx/decoders/decoder_service.py +++ b/ethtx/decoders/decoder_service.py @@ -24,10 +24,7 @@ def __init__(self, abi_decoder, semantic_decoder, web3provider, default_chain): self.web3provider = web3provider self.default_chain = default_chain - def get_delegations( - self, - calls: Union[Call, List[Call]] - ) -> Dict[str, List[str]]: + def get_delegations(self, calls: Union[Call, List[Call]]) -> Dict[str, List[str]]: delegations = dict() @@ -58,9 +55,7 @@ def get_delegations( return delegations def get_proxies( - self, - delegations: Dict[str, List[str]], - chain_id: str + self, delegations: Dict[str, List[str]], chain_id: str ) -> Dict[str, Proxy]: proxies = dict() @@ -72,28 +67,22 @@ def get_proxies( self.default_chain, delegator ) - if is_eip1969_proxy( - chain, - delegator, - delegations[delegator][0] - ): - proxy_type = 'EIP1969Proxy' - fallback_name = 'EIP1969_Proxy' - - elif is_eip1969_beacon_proxy( - chain, - delegator, - delegations[delegator][0] - ): - proxy_type = 'EIP1969Beacon' - fallback_name = 'EIP1969_BeaconProxy' + if is_eip1969_proxy(chain, delegator, delegations[delegator][0]): + proxy_type = "EIP1969Proxy" + fallback_name = "EIP1969_Proxy" + + elif is_eip1969_beacon_proxy(chain, delegator, delegations[delegator][0]): + proxy_type = "EIP1969Beacon" + fallback_name = "EIP1969_BeaconProxy" else: - proxy_type = 'GenericProxy' - fallback_name = 'Proxy' + proxy_type = "GenericProxy" + fallback_name = "Proxy" - delegates_semantics = [self.semantic_decoder.repository.get_semantics(chain_id, delegate) - for delegate in delegations[delegator]] + delegates_semantics = [ + self.semantic_decoder.repository.get_semantics(chain_id, delegate) + for delegate in delegations[delegator] + ] token_semantics = delegator_semantics.erc20 if not token_semantics: @@ -104,10 +93,12 @@ def get_proxies( proxies[delegator] = Proxy( address=delegator, - name=delegator_semantics.name if delegator_semantics and delegator_semantics.name != delegator else fallback_name, + name=delegator_semantics.name + if delegator_semantics and delegator_semantics.name != delegator + else fallback_name, type=proxy_type, semantics=[semantics for semantics in delegates_semantics if semantics], - token=token_semantics + token=token_semantics, ) return proxies @@ -125,7 +116,9 @@ def decode_transaction(self, chain_id: str, tx_hash: str) -> DecodedTransaction: ) # read a raw block from a node block = Block.from_raw( - w3block=self.web3provider.get_block(transaction.metadata.block_number, chain_id), + w3block=self.web3provider.get_block( + transaction.metadata.block_number, chain_id + ), chain_id=chain_id, ) diff --git a/ethtx/decoders/decoders/parameters.py b/ethtx/decoders/decoders/parameters.py index d1d2357a..e613d96f 100644 --- a/ethtx/decoders/decoders/parameters.py +++ b/ethtx/decoders/decoders/parameters.py @@ -121,7 +121,9 @@ def decode_event_parameters(data, topics, abi, anonymous): return event_parameters -def decode_function_parameters(input_data, output, abi, status=True, strip_signature=True): +def decode_function_parameters( + input_data, output, abi, status=True, strip_signature=True +): if strip_signature and len(input_data) >= 10: stripped_input_data = input_data[10:] @@ -129,8 +131,12 @@ def decode_function_parameters(input_data, output, abi, status=True, strip_signa stripped_input_data = input_data[2:] if abi: - if len(abi.inputs) == 1 and abi.inputs[0].parameter_type == 'raw': - input_parameters = [Argument(name=abi.inputs[0].parameter_name, type='bytes', value=input_data)] + if len(abi.inputs) == 1 and abi.inputs[0].parameter_type == "raw": + input_parameters = [ + Argument( + name=abi.inputs[0].parameter_name, type="bytes", value=input_data + ) + ] else: input_parameters, _ = decode_struct(stripped_input_data, abi.inputs) for i, parameter in enumerate(input_parameters): @@ -151,9 +157,15 @@ def decode_function_parameters(input_data, output, abi, status=True, strip_signa if abi.outputs and status and output == "0x": log.warning("Warning: missing output data...") output_parameters = [] - elif output != '0x': - if len(abi.outputs) == 1 and abi.outputs[0].parameter_type == 'raw': - output_parameters = [Argument(name=abi.outputs[0].parameter_name, type='bytes', value=output)] + elif output != "0x": + if len(abi.outputs) == 1 and abi.outputs[0].parameter_type == "raw": + output_parameters = [ + Argument( + name=abi.outputs[0].parameter_name, + type="bytes", + value=output, + ) + ] else: output_parameters, _ = decode_struct(output[2:], abi.outputs) for i, parameter in enumerate(output_parameters): @@ -278,11 +290,11 @@ def decode_dynamic_array(data, array_type): for i in range(count): if array_type in ("bytes", "string"): - offset = int(sub_data[64 * i: 64 * (i + 1)], 16) * 2 + offset = int(sub_data[64 * i : 64 * (i + 1)], 16) * 2 decoded = decode_dynamic_argument(sub_data[offset:], array_type) else: offset = 64 * i - decoded = decode_static_argument(sub_data[offset: offset+64], array_type) + decoded = decode_static_argument(sub_data[offset : offset + 64], array_type) decoded_argument.append(decoded) @@ -296,9 +308,7 @@ def decode_dynamic_argument(argument_bytes, argument_type): value = argument_bytes[64 : 64 + length] if argument_type == "string": - decoded_value = ( - bytes.fromhex(value).decode("utf-8").replace("\x00", "") - ) + decoded_value = bytes.fromhex(value).decode("utf-8").replace("\x00", "") else: decoded_value = "0x" + value else: @@ -309,7 +319,6 @@ def decode_dynamic_argument(argument_bytes, argument_type): # helper function to decode ABI 2.0 structs def decode_struct(data, arguments_abi): - def decode_array(raw_value, argument_type, slot): array_type = argument_type.rsplit("[", 1)[0] @@ -321,15 +330,13 @@ def decode_array(raw_value, argument_type, slot): array_size = int(argument_type[:-1].split("[")[-1]) array_values = [] for _ in range(array_size): - if array_type[-1] == ']': + if array_type[-1] == "]": array_subvalues, slot = decode_array(raw_value, array_type, slot) array_values.append(array_subvalues) else: - array_values.append( - decode_static_argument(raw_value, array_type) - ) + array_values.append(decode_static_argument(raw_value, array_type)) slot += 1 - raw_value = data[slot * 64: (slot + 1) * 64] + raw_value = data[slot * 64 : (slot + 1) * 64] return array_values, slot diff --git a/ethtx/decoders/semantic/abc.py b/ethtx/decoders/semantic/abc.py index 9775300b..e6ad136c 100644 --- a/ethtx/decoders/semantic/abc.py +++ b/ethtx/decoders/semantic/abc.py @@ -19,7 +19,7 @@ DecodedEvent, DecodedTransfer, DecodedBalance, - Proxy + Proxy, ) from ethtx.models.objects_model import BlockMetadata, TransactionMetadata from ethtx.providers.semantic_providers.semantics_repository import SemanticsRepository @@ -54,7 +54,7 @@ def decode_metadata( self, block_metadata: BlockMetadata, tx_metadata: TransactionMetadata, - chain_id: str + chain_id: str, ): ... diff --git a/ethtx/decoders/semantic/decoder.py b/ethtx/decoders/semantic/decoder.py index d11d133d..5ce0df2e 100644 --- a/ethtx/decoders/semantic/decoder.py +++ b/ethtx/decoders/semantic/decoder.py @@ -25,7 +25,7 @@ DecodedTransfer, DecodedBalance, DecodedCall, - Proxy + Proxy, ) from ethtx.models.objects_model import BlockMetadata, TransactionMetadata @@ -38,7 +38,9 @@ def decode_transaction( proxies: Dict[str, Proxy], chain_id: str, ) -> DecodedTransaction: - transaction.metadata = self.decode_metadata(block, transaction.metadata, chain_id) + transaction.metadata = self.decode_metadata( + block, transaction.metadata, chain_id + ) transaction.events = self.decode_events( transaction.events, transaction.metadata, proxies ) @@ -58,7 +60,7 @@ def decode_metadata( self, block_metadata: BlockMetadata, tx_metadata: TransactionMetadata, - chain_id: str + chain_id: str, ) -> DecodedTransactionMetadata: return SemanticMetadataDecoder(repository=self.repository).decode( block_metadata=block_metadata, diff --git a/ethtx/decoders/semantic/events.py b/ethtx/decoders/semantic/events.py index f83c6ce0..870af7cd 100644 --- a/ethtx/decoders/semantic/events.py +++ b/ethtx/decoders/semantic/events.py @@ -41,10 +41,7 @@ def decode( """Semantically decode events.""" if isinstance(events, list): return ( - [ - self.decode_event(event, tx_metadata, proxies) - for event in events - ] + [self.decode_event(event, tx_metadata, proxies) for event in events] if events else [] ) @@ -62,7 +59,7 @@ def decode_event( def _get_parameters_str(parameters): parameters_types = [] for parameter in parameters: - if parameter.type == 'tuple': + if parameter.type == "tuple": parameters_types.append(_get_parameters_str(parameter.value)) else: parameters_types.append(parameter.type) @@ -72,7 +69,7 @@ def _get_parameters_str(parameters): # calculate signature to account for anonymous events parameters_str = _get_parameters_str(event.parameters) calculated_event_signature = Web3.keccak( - text=f'{event.event_name}{parameters_str}' + text=f"{event.event_name}{parameters_str}" ).hex() else: calculated_event_signature = event.event_signature @@ -109,7 +106,11 @@ def _get_parameters_str(parameters): event.contract.address, event.parameters, [], tx_metadata, self.repository ) standard = self.repository.get_standard(event.chain_id, event.contract.address) - if not standard and event.contract.address in proxies and proxies[event.contract.address].token: + if ( + not standard + and event.contract.address in proxies + and proxies[event.contract.address].token + ): standard = "ERC20" # perform parameters transformation diff --git a/ethtx/decoders/semantic/helpers/utils.py b/ethtx/decoders/semantic/helpers/utils.py index d1ff8440..c0ec31b1 100644 --- a/ethtx/decoders/semantic/helpers/utils.py +++ b/ethtx/decoders/semantic/helpers/utils.py @@ -143,9 +143,15 @@ def decode_call(transaction, repository, contract_address, data): transaction.chain_id, contract_address, function_signature ) function_name = function_abi.name if function_abi else function_signature - stripped_function_abi = FunctionSemantics(signature=function_abi.signature, name=function_abi.name, - inputs=function_abi.inputs, outputs=[]) - function_input, _ = decode_function_parameters(data, "0x", stripped_function_abi) + stripped_function_abi = FunctionSemantics( + signature=function_abi.signature, + name=function_abi.name, + inputs=function_abi.inputs, + outputs=[], + ) + function_input, _ = decode_function_parameters( + data, "0x", stripped_function_abi + ) # perform arguments transformations context = create_transformation_context( From dabb565c2d238352097cfee81964d59625c34ab3 Mon Sep 17 00:00:00 2001 From: Karol Chojnowski <43881785+kchojn@users.noreply.github.com> Date: Thu, 30 Sep 2021 14:31:44 +0200 Subject: [PATCH 07/10] Insert new singatures from contracts + `black` code (#52) --- ethtx/decoders/abi/abc.py | 5 +- ethtx/decoders/abi/balances.py | 4 +- ethtx/decoders/abi/calls.py | 9 +- ethtx/decoders/abi/decoder.py | 26 +--- ethtx/decoders/abi/helpers/utils.py | 52 ++++++-- ethtx/decoders/decoder_service.py | 9 +- ethtx/decoders/decoders/parameters.py | 4 +- ethtx/decoders/decoders/semantics.py | 10 +- ethtx/decoders/semantic/calls.py | 2 +- ethtx/decoders/semantic/decoder.py | 4 +- ethtx/decoders/semantic/events.py | 2 +- ethtx/decoders/semantic/helpers/utils.py | 4 +- ethtx/exceptions.py | 8 ++ ethtx/models/semantics_model.py | 9 +- ethtx/models/w3_model.py | 10 +- ethtx/providers/etherscan/contracts.py | 6 +- .../semantic_providers/semantics_database.py | 58 +++++--- .../semantics_repository.py | 96 +++++++++----- ethtx/providers/signature_provider.py | 29 +++- ethtx/providers/web3_provider.py | 10 +- ethtx/semantics/rollups/optimism.py | 2 +- ethtx/semantics/router.py | 6 +- ethtx/semantics/solidity/precompiles.py | 124 ++++++++---------- ethtx/semantics/standards/eip1969.py | 28 +++- ethtx/semantics/standards/erc20.py | 5 +- 25 files changed, 307 insertions(+), 215 deletions(-) diff --git a/ethtx/decoders/abi/abc.py b/ethtx/decoders/abi/abc.py index c217b222..7011f8ac 100644 --- a/ethtx/decoders/abi/abc.py +++ b/ethtx/decoders/abi/abc.py @@ -49,10 +49,7 @@ def __init__( @abstractmethod def decode_transaction( - self, - block: Block, - transaction: Transaction, - proxies: Dict[str, Proxy], + self, block: Block, transaction: Transaction, proxies: Dict[str, Proxy] ): ... diff --git a/ethtx/decoders/abi/balances.py b/ethtx/decoders/abi/balances.py index 47f99ecb..3e4b218f 100644 --- a/ethtx/decoders/abi/balances.py +++ b/ethtx/decoders/abi/balances.py @@ -25,8 +25,8 @@ class ABIBalancesDecoder(ABISubmoduleAbc): def decode(self, transfers: List[DecodedTransfer]) -> List: """Decode balances.""" - balance_holders = dict() - balance_tokens = dict() + balance_holders = {} + balance_tokens = {} for transfer in transfers: if transfer.from_address.address != ZERO_ADDRESS: diff --git a/ethtx/decoders/abi/calls.py b/ethtx/decoders/abi/calls.py index ce750a70..83e9d590 100644 --- a/ethtx/decoders/abi/calls.py +++ b/ethtx/decoders/abi/calls.py @@ -48,14 +48,7 @@ def decode( call_id = "" decoded_root_call = self.decode_call( - call, - block, - transaction, - call_id, - indent, - status, - proxies or {}, - chain_id, + call, block, transaction, call_id, indent, status, proxies or {}, chain_id ) with RecursionLimit(RECURSION_LIMIT): diff --git a/ethtx/decoders/abi/decoder.py b/ethtx/decoders/abi/decoder.py index 757eae18..32b04246 100644 --- a/ethtx/decoders/abi/decoder.py +++ b/ethtx/decoders/abi/decoder.py @@ -50,7 +50,7 @@ def decode_transaction( log.info("ABI decoding for %s / %s.", transaction.metadata.tx_hash, chain_id) try: - with ExecutionTimer(f"ABI decoding for " + transaction.metadata.tx_hash): + with ExecutionTimer("ABI decoding for " + transaction.metadata.tx_hash): full_decoded_transaction = self._decode_transaction( block.metadata, transaction, chain_id, proxies ) @@ -91,12 +91,7 @@ def decode_call( ) -> Optional[DecodedCall]: return ABICallsDecoder( repository=self._repository, chain_id=self._default_chain - ).decode( - call=root_call, - block=block, - transaction=transaction, - proxies=proxies, - ) + ).decode(call=root_call, block=block, transaction=transaction, proxies=proxies) def decode_events( self, @@ -143,11 +138,7 @@ def decode_transfers( ): return ABITransfersDecoder( repository=self._repository, chain_id=chain_id or self._default_chain - ).decode( - call=call, - events=events, - proxies=proxies or {}, - ) + ).decode(call=call, events=events, proxies=proxies or {}) def decode_balances(self, transfers: List[DecodedTransfer]): return ABIBalancesDecoder( @@ -187,11 +178,7 @@ def _decode_transaction( try: full_decoded_transaction.calls = self.decode_calls( - transaction.root_call, - block, - transaction.metadata, - proxies, - chain_id, + transaction.root_call, block, transaction.metadata, proxies, chain_id ) except Exception: log.exception( @@ -229,10 +216,7 @@ def _decode_transaction( return full_decoded_transaction used_semantics = self._repository.end_record() - log.info( - f"Semantics used in decoding {transaction.metadata.tx_hash}: " - + ", ".join(used_semantics) - ) + log.info("Semantics used in decoding %s: ", ", ".join(used_semantics)) full_decoded_transaction.status = True diff --git a/ethtx/decoders/abi/helpers/utils.py b/ethtx/decoders/abi/helpers/utils.py index 900845b9..91c33891 100644 --- a/ethtx/decoders/abi/helpers/utils.py +++ b/ethtx/decoders/abi/helpers/utils.py @@ -10,7 +10,7 @@ # See the License for the specific language governing permissions and # limitations under the License. import logging -from typing import Iterator, Optional +from typing import Iterator, Optional, List from ethtx.models.semantics_model import ( FunctionSemantics, @@ -30,8 +30,7 @@ def decode_function_abi_with_external_source( repository: SemanticsRepository, _provider: Optional[SignatureProvider] = FourByteProvider, ) -> Iterator[FunctionSemantics]: - - function = repository.get_most_common_signature(signature_hash=signature) + function = repository.get_most_used_signature(signature_hash=signature) if function: log.info( "Successfully guessed function from SemanticsRepository - %s.", @@ -40,7 +39,7 @@ def decode_function_abi_with_external_source( function_semantics = FunctionSemantics( signature, function.name, - [ParameterSemantics(arg["name"], arg["type"], []) for arg in function.args], + _prepare_parameter_semantics(function.args, function.tuple, unknown=False), [], ) yield function_semantics @@ -55,25 +54,23 @@ def decode_function_abi_with_external_source( function_semantics = FunctionSemantics( signature, func.get("name"), - [ - ParameterSemantics(f"arg{i}", arg, []) - for i, arg in enumerate(func.get("args")) - ], + _prepare_parameter_semantics( + func.get("args"), isinstance(func.get("args"), tuple), unknown=True + ), [], ) yield function_semantics finally: if "function_semantics" in locals(): - repository.process_signatures( + repository.update_or_insert_signature( signature=Signature( signature_hash=signature, name=function_semantics.name, args=[ - SignatureArg( - name=param.parameter_name, type=param.parameter_type - ) - for param in function_semantics.inputs + SignatureArg(name=f"arg{i}", type=arg) + for i, arg in enumerate(func.get("args")) ], + tuple=isinstance(func.get("args"), tuple), ) ) @@ -91,3 +88,32 @@ def decode_event_abi_name_with_external_source( return event.get("name", signature) return signature + + +def _prepare_parameter_semantics( + args, is_tuple: bool, unknown: bool +) -> List[ParameterSemantics]: + if not is_tuple: + return [ + ParameterSemantics( + arg["name"] if not unknown else f"arg{i}", + arg["type"] if not unknown else arg, + [], + ) + for i, arg in enumerate(args) + ] + + return [ + ParameterSemantics( + "params", + "tuple", + [ + ParameterSemantics( + arg["name"] if not unknown else f"arg{i}", + arg["type"] if not unknown else arg, + [], + ) + for i, arg in enumerate(args) + ], + ) + ] diff --git a/ethtx/decoders/decoder_service.py b/ethtx/decoders/decoder_service.py index fa724269..59a756ad 100644 --- a/ethtx/decoders/decoder_service.py +++ b/ethtx/decoders/decoder_service.py @@ -26,7 +26,7 @@ def __init__(self, abi_decoder, semantic_decoder, web3provider, default_chain): def get_delegations(self, calls: Union[Call, List[Call]]) -> Dict[str, List[str]]: - delegations = dict() + delegations = {} if not calls: return delegations @@ -58,7 +58,7 @@ def get_proxies( self, delegations: Dict[str, List[str]], chain_id: str ) -> Dict[str, Proxy]: - proxies = dict() + proxies = {} chain = self.web3provider._get_node_connection(chain_id) for delegator in delegations: @@ -128,10 +128,7 @@ def decode_transaction(self, chain_id: str, tx_hash: str) -> DecodedTransaction: # decode transaction using ABI abi_decoded_tx = self.abi_decoder.decode_transaction( - block=block, - transaction=transaction, - proxies=proxies, - chain_id=chain_id, + block=block, transaction=transaction, proxies=proxies, chain_id=chain_id ) # decode transaction using additional semantics diff --git a/ethtx/decoders/decoders/parameters.py b/ethtx/decoders/decoders/parameters.py index e613d96f..eac2db11 100644 --- a/ethtx/decoders/decoders/parameters.py +++ b/ethtx/decoders/decoders/parameters.py @@ -388,7 +388,7 @@ def decode_array(raw_value, argument_type, slot): argument_value = decode_static_argument(raw_value, argument_type) slot += 1 else: - argument_name = "arg_%d" % (i + 1) + argument_name = f"arg_{i + 1}" argument_type = "unknown" argument_value = "0x" + raw_value @@ -407,7 +407,7 @@ def decode_graffiti_parameters(input_data): try: message = bytearray.fromhex(input_data[2:]).decode() input_parameters = [Argument(name="message", type="string", value=message)] - except Exception as e: + except Exception: # log.warning(e) pass diff --git a/ethtx/decoders/decoders/semantics.py b/ethtx/decoders/decoders/semantics.py index 791e040b..4bcf1197 100644 --- a/ethtx/decoders/decoders/semantics.py +++ b/ethtx/decoders/decoders/semantics.py @@ -58,7 +58,7 @@ def _decode_parameters_list(raw_parameters_list: list) -> List[ParameterSemantic def decode_events_and_functions( abi: dict, ) -> Tuple[Dict[str, EventSemantics], Dict[str, FunctionSemantics]]: - events = dict() + events = {} for signature, raw_event_semantics in abi.get("events", {}).items(): parameters = _decode_parameters_list(raw_event_semantics.get("parameters")) events[signature] = EventSemantics( @@ -68,7 +68,7 @@ def decode_events_and_functions( parameters, ) - functions = dict() + functions = {} for signature, raw_function_semantics in abi.get("functions", {}).items(): if raw_function_semantics: inputs = _decode_parameters_list(raw_function_semantics.get("inputs")) @@ -86,12 +86,12 @@ def decode_events_and_functions( def decode_transformations( raw_transformations: dict, ) -> Dict[str, Dict[str, TransformationSemantics]]: - transformations = dict() + transformations = {} if raw_transformations: for signature, transformation in raw_transformations.items(): - transformations[signature] = dict() + transformations[signature] = {} for parameter_name, parameter_transformation in transformation.get( - "arguments", dict() + "arguments", {} ).items(): transformations[signature][parameter_name] = TransformationSemantics( parameter_transformation.get("name"), diff --git a/ethtx/decoders/semantic/calls.py b/ethtx/decoders/semantic/calls.py index 92b07295..74576ca7 100644 --- a/ethtx/decoders/semantic/calls.py +++ b/ethtx/decoders/semantic/calls.py @@ -47,7 +47,7 @@ def decode( function_transformations.get("name") or call.function_name ) else: - function_transformations = dict() + function_transformations = {} # prepare context for transformations context = create_transformation_context( diff --git a/ethtx/decoders/semantic/decoder.py b/ethtx/decoders/semantic/decoder.py index 5ce0df2e..14b63dc9 100644 --- a/ethtx/decoders/semantic/decoder.py +++ b/ethtx/decoders/semantic/decoder.py @@ -63,9 +63,7 @@ def decode_metadata( chain_id: str, ) -> DecodedTransactionMetadata: return SemanticMetadataDecoder(repository=self.repository).decode( - block_metadata=block_metadata, - tx_metadata=tx_metadata, - chain_id=chain_id, + block_metadata=block_metadata, tx_metadata=tx_metadata, chain_id=chain_id ) def decode_event( diff --git a/ethtx/decoders/semantic/events.py b/ethtx/decoders/semantic/events.py index 870af7cd..61739f7a 100644 --- a/ethtx/decoders/semantic/events.py +++ b/ethtx/decoders/semantic/events.py @@ -99,7 +99,7 @@ def _get_parameters_str(parameters): ): event_transformations = anonymous_events[calculated_event_signature] else: - event_transformations = dict() + event_transformations = {} # prepare context for transformations context = create_transformation_context( diff --git a/ethtx/decoders/semantic/helpers/utils.py b/ethtx/decoders/semantic/helpers/utils.py index c0ec31b1..8bf165bd 100644 --- a/ethtx/decoders/semantic/helpers/utils.py +++ b/ethtx/decoders/semantic/helpers/utils.py @@ -162,7 +162,7 @@ def decode_call(transaction, repository, contract_address, data): repository, argument, f"__input{i}__", - function_transformations or dict(), + function_transformations or {}, None, context, ) @@ -184,7 +184,7 @@ def create_transformation_context( contract, input_variables, output_variables, transaction, repository ): # create a context for transformations - context = dict() + context = {} for i, parameter in enumerate(input_variables): if parameter.name: context[parameter.name] = parameter.value diff --git a/ethtx/exceptions.py b/ethtx/exceptions.py index f58a623f..8a35e888 100644 --- a/ethtx/exceptions.py +++ b/ethtx/exceptions.py @@ -16,6 +16,7 @@ "ProcessingException", "InvalidTransactionHash", "InvalidEtherscanReturnCodeException", + "FourByteConnectionException", ] import json @@ -48,3 +49,10 @@ def __init__(self, returned_code: int, params: Dict = None): params_msg = " with params: " + json.dumps(params) if params else "" msg = f"Invalid status code for etherscan request: {returned_code} {params_msg}" super().__init__(msg) + + +class FourByteConnectionException(Exception): + """ 4byte directory connection error. """ + + def __init__(self, msg: str): + super().__init__(f"Couldn't connect to 4byte.directory: {msg}") diff --git a/ethtx/models/semantics_model.py b/ethtx/models/semantics_model.py index 576809de..f9e359bf 100644 --- a/ethtx/models/semantics_model.py +++ b/ethtx/models/semantics_model.py @@ -108,14 +108,21 @@ class Signature(JsonObject): name: str args: List[SignatureArg] count: int + tuple: bool def __init__( - self, signature_hash: str, name: str, args: List[SignatureArg], count: int = 1 + self, + signature_hash: str, + name: str, + args: List[SignatureArg], + count: int = 1, + tuple: bool = False, ): self.signature_hash = signature_hash self.name = name self.args = args self.count = count + self.tuple = tuple class ERC20Semantics: diff --git a/ethtx/models/w3_model.py b/ethtx/models/w3_model.py index 97a551ce..554f0650 100644 --- a/ethtx/models/w3_model.py +++ b/ethtx/models/w3_model.py @@ -88,7 +88,13 @@ def to_object(self, w3receipt: W3Receipt) -> TransactionMetadata: block_number = self.blockNumber tx_index = self.transactionIndex from_address = self.from_address.lower() - to_address = self.to.lower() if self.to else w3receipt.contractAddress.lower() if w3receipt.contractAddress else None + to_address = ( + self.to.lower() + if self.to + else w3receipt.contractAddress.lower() + if w3receipt.contractAddress + else None + ) tx_value = self.value gas_limit = self.gas gas_price = self.gasPrice @@ -148,7 +154,7 @@ def to_object(self) -> Event: log_data = self.data topics = [] - for i in range(len(self.topics)): + for i, _ in enumerate(self.topics): topics.append(self.topics[i].hex()) return Event( diff --git a/ethtx/providers/etherscan/contracts.py b/ethtx/providers/etherscan/contracts.py index cfdb67b2..3a552644 100644 --- a/ethtx/providers/etherscan/contracts.py +++ b/ethtx/providers/etherscan/contracts.py @@ -92,7 +92,7 @@ def _parse_abi(json_abi) -> Dict: def _parse_components(components): comp_canonical = "(" - comp_inputs = list() + comp_inputs = [] for i, component in enumerate(components): @@ -130,8 +130,8 @@ def _parse_components(components): return comp_canonical, comp_inputs - functions = dict() - events = dict() + functions = {} + events = {} for item in json_abi: diff --git a/ethtx/providers/semantic_providers/semantics_database.py b/ethtx/providers/semantic_providers/semantics_database.py index 5d2af6e2..c3084bc1 100644 --- a/ethtx/providers/semantic_providers/semantics_database.py +++ b/ethtx/providers/semantic_providers/semantics_database.py @@ -11,13 +11,12 @@ # limitations under the License. from abc import ABC -from typing import Dict, Optional, Any +from typing import Dict, Optional, Any, List +import bson from pymongo.cursor import Cursor from pymongo.database import Database as MongoDatabase -from ethtx.models.semantics_model import Signature - class ISemanticsDatabase(ABC): """Semantics Database. Represents raw interface required to be @@ -30,16 +29,16 @@ def get_address_semantics(self, chain_id: str, address: str) -> Optional[Dict]: def get_contract_semantics(self, code_hash: str) -> Optional[Dict]: ... - def get_signature_semantics(self, signature_hash: str) -> Any: + def get_signature_semantics(self, signature_hash: str) -> Optional[List[Dict]]: ... - def insert_contract(self, contract: dict, update_if_exist: bool = False) -> None: + def insert_contract(self, contract: dict, update_if_exist: bool = False) -> Any: ... - def insert_address(self, address_data: dict, update_if_exist: bool = False) -> None: + def insert_address(self, address: dict, update_if_exist: bool = False) -> Any: ... - def insert_signature(self, signature, update_if_exist: bool = False) -> None: + def insert_signature(self, signature, update_if_exist: bool = False) -> Any: ... @@ -66,38 +65,59 @@ def get_address_semantics(self, chain_id, address) -> Optional[Dict]: def get_signature_semantics(self, signature_hash: str) -> Cursor: return self._signatures.find({"signature_hash": signature_hash}) - def insert_signature(self, signature: dict, update_if_exist=False) -> None: + def insert_signature( + self, signature: dict, update_if_exist=False + ) -> Optional[bson.ObjectId]: if update_if_exist: - self._signatures.replace_one( + updated_signature = self._signatures.replace_one( {"_id": signature["_id"]}, signature, upsert=True ) - else: - self._signatures.insert_one(signature) + return ( + None + if updated_signature.modified_count + else updated_signature.upserted_id + ) + + inserted_signature = self._signatures.insert_one(signature) + return inserted_signature.inserted_id def get_contract_semantics(self, code_hash): """Contract hashes are always the same, no mather what chain we use, so there is no need to use chain_id""" return self._contracts.find_one({"_id": code_hash}, {"_id": 0}) - def insert_contract(self, contract, update_if_exist=False) -> None: + def insert_contract( + self, contract, update_if_exist=False + ) -> Optional[bson.ObjectId]: contract_with_id = {"_id": contract["code_hash"], **contract} if update_if_exist: - self._contracts.replace_one( + updated_contract = self._contracts.replace_one( {"_id": contract_with_id["_id"]}, contract_with_id, upsert=True ) - else: - self._contracts.insert_one(contract_with_id) - def insert_address(self, address, update_if_exist=False) -> None: + return ( + None + if updated_contract.modified_count + else updated_contract.upserted_id + ) + + inserted_contract = self._contracts.insert_one(contract_with_id) + return inserted_contract.inserted_id + + def insert_address(self, address, update_if_exist=False) -> Optional[bson.ObjectId]: address_with_id = { "_id": f"{address['chain_id']}-{address['address']}", **address, } if update_if_exist: - self._addresses.replace_one( + updated_address = self._addresses.replace_one( {"_id": address_with_id["_id"]}, address_with_id, upsert=True ) - else: - self._addresses.insert_one(address_with_id) + return ( + None if updated_address.modified_count else updated_address.upserted_id + ) + + inserted_address = self._addresses.insert_one(address_with_id) + return inserted_address.inserted_id diff --git a/ethtx/providers/semantic_providers/semantics_repository.py b/ethtx/providers/semantic_providers/semantics_repository.py index 21d9285a..9c75e32b 100644 --- a/ethtx/providers/semantic_providers/semantics_repository.py +++ b/ethtx/providers/semantic_providers/semantics_repository.py @@ -11,9 +11,7 @@ # limitations under the License. from functools import lru_cache -from typing import Optional, List, Any, Dict - -from ens import ENS +from typing import Optional, List from ethtx.decoders.decoders.semantics import decode_events_and_functions from ethtx.models.semantics_model import ( @@ -25,6 +23,7 @@ FunctionSemantics, EventSemantics, Signature, + SignatureArg, ) from ethtx.providers import EtherscanProvider, Web3Provider from ethtx.providers.semantic_providers.semantics_database import ISemanticsDatabase @@ -93,7 +92,7 @@ def decode_parameter(_parameter): if raw_address_semantics["contract"] == ZERO_HASH: contract_semantics = ContractSemantics( - raw_address_semantics["contract"], "EOA", dict(), dict(), dict() + raw_address_semantics["contract"], "EOA", {}, {}, {} ) else: @@ -101,7 +100,7 @@ def decode_parameter(_parameter): raw_contract_semantics = self.database.get_contract_semantics( raw_address_semantics["contract"] ) - events = dict() + events = {} for signature, event in raw_contract_semantics["events"].items(): @@ -116,7 +115,7 @@ def decode_parameter(_parameter): parameters_semantics, ) - functions = dict() + functions = {} for signature, function in raw_contract_semantics["functions"].items(): inputs_semantics = [] @@ -130,11 +129,11 @@ def decode_parameter(_parameter): signature, function["name"], inputs_semantics, outputs_semantics ) - transformations = dict() + transformations = {} for signature, parameters_transformations in raw_contract_semantics[ "transformations" ].items(): - transformations[signature] = dict() + transformations[signature] = {} for parameter, transformation in parameters_transformations.items(): transformations[signature][parameter] = TransformationSemantics( transformation["transformed_name"], @@ -166,8 +165,7 @@ def decode_parameter(_parameter): return address_semantics - else: - return None + return None @lru_cache(maxsize=128) def get_semantics(self, chain_id: str, address: str) -> Optional[AddressSemantics]: @@ -206,7 +204,7 @@ def get_semantics(self, chain_id: str, address: str) -> Optional[AddressSemantic else: erc20_semantics = None contract_semantics = ContractSemantics( - code_hash, raw_semantics["name"], events, functions, dict() + code_hash, raw_semantics["name"], events, functions, {} ) address_semantics = AddressSemantics( chain_id, @@ -235,7 +233,7 @@ def get_semantics(self, chain_id: str, address: str) -> Optional[AddressSemantic erc20_semantics = None contract_semantics = ContractSemantics( - code_hash, address, dict(), dict(), dict() + code_hash, address, {}, {}, {} ) address_semantics = AddressSemantics( chain_id, @@ -249,9 +247,7 @@ def get_semantics(self, chain_id: str, address: str) -> Optional[AddressSemantic else: # externally owned address - contract_semantics = ContractSemantics( - ZERO_HASH, "EOA", dict(), dict(), dict() - ) + contract_semantics = ContractSemantics(ZERO_HASH, "EOA", {}, {}, {}) ns_name = self._web3provider.ens.name(address) address_semantics = AddressSemantics( chain_id, @@ -446,11 +442,11 @@ def get_token_data(self, chain_id, address, proxies=None): def update_address(self, chain_id, address, contract): updated_address = {"network": chain_id, "address": address, **contract} - self.database.insert_address(address_data=updated_address, update_if_exist=True) + self.database.insert_address(address=updated_address, update_if_exist=True) return updated_address - def update_semantics(self, semantics): + def update_semantics(self, semantics) -> None: if not semantics: return @@ -458,16 +454,50 @@ def update_semantics(self, semantics): address_semantics = semantics.json(entire=False) contract_semantics = semantics.contract.json() - self.database.insert_contract(contract_semantics, update_if_exist=True) - self.database.insert_address(address_semantics, update_if_exist=True) + contract_id = self.database.insert_contract( + contract=contract_semantics, update_if_exist=True + ) + _ = self.database.insert_address( + address=address_semantics, update_if_exist=True + ) - def get_most_common_signature(self, signature_hash: str) -> Signature: - signatures = [ - sig - for sig in self.database.get_signature_semantics( - signature_hash=signature_hash - ) - ] + if contract_id: + self.insert_contract_signatures(semantics.contract) + + def insert_contract_signatures(self, contract_semantics: ContractSemantics): + for _, v in contract_semantics.functions.items(): + if v.signature.startswith("0x"): + if v.inputs: + if v.inputs[0].parameter_type == "tuple": + new_signature = Signature( + signature_hash=v.signature, + name=v.name, + tuple=True, + args=[ + SignatureArg( + name=param.parameter_name, type=param.parameter_type + ) + for param in v.inputs[0].components + ], + ) + else: + new_signature = Signature( + signature_hash=v.signature, + name=v.name, + args=[ + SignatureArg( + name=param.parameter_name, type=param.parameter_type + ) + for param in v.inputs + ], + ) + + self.update_or_insert_signature(new_signature) + + def get_most_used_signature(self, signature_hash: str) -> Optional[Signature]: + signatures = list( + self.database.get_signature_semantics(signature_hash=signature_hash) + ) if signatures: most_common_signature = max(signatures, key=lambda x: x["count"]) @@ -476,13 +506,14 @@ def get_most_common_signature(self, signature_hash: str) -> Signature: name=most_common_signature["name"], args=most_common_signature["args"], count=most_common_signature["count"], + tuple=most_common_signature["tuple"], ) - most_common_signature["count"] += 1 - self.database.insert_signature(most_common_signature, update_if_exist=True) return signature - def process_signatures(self, signature: Signature): + return None + + def update_or_insert_signature(self, signature: Signature): signatures = self.database.get_signature_semantics( signature_hash=signature.signature_hash ) @@ -494,7 +525,10 @@ def process_signatures(self, signature: Signature): for index, argument in enumerate(sig["args"]): argument["name"] = signature.args[index].name argument["type"] = signature.args[index].type - self.database.insert_signature(signature=sig, update_if_exist=True) - break + + sig["count"] += 1 + self.database.insert_signature(signature=sig, update_if_exist=True) + break + else: self.database.insert_signature(signature=signature.json()) diff --git a/ethtx/providers/signature_provider.py b/ethtx/providers/signature_provider.py index 24dda514..8c2c7660 100644 --- a/ethtx/providers/signature_provider.py +++ b/ethtx/providers/signature_provider.py @@ -9,15 +9,20 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +import logging from abc import ABC, abstractmethod -from typing import Dict, List, Any, Iterator, TypedDict +from typing import Dict, List, Any, Iterator, TypedDict, Union, Tuple import requests +from ethtx.exceptions import FourByteConnectionException + +log = logging.getLogger(__name__) + class SignatureReturnType(TypedDict): name: str - args: List[str] + args: Union[List[str], Tuple[str]] class SignatureProvider(ABC): @@ -98,20 +103,34 @@ def _get( if page: filters["page"] = page - return requests.get(self.url(endpoint), params=filters).json() + try: + try: + return requests.get( + self.url(endpoint), params=filters, timeout=5 + ).json() + except ( + requests.exceptions.ConnectionError, + requests.exceptions.Timeout, + ) as e: + raise FourByteConnectionException(e) from e + except FourByteConnectionException: + return {} @staticmethod def _parse_text_signature_response(data: Dict) -> SignatureReturnType: text_sig = data.get("text_signature", "") + name = text_sig.split("(")[0] if text_sig else "" types = ( text_sig[text_sig.find("(") + 1 : text_sig.rfind(")")] if text_sig else "" ) if "(" in types: - types = types[types.find("(") + 1 : types.rfind(")")] + args = tuple(types[types.find("(") + 1 : types.rfind(")")].split(",")) + else: + args = types.split(",") - return {"name": name, "args": types.split(",")} + return {"name": name, "args": args} FourByteProvider = FourByteProvider() diff --git a/ethtx/providers/web3_provider.py b/ethtx/providers/web3_provider.py index f3112925..16ba2048 100644 --- a/ethtx/providers/web3_provider.py +++ b/ethtx/providers/web3_provider.py @@ -70,9 +70,9 @@ def connect_chain( w3.eth.block_number, ) return w3 - else: - log.info("%s connection to %s failed.", method, hook) - raise Web3ConnectionException() + + log.info("%s connection to %s failed.", method, hook) + raise Web3ConnectionException() except Exception as exc: log.warning("Node connection %s: %s failed.", method, hook, exc_info=exc) raise @@ -318,11 +318,11 @@ def get_erc20_token( address=Web3.toChecksumAddress(token_address), abi=abi ) name = token.functions.name().call() if name_abi else contract_name - if type(name) == bytes: + if isinstance(name, bytes): name = name.decode("utf-8").replace("\x00", "") symbol = token.functions.symbol().call() if symbol_abi else contract_name - if type(symbol) == bytes: + if isinstance(symbol, bytes): symbol = symbol.decode("utf-8").replace("\x00", "") decimals = token.functions.decimals().call() if decimals_abi else 18 diff --git a/ethtx/semantics/rollups/optimism.py b/ethtx/semantics/rollups/optimism.py index 74b17fcc..d7509c01 100644 --- a/ethtx/semantics/rollups/optimism.py +++ b/ethtx/semantics/rollups/optimism.py @@ -102,7 +102,7 @@ def ECDSA_recover(transaction): return transaction - decoded_methods = dict() + decoded_methods = {} data = "0x00000000" + data diff --git a/ethtx/semantics/router.py b/ethtx/semantics/router.py index bea9dd28..3fd072a5 100644 --- a/ethtx/semantics/router.py +++ b/ethtx/semantics/router.py @@ -49,11 +49,11 @@ def _get_semantics(cls) -> BaseType: for filename in files: filename = filename.replace("/", ".").replace(".py", "") - foo = importlib.import_module( + imported_module = importlib.import_module( f"{cls.root_module_name}{filename.split(cls.root_module_name)[-1]}" ) - for item in dir(foo): - obj = getattr(foo, item) + for item in dir(imported_module): + obj = getattr(imported_module, item) if isinstance(obj, type) and issubclass(obj, Base) and obj != Base: rv[obj.code_hash] = obj.contract_semantics diff --git a/ethtx/semantics/solidity/precompiles.py b/ethtx/semantics/solidity/precompiles.py index 3821a5ba..c542fe0b 100644 --- a/ethtx/semantics/solidity/precompiles.py +++ b/ethtx/semantics/solidity/precompiles.py @@ -16,77 +16,69 @@ precompiles = { 1: FunctionSemantics( - "", - "ecrecover", - [ - ParameterSemantics("hash", "bytes32", []), - ParameterSemantics("v", "bytes8", []), - ParameterSemantics("r", "bytes32", []), - ParameterSemantics("s", "bytes32", []), - ], - [ParameterSemantics("", "address", [])], - ), + "", + "ecrecover", + [ + ParameterSemantics("hash", "bytes32", []), + ParameterSemantics("v", "bytes8", []), + ParameterSemantics("r", "bytes32", []), + ParameterSemantics("s", "bytes32", []), + ], + [ParameterSemantics("", "address", [])], + ), 2: FunctionSemantics( - "", - "sha256", - [ - ParameterSemantics("data", "raw", []) - ], - [ParameterSemantics("", "bytes32", [])], - ), + "", + "sha256", + [ParameterSemantics("data", "raw", [])], + [ParameterSemantics("", "bytes32", [])], + ), 3: FunctionSemantics( - "", - "ripemd160", - [ - ParameterSemantics("data", "raw", []) - ], - [ParameterSemantics("", "bytes32", [])], - ), + "", + "ripemd160", + [ParameterSemantics("data", "raw", [])], + [ParameterSemantics("", "bytes32", [])], + ), 4: FunctionSemantics( - "", - "datacopy", - [ - ParameterSemantics("data", "raw", []) - ], - [ParameterSemantics("", "raw", [])], - ), + "", + "datacopy", + [ParameterSemantics("data", "raw", [])], + [ParameterSemantics("", "raw", [])], + ), 5: FunctionSemantics( - "", - "bigModExp", - [ - ParameterSemantics("base", "bytes32", []), - ParameterSemantics("exp", "bytes32", []), - ParameterSemantics("mod", "bytes32", []) - ], - [ParameterSemantics("", "bytes32", [])], - ), + "", + "bigModExp", + [ + ParameterSemantics("base", "bytes32", []), + ParameterSemantics("exp", "bytes32", []), + ParameterSemantics("mod", "bytes32", []), + ], + [ParameterSemantics("", "bytes32", [])], + ), 6: FunctionSemantics( - "", - "bn256Add", - [ - ParameterSemantics("ax", "bytes32", []), - ParameterSemantics("ay", "bytes32", []), - ParameterSemantics("bx", "bytes32", []), - ParameterSemantics("by", "bytes32", []) - ], - [ParameterSemantics("", "bytes32[2]", [])], - ), + "", + "bn256Add", + [ + ParameterSemantics("ax", "bytes32", []), + ParameterSemantics("ay", "bytes32", []), + ParameterSemantics("bx", "bytes32", []), + ParameterSemantics("by", "bytes32", []), + ], + [ParameterSemantics("", "bytes32[2]", [])], + ), 7: FunctionSemantics( - "", - "bn256ScalarMul", - [ - ParameterSemantics("x", "bytes32", []), - ParameterSemantics("y", "bytes32", []), - ParameterSemantics("scalar", "bytes32", []) - ], - [ParameterSemantics("", "bytes32[2]", [])], - ), + "", + "bn256ScalarMul", + [ + ParameterSemantics("x", "bytes32", []), + ParameterSemantics("y", "bytes32", []), + ParameterSemantics("scalar", "bytes32", []), + ], + [ParameterSemantics("", "bytes32[2]", [])], + ), 8: FunctionSemantics( - "", - "bn256Pairing", - [ - ParameterSemantics("input", "raw", []) - ], - [ParameterSemantics("", "bytes32", [])], - ), + "", + "bn256Pairing", + [ParameterSemantics("input", "raw", [])], + [ParameterSemantics("", "bytes32", [])], + ), } diff --git a/ethtx/semantics/standards/eip1969.py b/ethtx/semantics/standards/eip1969.py index bd24ec4d..f76a0183 100644 --- a/ethtx/semantics/standards/eip1969.py +++ b/ethtx/semantics/standards/eip1969.py @@ -3,9 +3,16 @@ def is_eip1969_proxy(chain, delegator, delegate): - implementation_slot = hex(int(Web3.keccak(text='eip1967.proxy.implementation').hex(), 16) - 1) + implementation_slot = hex( + int(Web3.keccak(text="eip1967.proxy.implementation").hex(), 16) - 1 + ) try: - implementation = '0x' + chain.eth.get_storage_at(Web3.toChecksumAddress(delegator), implementation_slot).hex()[-40:] + implementation = ( + "0x" + + chain.eth.get_storage_at( + Web3.toChecksumAddress(delegator), implementation_slot + ).hex()[-40:] + ) return implementation == delegate except: return False @@ -13,7 +20,7 @@ def is_eip1969_proxy(chain, delegator, delegate): def is_eip1969_beacon_proxy(chain, delegator, delegate): - ibeacon_abi = '''[ + ibeacon_abi = """[ { "inputs": [], "name": "implementation", @@ -27,12 +34,19 @@ def is_eip1969_beacon_proxy(chain, delegator, delegate): "stateMutability": "view", "type": "function" } - ]''' + ]""" - beacon_slot = hex(int(Web3.keccak(text='eip1967.proxy.beacon').hex(), 16) - 1) + beacon_slot = hex(int(Web3.keccak(text="eip1967.proxy.beacon").hex(), 16) - 1) try: - beacon = '0x' + chain.eth.get_storage_at(Web3.toChecksumAddress(delegator), beacon_slot).hex()[-40:] - beacon = chain.eth.contract(address=Web3.toChecksumAddress(beacon), abi=ibeacon_abi) + beacon = ( + "0x" + + chain.eth.get_storage_at( + Web3.toChecksumAddress(delegator), beacon_slot + ).hex()[-40:] + ) + beacon = chain.eth.contract( + address=Web3.toChecksumAddress(beacon), abi=ibeacon_abi + ) implementation = beacon.functions.implementation().call() return implementation == Web3.toChecksumAddress(delegate) except: diff --git a/ethtx/semantics/standards/erc20.py b/ethtx/semantics/standards/erc20.py index 1536b32e..a558427b 100644 --- a/ethtx/semantics/standards/erc20.py +++ b/ethtx/semantics/standards/erc20.py @@ -114,10 +114,7 @@ } erc20_totalSupply_function = FunctionSemantics( - "0x18160ddd", - "totalSupply", - [], - [ParameterSemantics("", "uint256", [])] + "0x18160ddd", "totalSupply", [], [ParameterSemantics("", "uint256", [])] ) erc20_totalSupply_function_transformation = { From dac9e6784b2fe8bf680bd6d71c712b41797125da Mon Sep 17 00:00:00 2001 From: Karol Chojnowski Date: Thu, 30 Sep 2021 14:38:36 +0200 Subject: [PATCH 08/10] rename param --- ethtx/decoders/abi/helpers/utils.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/ethtx/decoders/abi/helpers/utils.py b/ethtx/decoders/abi/helpers/utils.py index 91c33891..288fb5ee 100644 --- a/ethtx/decoders/abi/helpers/utils.py +++ b/ethtx/decoders/abi/helpers/utils.py @@ -67,7 +67,7 @@ def decode_function_abi_with_external_source( signature_hash=signature, name=function_semantics.name, args=[ - SignatureArg(name=f"arg{i}", type=arg) + SignatureArg(name=f"arg_{i}", type=arg) for i, arg in enumerate(func.get("args")) ], tuple=isinstance(func.get("args"), tuple), @@ -96,7 +96,7 @@ def _prepare_parameter_semantics( if not is_tuple: return [ ParameterSemantics( - arg["name"] if not unknown else f"arg{i}", + arg["name"] if not unknown else f"arg_{i}", arg["type"] if not unknown else arg, [], ) @@ -109,7 +109,7 @@ def _prepare_parameter_semantics( "tuple", [ ParameterSemantics( - arg["name"] if not unknown else f"arg{i}", + arg["name"] if not unknown else f"arg_{i}", arg["type"] if not unknown else arg, [], ) From 11350c2c55505a265eaaf8be9cdd9d846b731c6d Mon Sep 17 00:00:00 2001 From: Karol Chojnowski Date: Fri, 1 Oct 2021 10:23:31 +0200 Subject: [PATCH 09/10] fix log --- ethtx/decoders/abi/decoder.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/ethtx/decoders/abi/decoder.py b/ethtx/decoders/abi/decoder.py index 32b04246..fecf4f3f 100644 --- a/ethtx/decoders/abi/decoder.py +++ b/ethtx/decoders/abi/decoder.py @@ -216,7 +216,11 @@ def _decode_transaction( return full_decoded_transaction used_semantics = self._repository.end_record() - log.info("Semantics used in decoding %s: ", ", ".join(used_semantics)) + log.info( + "Semantics used in decoding %s: %s ", + transaction.metadata.tx_hash, + ", ".join(used_semantics), + ) full_decoded_transaction.status = True From fbd81b876e9c92db797d7c73e628fbf49caf3e78 Mon Sep 17 00:00:00 2001 From: Karol Chojnowski <43881785+kchojn@users.noreply.github.com> Date: Tue, 5 Oct 2021 12:37:01 +0200 Subject: [PATCH 10/10] Fix signature update (#53) --- ethtx/providers/semantic_providers/semantics_repository.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/ethtx/providers/semantic_providers/semantics_repository.py b/ethtx/providers/semantic_providers/semantics_repository.py index 9c75e32b..1b8c18d4 100644 --- a/ethtx/providers/semantic_providers/semantics_repository.py +++ b/ethtx/providers/semantic_providers/semantics_repository.py @@ -518,8 +518,10 @@ def update_or_insert_signature(self, signature: Signature): signature_hash=signature.signature_hash ) for sig in signatures: - if signature.signature_hash == sig["name"] and len(signature.args) == len( - sig["args"] + if ( + signature.name == sig["name"] + and signature.signature_hash == sig["signature_hash"] + and len(signature.args) == len(sig["args"]) ): if any(arg for arg in list(sig["args"][0].values()) if "arg" in arg): for index, argument in enumerate(sig["args"]):