Add substreams block ingestor #4839

Merged · 2 commits · Sep 11, 2023
4 changes: 4 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default.

1 change: 1 addition & 0 deletions Cargo.toml
@@ -7,6 +7,7 @@ members = [
"runtime/*",
"server/*",
"store/*",
"substreams-head-tracker",
"graph",
"tests",
]
160 changes: 160 additions & 0 deletions chain/substreams/src/block_ingestor.rs
@@ -0,0 +1,160 @@
use std::{sync::Arc, time::Duration};

use crate::mapper::Mapper;
use anyhow::{Context, Error};
use graph::blockchain::{
    client::ChainClient, substreams_block_stream::SubstreamsBlockStream, BlockIngestor,
};
use graph::prelude::MetricsRegistry;
use graph::slog::trace;
use graph::substreams::Package;
use graph::tokio_stream::StreamExt;
use graph::{
    blockchain::block_stream::BlockStreamEvent,
    cheap_clone::CheapClone,
    components::store::ChainStore,
    prelude::{async_trait, error, info, DeploymentHash, Logger},
    util::backoff::ExponentialBackoff,
};
use prost::Message;

const SUBSTREAMS_HEAD_TRACKER_BYTES: &[u8; 89935] =
    include_bytes!("../../../substreams-head-tracker/substreams-head-tracker-v1.0.0.spkg");

pub struct SubstreamsBlockIngestor {
    chain_store: Arc<dyn ChainStore>,
    client: Arc<ChainClient<super::Chain>>,
    logger: Logger,
    chain_name: String,
    metrics: Arc<MetricsRegistry>,
}

impl SubstreamsBlockIngestor {
    pub fn new(
        chain_store: Arc<dyn ChainStore>,
        client: Arc<ChainClient<super::Chain>>,
        logger: Logger,
        chain_name: String,
        metrics: Arc<MetricsRegistry>,
    ) -> SubstreamsBlockIngestor {
        SubstreamsBlockIngestor {
            chain_store,
            client,
            logger,
            chain_name,
            metrics,
        }
    }

    async fn fetch_head_cursor(&self) -> String {
        let mut backoff =
            ExponentialBackoff::new(Duration::from_millis(250), Duration::from_secs(30));
        loop {
            match self.chain_store.clone().chain_head_cursor() {
                Ok(cursor) => return cursor.unwrap_or_default(),
                Err(e) => {
                    error!(self.logger, "Fetching chain head cursor failed: {:#}", e);

                    backoff.sleep_async().await;
                }
            }
        }
    }

    /// Consumes the incoming stream of blocks until it hits an error, in which
    /// case the error is logged right away and the latest available cursor is
    /// returned upstream for future consumption.
    async fn process_blocks(
        &self,
        cursor: String,
        mut stream: SubstreamsBlockStream<super::Chain>,
    ) -> String {
        let mut latest_cursor = cursor;

        while let Some(message) = stream.next().await {
            let (block_ptr, cursor) = match message {
                Ok(BlockStreamEvent::ProcessBlock(triggers, cursor)) => {
                    (Arc::new(triggers.block), cursor)
                }
                Ok(BlockStreamEvent::Revert(_ptr, _cursor)) => {
                    trace!(self.logger, "Received undo block to ingest, skipping");
                    continue;
                }
                Err(e) => {
                    info!(
                        self.logger,
                        "An error occurred while streaming blocks: {}", e
                    );
                    break;
                }
            };

            let res = self.process_new_block(block_ptr, cursor.to_string()).await;
            if let Err(e) = res {
                error!(self.logger, "Process block failed: {:#}", e);
                break;
            }

            latest_cursor = cursor.to_string()
        }

        error!(
            self.logger,
            "Stream blocks complete unexpectedly, expecting stream to always stream blocks"
        );
        latest_cursor
    }

    async fn process_new_block(
        &self,
        block_ptr: Arc<super::Block>,
        cursor: String,
    ) -> Result<(), Error> {
        trace!(self.logger, "Received new block to ingest {:?}", block_ptr);

        self.chain_store
            .clone()
            .set_chain_head(block_ptr, cursor)
            .await
            .context("Updating chain head")?;

        Ok(())
    }
}

#[async_trait]
impl BlockIngestor for SubstreamsBlockIngestor {
    async fn run(self: Box<Self>) {
        let mapper = Arc::new(Mapper {});
        let mut latest_cursor = self.fetch_head_cursor().await;
        let mut backoff =
            ExponentialBackoff::new(Duration::from_millis(250), Duration::from_secs(30));
        let package = Package::decode(SUBSTREAMS_HEAD_TRACKER_BYTES.to_vec().as_ref()).unwrap();

        loop {
            let stream = SubstreamsBlockStream::<super::Chain>::new(
                DeploymentHash::default(),
                self.client.cheap_clone(),
                None,
                Some(latest_cursor.clone()),
                mapper.cheap_clone(),
                package.modules.clone(),
                "map_blocks".to_string(),
                vec![],
                vec![],
                self.logger.cheap_clone(),
                self.metrics.cheap_clone(),
            );

            // Consume the stream of blocks until an error is hit
            latest_cursor = self.process_blocks(latest_cursor, stream).await;

            // If we reach this point, we must wait a bit before retrying
            backoff.sleep_async().await;
Review comment from a Contributor on the line above:
Looks like backoff is never reset, which is not ideal. Maybe it should be reset if the cursor changes.
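A sketch of that reset against the tail of the `run` loop above, assuming the `ExponentialBackoff` helper exposes a `reset()` method (if it does not, rebuilding the backoff value in place has the same effect):

            let previous_cursor = latest_cursor.clone();
            latest_cursor = self.process_blocks(latest_cursor, stream).await;

            // Start the delay over whenever the stream made progress, so a
            // stream that fails after a long healthy run retries quickly
            // instead of waiting out the maximum backoff.
            if latest_cursor != previous_cursor {
                backoff.reset();
            }

            backoff.sleep_async().await;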

        }
    }

    fn network_name(&self) -> String {
        self.chain_name.clone()
    }
}
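Since `run` takes `self: Box<Self>` and loops forever, a caller hands the ingestor off once and drives it as a long-lived task. A minimal sketch of that call shape, with the surrounding node setup (`chain`, `logger`) assumed rather than taken from this PR:

    let ingestor: Box<dyn BlockIngestor> = chain.block_ingestor()?;
    info!(logger, "Starting block ingestor for {}", ingestor.network_name());
    tokio::spawn(ingestor.run());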
36 changes: 33 additions & 3 deletions chain/substreams/src/chain.rs
@@ -1,10 +1,14 @@
+use crate::block_ingestor::SubstreamsBlockIngestor;
 use crate::{data_source::*, EntityChanges, TriggerData, TriggerFilter, TriggersAdapter};
 use anyhow::Error;
 use graph::blockchain::client::ChainClient;
-use graph::blockchain::{BlockIngestor, EmptyNodeCapabilities, NoopRuntimeAdapter};
+use graph::blockchain::{
+    BasicBlockchainBuilder, BlockIngestor, BlockchainBuilder, EmptyNodeCapabilities,
+    NoopRuntimeAdapter,
+};
 use graph::components::store::DeploymentCursorTracker;
 use graph::firehose::FirehoseEndpoints;
-use graph::prelude::{BlockHash, LoggerFactory, MetricsRegistry};
+use graph::prelude::{BlockHash, CheapClone, LoggerFactory, MetricsRegistry};
 use graph::{
     blockchain::{
         self,
@@ -163,6 +167,32 @@ impl Blockchain for Chain {
     }

     fn block_ingestor(&self) -> anyhow::Result<Box<dyn BlockIngestor>> {
-        unreachable!("Substreams rely on the block ingestor from the network they are processing")
+        Ok(Box::new(SubstreamsBlockIngestor::new(
+            self.chain_store.cheap_clone(),
+            self.client.cheap_clone(),
+            self.logger_factory.component_logger("", None),
+            "substreams".to_string(),
+            self.metrics_registry.cheap_clone(),
+        )))
     }
 }
+
+impl BlockchainBuilder<super::Chain> for BasicBlockchainBuilder {
+    fn build(self) -> super::Chain {
+        let BasicBlockchainBuilder {
+            logger_factory,
+            name: _,
+            chain_store,
+            firehose_endpoints,
+            metrics_registry,
+        } = self;
+
+        Chain {
+            chain_store,
+            block_stream_builder: Arc::new(crate::BlockStreamBuilder::new()),
+            logger_factory,
+            client: Arc::new(ChainClient::new_firehose(firehose_endpoints)),
+            metrics_registry,
+        }
+    }
+}
1 change: 1 addition & 0 deletions chain/substreams/src/lib.rs
@@ -4,6 +4,7 @@ mod codec;
mod data_source;
mod trigger;

pub mod block_ingestor;
pub mod mapper;

pub use block_stream::BlockStreamBuilder;
66 changes: 66 additions & 0 deletions node/src/config.rs
@@ -530,6 +530,31 @@ impl Chain {
        for provider in self.providers.iter_mut() {
            provider.validate()?
        }

        if !matches!(self.protocol, BlockchainKind::Substreams) {
            let has_only_substreams_providers = self
                .providers
                .iter()
                .all(|provider| matches!(provider.details, ProviderDetails::Substreams(_)));
            if has_only_substreams_providers {
                bail!(
                    "{} protocol requires an rpc or firehose endpoint defined",
                    self.protocol
                );
            }
        }

        // When using substreams protocol, only substreams endpoints are allowed
        if matches!(self.protocol, BlockchainKind::Substreams) {
            let has_non_substreams_providers = self
                .providers
                .iter()
                .any(|provider| !matches!(provider.details, ProviderDetails::Substreams(_)));
            if has_non_substreams_providers {
                bail!("Substreams protocol only supports substreams providers");
            }
        }

        Ok(())
    }
}
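The two checks above only reject configurations. For reference, a minimal chain section that passes both pairs the substreams protocol with a substreams provider, mirroring the provider shape used by the tests below (the URL and token are placeholders, not values from this PR):

    ingestor = "block_ingestor_node"
    [mainnet]
    shard = "primary"
    protocol = "substreams"
    provider = [
      { label = "substreams", details = { type = "substreams", url = "http://127.0.0.1:8888", token = "TOKEN", features = [] }},
    ]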
@@ -1313,6 +1338,47 @@ mod tests {
        );
    }

    #[test]
    fn fails_if_non_substreams_provider_for_substreams_protocol() {
        let mut actual = toml::from_str::<ChainSection>(
            r#"
            ingestor = "block_ingestor_node"
            [mainnet]
            shard = "primary"
            protocol = "substreams"
            provider = [
              { label = "firehose", details = { type = "firehose", url = "http://127.0.0.1:8888", token = "TOKEN", features = ["filters"] }},
            ]
            "#,
        )
        .unwrap();
        let err = actual.validate().unwrap_err().to_string();

        assert!(err.contains("only supports substreams providers"), "{err}");
    }

    #[test]
    fn fails_if_only_substreams_provider_for_non_substreams_protocol() {
        let mut actual = toml::from_str::<ChainSection>(
            r#"
            ingestor = "block_ingestor_node"
            [mainnet]
            shard = "primary"
            protocol = "ethereum"
            provider = [
              { label = "firehose", details = { type = "substreams", url = "http://127.0.0.1:8888", token = "TOKEN", features = ["filters"] }},
            ]
            "#,
        )
        .unwrap();
        let err = actual.validate().unwrap_err().to_string();

        assert!(
            err.contains("ethereum protocol requires an rpc or firehose endpoint defined"),
            "{err}"
        );
    }

    #[test]
    fn it_works_on_new_web3_provider_from_toml() {
        let actual = toml::from_str(