diff --git a/crates/datafusion_ext/src/lib.rs b/crates/datafusion_ext/src/lib.rs
index f7a1f6027..abaacd2d2 100644
--- a/crates/datafusion_ext/src/lib.rs
+++ b/crates/datafusion_ext/src/lib.rs
@@ -1,4 +1,5 @@
pub mod errors;
+pub mod metrics;
pub mod planner;
pub mod runtime;
pub mod vars;
diff --git a/crates/datafusion_ext/src/metrics.rs b/crates/datafusion_ext/src/metrics.rs
new file mode 100644
index 000000000..0d98c0cd0
--- /dev/null
+++ b/crates/datafusion_ext/src/metrics.rs
@@ -0,0 +1,247 @@
+use datafusion::{
+ arrow::datatypes::SchemaRef,
+ arrow::{datatypes::Schema, record_batch::RecordBatch},
+ error::Result,
+ execution::TaskContext,
+ physical_expr::PhysicalSortExpr,
+ physical_plan::{
+ metrics::{BaselineMetrics, ExecutionPlanMetricsSet, Gauge, MetricBuilder, MetricsSet},
+ DisplayAs, DisplayFormatType, ExecutionPlan, Partitioning, RecordBatchStream,
+ SendableRecordBatchStream, Statistics,
+ },
+};
+use futures::{Stream, StreamExt};
+use std::fmt;
+use std::sync::Arc;
+use std::task::{Context, Poll};
+use std::{any::Any, pin::Pin};
+
+const BYTES_PROCESSED_GAUGE_NAME: &str = "bytes_processed"; // metric name under which per-partition byte counts are reported
+
+/// Standard metrics that every data source (scan) plan should collect
+/// while executing queries.
+#[derive(Debug, Clone)]
+pub struct DataSourceMetrics {
+ /// Cumulative bytes processed by the source plan, exported as a gauge.
+ pub bytes_processed: Gauge,
+
+ /// Baseline metrics (e.g. output row count, elapsed compute time).
+ pub baseline: BaselineMetrics,
+}
+
+impl DataSourceMetrics {
+ pub fn new(partition: usize, metrics: &ExecutionPlanMetricsSet) -> Self { // registers this partition's metrics in the plan-wide set
+ let bytes_processed =
+ MetricBuilder::new(metrics).gauge(BYTES_PROCESSED_GAUGE_NAME, partition); // per-partition gauge of bytes read
+ let baseline = BaselineMetrics::new(metrics, partition); // standard output-rows / elapsed-time metrics
+
+ Self {
+ bytes_processed,
+ baseline,
+ }
+ }
+
+ /// Track metrics based on the poll result from an async stream.
+ pub fn record_poll(
+ &self,
+ poll: Poll