diff --git a/ballista/rust/core/proto/ballista.proto b/ballista/rust/core/proto/ballista.proto index 95b78fcc6d24..62b3185314fd 100644 --- a/ballista/rust/core/proto/ballista.proto +++ b/ballista/rust/core/proto/ballista.proto @@ -274,6 +274,7 @@ message PartitionedFile { string path = 1; uint64 size = 2; uint64 last_modified_ns = 3; + repeated ScalarValue partition_values = 4; } message CsvFormat { @@ -294,7 +295,7 @@ message ListingTableScanNode { ProjectionColumns projection = 4; Schema schema = 5; repeated LogicalExprNode filters = 6; - repeated string partitions = 7; + repeated string table_partition_cols = 7; bool collect_stat = 8; uint32 target_partitions = 9; oneof FileFormatType { @@ -613,33 +614,28 @@ message ScanLimit { uint32 limit = 1; } -message ParquetScanExecNode { +message FileScanExecConf { repeated FileGroup file_groups = 1; Schema schema = 2; - uint32 batch_size = 4; - repeated uint32 projection = 6; - ScanLimit limit = 7; - Statistics statistics = 8; + uint32 batch_size = 3; + repeated uint32 projection = 4; + ScanLimit limit = 5; + Statistics statistics = 6; + repeated string table_partition_cols = 7; +} + +message ParquetScanExecNode { + FileScanExecConf base_conf = 1; } message CsvScanExecNode { - repeated FileGroup file_groups = 1; - Schema schema = 2; - bool has_header = 3; - uint32 batch_size = 4; - string delimiter = 5; - repeated uint32 projection = 6; - ScanLimit limit = 7; - Statistics statistics = 8; + FileScanExecConf base_conf = 1; + bool has_header = 2; + string delimiter = 3; } message AvroScanExecNode { - repeated FileGroup file_groups = 1; - Schema schema = 2; - uint32 batch_size = 4; - repeated uint32 projection = 6; - ScanLimit limit = 7; - Statistics statistics = 8; + FileScanExecConf base_conf = 1; } enum PartitionMode { diff --git a/ballista/rust/core/src/serde/logical_plan/from_proto.rs b/ballista/rust/core/src/serde/logical_plan/from_proto.rs index 26231c5e25c7..259fcb3482a7 100644 --- a/ballista/rust/core/src/serde/logical_plan/from_proto.rs +++ b/ballista/rust/core/src/serde/logical_plan/from_proto.rs @@ -191,7 +191,7 @@ impl TryInto for &protobuf::LogicalPlanNode { let options = ListingOptions { file_extension: scan.file_extension.clone(), format: file_format, - partitions: scan.partitions.clone(), + table_partition_cols: scan.table_partition_cols.clone(), collect_stat: scan.collect_stat, target_partitions: scan.target_partitions as usize, }; diff --git a/ballista/rust/core/src/serde/logical_plan/to_proto.rs b/ballista/rust/core/src/serde/logical_plan/to_proto.rs index ae25d72d57f9..1d1d48e8a4a3 100644 --- a/ballista/rust/core/src/serde/logical_plan/to_proto.rs +++ b/ballista/rust/core/src/serde/logical_plan/to_proto.rs @@ -755,8 +755,11 @@ impl TryInto for &LogicalPlan { .options() .file_extension .clone(), - partitions: listing_table.options().partitions.clone(), - path: listing_table.path().to_owned(), + table_partition_cols: listing_table + .options() + .table_partition_cols + .clone(), + path: listing_table.table_path().to_owned(), schema: Some(schema), projection, filters, diff --git a/ballista/rust/core/src/serde/physical_plan/from_proto.rs b/ballista/rust/core/src/serde/physical_plan/from_proto.rs index dce354ac69fa..99d2de03258f 100644 --- a/ballista/rust/core/src/serde/physical_plan/from_proto.rs +++ b/ballista/rust/core/src/serde/physical_plan/from_proto.rs @@ -30,7 +30,7 @@ use crate::serde::protobuf::ShuffleReaderPartition; use crate::serde::scheduler::PartitionLocation; use crate::serde::{from_proto_binary_op, proto_error, 
protobuf, str_to_byte}; use crate::{convert_box_required, convert_required, into_required}; -use chrono::{DateTime, NaiveDateTime, TimeZone, Utc}; +use chrono::{TimeZone, Utc}; use datafusion::arrow::datatypes::{DataType, Schema, SchemaRef}; use datafusion::catalog::catalog::{ CatalogList, CatalogProvider, MemoryCatalogList, MemoryCatalogProvider, @@ -46,7 +46,9 @@ use datafusion::logical_plan::{ }; use datafusion::physical_plan::aggregates::{create_aggregate_expr, AggregateFunction}; use datafusion::physical_plan::coalesce_partitions::CoalescePartitionsExec; -use datafusion::physical_plan::file_format::{AvroExec, CsvExec, ParquetExec}; +use datafusion::physical_plan::file_format::{ + AvroExec, CsvExec, ParquetExec, PhysicalPlanConfig, +}; use datafusion::physical_plan::hash_aggregate::{AggregateMode, HashAggregateExec}; use datafusion::physical_plan::hash_join::PartitionMode; use datafusion::physical_plan::metrics::ExecutionPlanMetricsSet; @@ -118,64 +120,21 @@ impl TryInto> for &protobuf::PhysicalPlanNode { .try_into()?; Ok(Arc::new(FilterExec::try_new(predicate, input)?)) } - PhysicalPlanType::CsvScan(scan) => { - let schema = Arc::new(convert_required!(scan.schema)?); - let projection = scan.projection.iter().map(|i| *i as usize).collect(); - let statistics = convert_required!(scan.statistics)?; - - Ok(Arc::new(CsvExec::new( - Arc::new(LocalFileSystem {}), - scan.file_groups - .iter() - .map(|p| p.into()) - .collect::>>(), - statistics, - schema, - scan.has_header, - str_to_byte(&scan.delimiter)?, - Some(projection), - scan.batch_size as usize, - scan.limit.as_ref().map(|sl| sl.limit as usize), - ))) - } + PhysicalPlanType::CsvScan(scan) => Ok(Arc::new(CsvExec::new( + scan.base_conf.as_ref().unwrap().try_into()?, + scan.has_header, + str_to_byte(&scan.delimiter)?, + ))), PhysicalPlanType::ParquetScan(scan) => { - let schema = Arc::new(convert_required!(scan.schema)?); - let projection = scan.projection.iter().map(|i| *i as usize).collect(); - let statistics = convert_required!(scan.statistics)?; - Ok(Arc::new(ParquetExec::new( - Arc::new(LocalFileSystem {}), - scan.file_groups - .iter() - .map(|p| p.into()) - .collect::>>(), - statistics, - schema, - Some(projection), + scan.base_conf.as_ref().unwrap().try_into()?, // TODO predicate should be de-serialized None, - scan.batch_size as usize, - scan.limit.as_ref().map(|sl| sl.limit as usize), - ))) - } - PhysicalPlanType::AvroScan(scan) => { - let schema = Arc::new(convert_required!(scan.schema)?); - let projection = scan.projection.iter().map(|i| *i as usize).collect(); - let statistics = convert_required!(scan.statistics)?; - - Ok(Arc::new(AvroExec::new( - Arc::new(LocalFileSystem {}), - scan.file_groups - .iter() - .map(|p| p.into()) - .collect::>>(), - statistics, - schema, - Some(projection), - scan.batch_size as usize, - scan.limit.as_ref().map(|sl| sl.limit as usize), ))) } + PhysicalPlanType::AvroScan(scan) => Ok(Arc::new(AvroExec::new( + scan.base_conf.as_ref().unwrap().try_into()?, + ))), PhysicalPlanType::CoalesceBatches(coalesce_batches) => { let input: Arc = convert_box_required!(coalesce_batches.input)?; @@ -738,9 +697,11 @@ pub fn parse_protobuf_hash_partitioning( } } -impl From<&protobuf::PartitionedFile> for PartitionedFile { - fn from(val: &protobuf::PartitionedFile) -> Self { - PartitionedFile { +impl TryFrom<&protobuf::PartitionedFile> for PartitionedFile { + type Error = BallistaError; + + fn try_from(val: &protobuf::PartitionedFile) -> Result { + Ok(PartitionedFile { file_meta: FileMeta { sized_file: SizedFile { 
path: val.path.clone(), @@ -752,13 +713,23 @@ impl From<&protobuf::PartitionedFile> for PartitionedFile { Some(Utc.timestamp_nanos(val.last_modified_ns as i64)) }, }, - } + partition_values: val + .partition_values + .iter() + .map(|v| v.try_into()) + .collect::, _>>()?, + }) } } -impl From<&protobuf::FileGroup> for Vec { - fn from(val: &protobuf::FileGroup) -> Self { - val.files.iter().map(|f| f.into()).collect() +impl TryFrom<&protobuf::FileGroup> for Vec { + type Error = BallistaError; + + fn try_from(val: &protobuf::FileGroup) -> Result { + val.files + .iter() + .map(|f| f.try_into()) + .collect::, _>>() } } @@ -795,3 +766,37 @@ impl TryInto for &protobuf::Statistics { }) } } + +impl TryInto for &protobuf::FileScanExecConf { + type Error = BallistaError; + + fn try_into(self) -> Result { + let schema = Arc::new(convert_required!(self.schema)?); + let projection = self + .projection + .iter() + .map(|i| *i as usize) + .collect::>(); + let projection = if projection.is_empty() { + None + } else { + Some(projection) + }; + let statistics = convert_required!(self.statistics)?; + + Ok(PhysicalPlanConfig { + object_store: Arc::new(LocalFileSystem {}), + file_schema: schema, + file_groups: self + .file_groups + .iter() + .map(|f| f.try_into()) + .collect::, _>>()?, + statistics, + projection, + batch_size: self.batch_size as usize, + limit: self.limit.as_ref().map(|sl| sl.limit as usize), + table_partition_cols: vec![], + }) + } +} diff --git a/ballista/rust/core/src/serde/physical_plan/to_proto.rs b/ballista/rust/core/src/serde/physical_plan/to_proto.rs index 52285eea0a9c..afbb02a4f216 100644 --- a/ballista/rust/core/src/serde/physical_plan/to_proto.rs +++ b/ballista/rust/core/src/serde/physical_plan/to_proto.rs @@ -26,7 +26,6 @@ use std::{ sync::Arc, }; -use datafusion::physical_plan::hash_aggregate::AggregateMode; use datafusion::physical_plan::hash_join::{HashJoinExec, PartitionMode}; use datafusion::physical_plan::limit::{GlobalLimitExec, LocalLimitExec}; use datafusion::physical_plan::projection::ProjectionExec; @@ -43,6 +42,9 @@ use datafusion::physical_plan::{ file_format::ParquetExec, }; use datafusion::physical_plan::{file_format::AvroExec, filter::FilterExec}; +use datafusion::physical_plan::{ + file_format::PhysicalPlanConfig, hash_aggregate::AggregateMode, +}; use datafusion::{ datasource::PartitionedFile, physical_plan::coalesce_batches::CoalesceBatchesExec, }; @@ -244,90 +246,29 @@ impl TryInto for Arc { ))), }) } else if let Some(exec) = plan.downcast_ref::() { - let file_groups = exec - .file_groups() - .iter() - .map(|p| p.as_slice().into()) - .collect(); Ok(protobuf::PhysicalPlanNode { physical_plan_type: Some(PhysicalPlanType::CsvScan( protobuf::CsvScanExecNode { - file_groups, - statistics: Some((&exec.statistics()).into()), - limit: exec - .limit() - .map(|l| protobuf::ScanLimit { limit: l as u32 }), - projection: exec - .projection() - .as_ref() - .ok_or_else(|| { - BallistaError::General( - "projection in CsvExec does not exist.".to_owned(), - ) - })? 
- .iter() - .map(|n| *n as u32) - .collect(), - schema: Some(exec.file_schema().as_ref().into()), + base_conf: Some(exec.base_config().try_into()?), has_header: exec.has_header(), delimiter: byte_to_string(exec.delimiter())?, - batch_size: exec.batch_size() as u32, }, )), }) } else if let Some(exec) = plan.downcast_ref::() { - let file_groups = exec - .file_groups() - .iter() - .map(|p| p.as_slice().into()) - .collect(); - Ok(protobuf::PhysicalPlanNode { physical_plan_type: Some(PhysicalPlanType::ParquetScan( protobuf::ParquetScanExecNode { - file_groups, - statistics: Some((&exec.statistics()).into()), - limit: exec - .limit() - .map(|l| protobuf::ScanLimit { limit: l as u32 }), - schema: Some(exec.schema().as_ref().into()), - projection: exec - .projection() - .as_ref() - .iter() - .map(|n| *n as u32) - .collect(), - batch_size: exec.batch_size() as u32, + base_conf: Some(exec.base_config().try_into()?), + // TODO serialize predicates }, )), }) } else if let Some(exec) = plan.downcast_ref::() { - let file_groups = exec - .file_groups() - .iter() - .map(|p| p.as_slice().into()) - .collect(); Ok(protobuf::PhysicalPlanNode { physical_plan_type: Some(PhysicalPlanType::AvroScan( protobuf::AvroScanExecNode { - file_groups, - statistics: Some((&exec.statistics()).into()), - limit: exec - .limit() - .map(|l| protobuf::ScanLimit { limit: l as u32 }), - projection: exec - .projection() - .as_ref() - .ok_or_else(|| { - BallistaError::General( - "projection in AvroExec does not exist.".to_owned(), - ) - })? - .iter() - .map(|n| *n as u32) - .collect(), - schema: Some(exec.file_schema().as_ref().into()), - batch_size: exec.batch_size() as u32, + base_conf: Some(exec.base_config().try_into()?), }, )), }) @@ -674,9 +615,11 @@ fn try_parse_when_then_expr( }) } -impl From<&PartitionedFile> for protobuf::PartitionedFile { - fn from(pf: &PartitionedFile) -> protobuf::PartitionedFile { - protobuf::PartitionedFile { +impl TryFrom<&PartitionedFile> for protobuf::PartitionedFile { + type Error = BallistaError; + + fn try_from(pf: &PartitionedFile) -> Result { + Ok(protobuf::PartitionedFile { path: pf.file_meta.path().to_owned(), size: pf.file_meta.size(), last_modified_ns: pf @@ -684,15 +627,25 @@ impl From<&PartitionedFile> for protobuf::PartitionedFile { .last_modified .map(|ts| ts.timestamp_nanos() as u64) .unwrap_or(0), - } + partition_values: pf + .partition_values + .iter() + .map(|v| v.try_into()) + .collect::, _>>()?, + }) } } -impl From<&[PartitionedFile]> for protobuf::FileGroup { - fn from(gr: &[PartitionedFile]) -> protobuf::FileGroup { - protobuf::FileGroup { - files: gr.iter().map(|f| f.into()).collect(), - } +impl TryFrom<&[PartitionedFile]> for protobuf::FileGroup { + type Error = BallistaError; + + fn try_from(gr: &[PartitionedFile]) -> Result { + Ok(protobuf::FileGroup { + files: gr + .iter() + .map(|f| f.try_into()) + .collect::, _>>()?, + }) } } @@ -722,3 +675,32 @@ impl From<&Statistics> for protobuf::Statistics { } } } + +impl TryFrom<&PhysicalPlanConfig> for protobuf::FileScanExecConf { + type Error = BallistaError; + fn try_from( + conf: &PhysicalPlanConfig, + ) -> Result { + let file_groups = conf + .file_groups + .iter() + .map(|p| p.as_slice().try_into()) + .collect::, _>>()?; + + Ok(protobuf::FileScanExecConf { + file_groups, + statistics: Some((&conf.statistics).into()), + limit: conf.limit.map(|l| protobuf::ScanLimit { limit: l as u32 }), + projection: conf + .projection + .as_ref() + .unwrap_or(&vec![]) + .iter() + .map(|n| *n as u32) + .collect(), + schema: 
Some(conf.file_schema.as_ref().into()), + batch_size: conf.batch_size as u32, + table_partition_cols: conf.table_partition_cols.to_vec(), + }) + } +} diff --git a/benchmarks/src/bin/tpch.rs b/benchmarks/src/bin/tpch.rs index bfe87efb9f74..7bc6510ac2ed 100644 --- a/benchmarks/src/bin/tpch.rs +++ b/benchmarks/src/bin/tpch.rs @@ -496,7 +496,7 @@ fn get_table( file_extension: extension.to_owned(), target_partitions, collect_stat: true, - partitions: vec![], + table_partition_cols: vec![], }; Ok(Arc::new(ListingTable::new( diff --git a/datafusion/src/datasource/file_format/avro.rs b/datafusion/src/datasource/file_format/avro.rs index c6326962e34a..515584b16c03 100644 --- a/datafusion/src/datasource/file_format/avro.rs +++ b/datafusion/src/datasource/file_format/avro.rs @@ -25,11 +25,12 @@ use arrow::{self, datatypes::SchemaRef}; use async_trait::async_trait; use futures::StreamExt; -use super::{FileFormat, PhysicalPlanConfig}; +use super::FileFormat; use crate::avro_to_arrow::read_avro_schema_from_reader; use crate::datasource::object_store::{ObjectReader, ObjectReaderStream}; use crate::error::Result; -use crate::physical_plan::file_format::AvroExec; +use crate::logical_plan::Expr; +use crate::physical_plan::file_format::{AvroExec, PhysicalPlanConfig}; use crate::physical_plan::ExecutionPlan; use crate::physical_plan::Statistics; @@ -61,16 +62,9 @@ impl FileFormat for AvroFormat { async fn create_physical_plan( &self, conf: PhysicalPlanConfig, + _filters: &[Expr], ) -> Result> { - let exec = AvroExec::new( - conf.object_store, - conf.files, - conf.statistics, - conf.schema, - conf.projection, - conf.batch_size, - conf.limit, - ); + let exec = AvroExec::new(conf); Ok(Arc::new(exec)) } } @@ -79,12 +73,9 @@ impl FileFormat for AvroFormat { #[cfg(feature = "avro")] mod tests { use crate::{ - datasource::{ - object_store::local::{ - local_file_meta, local_object_reader, local_object_reader_stream, - LocalFileSystem, - }, - PartitionedFile, + datasource::object_store::local::{ + local_object_reader, local_object_reader_stream, local_unpartitioned_file, + LocalFileSystem, }, physical_plan::collect, }; @@ -349,7 +340,7 @@ mod tests { let testdata = crate::test_util::arrow_test_data(); let filename = format!("{}/avro/{}", testdata, file_name); let format = AvroFormat {}; - let schema = format + let file_schema = format .infer_schema(local_object_reader_stream(vec![filename.clone()])) .await .expect("Schema inference"); @@ -357,20 +348,21 @@ mod tests { .infer_stats(local_object_reader(filename.clone())) .await .expect("Stats inference"); - let files = vec![vec![PartitionedFile { - file_meta: local_file_meta(filename.to_owned()), - }]]; + let file_groups = vec![vec![local_unpartitioned_file(filename.to_owned())]]; let exec = format - .create_physical_plan(PhysicalPlanConfig { - object_store: Arc::new(LocalFileSystem {}), - schema, - files, - statistics, - projection: projection.clone(), - batch_size, - filters: vec![], - limit, - }) + .create_physical_plan( + PhysicalPlanConfig { + object_store: Arc::new(LocalFileSystem {}), + file_schema, + file_groups, + statistics, + projection: projection.clone(), + batch_size, + limit, + table_partition_cols: vec![], + }, + &[], + ) .await?; Ok(exec) } diff --git a/datafusion/src/datasource/file_format/csv.rs b/datafusion/src/datasource/file_format/csv.rs index f9959943a2e4..337511316c51 100644 --- a/datafusion/src/datasource/file_format/csv.rs +++ b/datafusion/src/datasource/file_format/csv.rs @@ -25,10 +25,11 @@ use arrow::{self, datatypes::SchemaRef}; use 
async_trait::async_trait; use futures::StreamExt; -use super::{FileFormat, PhysicalPlanConfig}; +use super::FileFormat; use crate::datasource::object_store::{ObjectReader, ObjectReaderStream}; use crate::error::Result; -use crate::physical_plan::file_format::CsvExec; +use crate::logical_plan::Expr; +use crate::physical_plan::file_format::{CsvExec, PhysicalPlanConfig}; use crate::physical_plan::ExecutionPlan; use crate::physical_plan::Statistics; @@ -123,18 +124,9 @@ impl FileFormat for CsvFormat { async fn create_physical_plan( &self, conf: PhysicalPlanConfig, + _filters: &[Expr], ) -> Result> { - let exec = CsvExec::new( - conf.object_store, - conf.files, - conf.statistics, - conf.schema, - self.has_header, - self.delimiter, - conf.projection, - conf.batch_size, - conf.limit, - ); + let exec = CsvExec::new(conf, self.has_header, self.delimiter); Ok(Arc::new(exec)) } } @@ -148,10 +140,9 @@ mod tests { datasource::{ file_format::PhysicalPlanConfig, object_store::local::{ - local_file_meta, local_object_reader, local_object_reader_stream, - LocalFileSystem, + local_object_reader, local_object_reader_stream, + local_unpartitioned_file, LocalFileSystem, }, - PartitionedFile, }, physical_plan::collect, }; @@ -261,7 +252,7 @@ mod tests { let testdata = crate::test_util::arrow_test_data(); let filename = format!("{}/csv/{}", testdata, file_name); let format = CsvFormat::default(); - let schema = format + let file_schema = format .infer_schema(local_object_reader_stream(vec![filename.clone()])) .await .expect("Schema inference"); @@ -269,20 +260,21 @@ mod tests { .infer_stats(local_object_reader(filename.clone())) .await .expect("Stats inference"); - let files = vec![vec![PartitionedFile { - file_meta: local_file_meta(filename.to_owned()), - }]]; + let file_groups = vec![vec![local_unpartitioned_file(filename.to_owned())]]; let exec = format - .create_physical_plan(PhysicalPlanConfig { - object_store: Arc::new(LocalFileSystem {}), - schema, - files, - statistics, - projection: projection.clone(), - batch_size, - filters: vec![], - limit, - }) + .create_physical_plan( + PhysicalPlanConfig { + object_store: Arc::new(LocalFileSystem {}), + file_schema, + file_groups, + statistics, + projection: projection.clone(), + batch_size, + limit, + table_partition_cols: vec![], + }, + &[], + ) .await?; Ok(exec) } diff --git a/datafusion/src/datasource/file_format/json.rs b/datafusion/src/datasource/file_format/json.rs index a579831c7241..72bbee665a61 100644 --- a/datafusion/src/datasource/file_format/json.rs +++ b/datafusion/src/datasource/file_format/json.rs @@ -32,6 +32,7 @@ use super::FileFormat; use super::PhysicalPlanConfig; use crate::datasource::object_store::{ObjectReader, ObjectReaderStream}; use crate::error::Result; +use crate::logical_plan::Expr; use crate::physical_plan::file_format::NdJsonExec; use crate::physical_plan::ExecutionPlan; use crate::physical_plan::Statistics; @@ -93,16 +94,9 @@ impl FileFormat for JsonFormat { async fn create_physical_plan( &self, conf: PhysicalPlanConfig, + _filters: &[Expr], ) -> Result> { - let exec = NdJsonExec::new( - conf.object_store, - conf.files, - conf.statistics, - conf.schema, - conf.projection, - conf.batch_size, - conf.limit, - ); + let exec = NdJsonExec::new(conf); Ok(Arc::new(exec)) } } @@ -116,10 +110,9 @@ mod tests { datasource::{ file_format::PhysicalPlanConfig, object_store::local::{ - local_file_meta, local_object_reader, local_object_reader_stream, - LocalFileSystem, + local_object_reader, local_object_reader_stream, + local_unpartitioned_file, 
LocalFileSystem, }, - PartitionedFile, }, physical_plan::collect, }; @@ -212,7 +205,7 @@ mod tests { ) -> Result> { let filename = "tests/jsons/2.json"; let format = JsonFormat::default(); - let schema = format + let file_schema = format .infer_schema(local_object_reader_stream(vec![filename.to_owned()])) .await .expect("Schema inference"); @@ -220,20 +213,21 @@ mod tests { .infer_stats(local_object_reader(filename.to_owned())) .await .expect("Stats inference"); - let files = vec![vec![PartitionedFile { - file_meta: local_file_meta(filename.to_owned()), - }]]; + let file_groups = vec![vec![local_unpartitioned_file(filename.to_owned())]]; let exec = format - .create_physical_plan(PhysicalPlanConfig { - object_store: Arc::new(LocalFileSystem {}), - schema, - files, - statistics, - projection: projection.clone(), - batch_size, - filters: vec![], - limit, - }) + .create_physical_plan( + PhysicalPlanConfig { + object_store: Arc::new(LocalFileSystem {}), + file_schema, + file_groups, + statistics, + projection: projection.clone(), + batch_size, + limit, + table_partition_cols: vec![], + }, + &[], + ) .await?; Ok(exec) } diff --git a/datafusion/src/datasource/file_format/mod.rs b/datafusion/src/datasource/file_format/mod.rs index d545596f6e5c..54491615fc4c 100644 --- a/datafusion/src/datasource/file_format/mod.rs +++ b/datafusion/src/datasource/file_format/mod.rs @@ -29,33 +29,12 @@ use std::sync::Arc; use crate::arrow::datatypes::SchemaRef; use crate::error::Result; use crate::logical_plan::Expr; +use crate::physical_plan::file_format::PhysicalPlanConfig; use crate::physical_plan::{ExecutionPlan, Statistics}; use async_trait::async_trait; -use super::object_store::{ObjectReader, ObjectReaderStream, ObjectStore}; -use super::PartitionedFile; - -/// The configurations to be passed when creating a physical plan for -/// a given file format. -pub struct PhysicalPlanConfig { - /// Store from which the `files` should be fetched - pub object_store: Arc, - /// Schema before projection - pub schema: SchemaRef, - /// List of files to be processed, grouped into partitions - pub files: Vec>, - /// Estimated overall statistics of the plan, taking `filters` into account - pub statistics: Statistics, - /// Columns on which to project the data - pub projection: Option>, - /// The maximum number of records per arrow column - pub batch_size: usize, - /// The filters that were pushed down to this execution plan - pub filters: Vec, - /// The minimum number of records required from this source plan - pub limit: Option, -} +use super::object_store::{ObjectReader, ObjectReaderStream}; /// This trait abstracts all the file format specific implementations /// from the `TableProvider`. 
This helps code re-utilization accross @@ -81,5 +60,6 @@ pub trait FileFormat: Send + Sync + fmt::Debug { async fn create_physical_plan( &self, conf: PhysicalPlanConfig, + filters: &[Expr], ) -> Result>; } diff --git a/datafusion/src/datasource/file_format/parquet.rs b/datafusion/src/datasource/file_format/parquet.rs index 424a2985a3f7..819f37448636 100644 --- a/datafusion/src/datasource/file_format/parquet.rs +++ b/datafusion/src/datasource/file_format/parquet.rs @@ -42,6 +42,7 @@ use crate::datasource::{create_max_min_accs, get_col_stats}; use crate::error::DataFusionError; use crate::error::Result; use crate::logical_plan::combine_filters; +use crate::logical_plan::Expr; use crate::physical_plan::expressions::{MaxAccumulator, MinAccumulator}; use crate::physical_plan::file_format::ParquetExec; use crate::physical_plan::ExecutionPlan; @@ -104,26 +105,18 @@ impl FileFormat for ParquetFormat { async fn create_physical_plan( &self, conf: PhysicalPlanConfig, + filters: &[Expr], ) -> Result> { // If enable pruning then combine the filters to build the predicate. // If disable pruning then set the predicate to None, thus readers // will not prune data based on the statistics. let predicate = if self.enable_pruning { - combine_filters(&conf.filters) + combine_filters(filters) } else { None }; - Ok(Arc::new(ParquetExec::new( - conf.object_store, - conf.files, - conf.statistics, - conf.schema, - conf.projection, - predicate, - conf.batch_size, - conf.limit, - ))) + Ok(Arc::new(ParquetExec::new(conf, predicate))) } } @@ -330,12 +323,9 @@ impl ChunkReader for ChunkObjectReader { #[cfg(test)] mod tests { use crate::{ - datasource::{ - object_store::local::{ - local_file_meta, local_object_reader, local_object_reader_stream, - LocalFileSystem, - }, - PartitionedFile, + datasource::object_store::local::{ + local_object_reader, local_object_reader_stream, local_unpartitioned_file, + LocalFileSystem, }, physical_plan::collect, }; @@ -595,7 +585,7 @@ mod tests { let testdata = crate::test_util::parquet_test_data(); let filename = format!("{}/{}", testdata, file_name); let format = ParquetFormat::default(); - let schema = format + let file_schema = format .infer_schema(local_object_reader_stream(vec![filename.clone()])) .await .expect("Schema inference"); @@ -603,20 +593,21 @@ mod tests { .infer_stats(local_object_reader(filename.clone())) .await .expect("Stats inference"); - let files = vec![vec![PartitionedFile { - file_meta: local_file_meta(filename.clone()), - }]]; + let file_groups = vec![vec![local_unpartitioned_file(filename.clone())]]; let exec = format - .create_physical_plan(PhysicalPlanConfig { - object_store: Arc::new(LocalFileSystem {}), - schema, - files, - statistics, - projection: projection.clone(), - batch_size, - filters: vec![], - limit, - }) + .create_physical_plan( + PhysicalPlanConfig { + object_store: Arc::new(LocalFileSystem {}), + file_schema, + file_groups, + statistics, + projection: projection.clone(), + batch_size, + limit, + table_partition_cols: vec![], + }, + &[], + ) .await?; Ok(exec) } diff --git a/datafusion/src/datasource/listing/helpers.rs b/datafusion/src/datasource/listing/helpers.rs new file mode 100644 index 000000000000..912179c36f06 --- /dev/null +++ b/datafusion/src/datasource/listing/helpers.rs @@ -0,0 +1,723 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. 
The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +//! Helper functions for the table implementation + +use std::sync::Arc; + +use arrow::{ + array::{ + Array, ArrayBuilder, ArrayRef, Date64Array, Date64Builder, StringArray, + StringBuilder, UInt64Array, UInt64Builder, + }, + datatypes::{DataType, Field, Schema}, + record_batch::RecordBatch, +}; +use chrono::{TimeZone, Utc}; +use futures::{ + stream::{self}, + StreamExt, TryStreamExt, +}; +use log::debug; + +use crate::{ + error::Result, + execution::context::ExecutionContext, + logical_plan::{self, Expr, ExpressionVisitor, Recursion}, + physical_plan::functions::Volatility, + scalar::ScalarValue, +}; + +use crate::datasource::{ + object_store::{FileMeta, ObjectStore, SizedFile}, + MemTable, PartitionedFile, PartitionedFileStream, +}; + +const FILE_SIZE_COLUMN_NAME: &str = "_df_part_file_size_"; +const FILE_PATH_COLUMN_NAME: &str = "_df_part_file_path_"; +const FILE_MODIFIED_COLUMN_NAME: &str = "_df_part_file_modified_"; + +/// The `ExpressionVisitor` for `expr_applicable_for_cols`. Walks the tree to +/// validate that the given expression is applicable with only the `col_names` +/// set of columns. +struct ApplicabilityVisitor<'a> { + col_names: &'a [String], + is_applicable: &'a mut bool, +} + +impl ApplicabilityVisitor<'_> { + fn visit_volatility(self, volatility: Volatility) -> Recursion { + match volatility { + Volatility::Immutable => Recursion::Continue(self), + // TODO: Stable functions could be `applicable`, but that would require access to the context + Volatility::Stable | Volatility::Volatile => { + *self.is_applicable = false; + Recursion::Stop(self) + } + } + } +} + +impl ExpressionVisitor for ApplicabilityVisitor<'_> { + fn pre_visit(self, expr: &Expr) -> Result> { + let rec = match expr { + Expr::Column(logical_plan::Column { ref name, .. }) => { + *self.is_applicable &= self.col_names.contains(name); + Recursion::Stop(self) // leaf node anyway + } + Expr::Literal(_) + | Expr::Alias(_, _) + | Expr::ScalarVariable(_) + | Expr::Not(_) + | Expr::IsNotNull(_) + | Expr::IsNull(_) + | Expr::Negative(_) + | Expr::Cast { .. } + | Expr::TryCast { .. } + | Expr::BinaryExpr { .. } + | Expr::Between { .. } + | Expr::InList { .. } + | Expr::GetIndexedField { .. } + | Expr::Case { .. } => Recursion::Continue(self), + + Expr::ScalarFunction { fun, .. } => self.visit_volatility(fun.volatility()), + Expr::ScalarUDF { fun, .. } => { + self.visit_volatility(fun.signature.volatility) + } + + // TODO other expressions are not handled yet: + // - AGGREGATE, WINDOW and SORT should not end up in filter conditions, except maybe in some edge cases + // - Can `Wildcard` be considered as a `Literal`? + // - ScalarVariable could be `applicable`, but that would require access to the context + Expr::AggregateUDF { .. } + | Expr::AggregateFunction { .. } + | Expr::Sort { .. } + | Expr::WindowFunction { .. 
} + | Expr::Wildcard => { + *self.is_applicable = false; + Recursion::Stop(self) + } + }; + Ok(rec) + } +} + +/// Check whether the given expression can be resolved using only the columns `col_names`. +/// This means that if this function returns true: +/// - the table provider can filter the table partition values with this expression +/// - the expression can be marked as `TableProviderFilterPushDown::Exact` once this filtering +/// was performed +pub fn expr_applicable_for_cols(col_names: &[String], expr: &Expr) -> bool { + let mut is_applicable = true; + expr.accept(ApplicabilityVisitor { + col_names, + is_applicable: &mut is_applicable, + }) + .unwrap(); + is_applicable +} + +/// Partition the list of files into `n` groups +pub fn split_files( + partitioned_files: Vec, + n: usize, +) -> Vec> { + if partitioned_files.is_empty() { + return vec![]; + } + // effectively this is div with rounding up instead of truncating + let chunk_size = (partitioned_files.len() + n - 1) / n; + partitioned_files + .chunks(chunk_size) + .map(|c| c.to_vec()) + .collect() +} + +/// Discover the partitions on the given path and prune out files +/// that belong to irrelevant partitions using `filters` expressions. +/// `filters` might contain expressions that can be resolved only at the +/// file level (e.g. Parquet row group pruning). +/// +/// TODO for tables with many files (10k+), it will usually more efficient +/// to first list the folders relative to the first partition dimension, +/// prune those, then list only the contain of the remaining folders. +pub async fn pruned_partition_list( + store: &dyn ObjectStore, + table_path: &str, + filters: &[Expr], + file_extension: &str, + table_partition_cols: &[String], +) -> Result { + // if no partition col => simply list all the files + if table_partition_cols.is_empty() { + return Ok(Box::pin( + store + .list_file_with_suffix(table_path, file_extension) + .await? + .map(|f| { + Ok(PartitionedFile { + partition_values: vec![], + file_meta: f?, + }) + }), + )); + } + + let applicable_filters: Vec<_> = filters + .iter() + .filter(|f| expr_applicable_for_cols(table_partition_cols, f)) + .collect(); + let stream_path = table_path.to_owned(); + if applicable_filters.is_empty() { + // Parse the partition values while listing all the files + // Note: We might avoid parsing the partition values if they are not used in any projection, + // but the cost of parsing will likely be far dominated by the time to fetch the listing from + // the object store. + let table_partition_cols_stream = table_partition_cols.to_vec(); + Ok(Box::pin( + store + .list_file_with_suffix(table_path, file_extension) + .await? + .filter_map(move |f| { + let stream_path = stream_path.clone(); + let table_partition_cols_stream = table_partition_cols_stream.clone(); + async move { + let file_meta = match f { + Ok(fm) => fm, + Err(err) => return Some(Err(err)), + }; + let parsed_path = parse_partitions_for_path( + &stream_path, + file_meta.path(), + &table_partition_cols_stream, + ) + .map(|p| { + p.iter() + .map(|&pn| ScalarValue::Utf8(Some(pn.to_owned()))) + .collect() + }); + + parsed_path.map(|partition_values| { + Ok(PartitionedFile { + partition_values, + file_meta, + }) + }) + } + }), + )) + } else { + // parse the partition values and serde them as a RecordBatch to filter them + // TODO avoid collecting but have a streaming memory table instead + let batches: Vec = store + .list_file_with_suffix(table_path, file_extension) + .await? 
+ // TODO we set an arbitrary high batch size here, it does not matter as we list + // all the files anyway. This number will need to be adjusted according to the object + // store if we switch to a streaming-stlye pruning of the files. For instance S3 lists + // 1000 items at a time so batches of 1000 would be ideal with S3 as store. + .chunks(1024) + .map(|v| v.into_iter().collect::>>()) + .map(move |metas| paths_to_batch(table_partition_cols, &stream_path, &metas?)) + .try_collect() + .await?; + + let mem_table = MemTable::try_new(batches[0].schema(), vec![batches])?; + + // Filter the partitions using a local datafusion context + // TODO having the external context would allow us to resolve `Volatility::Stable` + // scalar functions (`ScalarFunction` & `ScalarUDF`) and `ScalarVariable`s + let mut ctx = ExecutionContext::new(); + let mut df = ctx.read_table(Arc::new(mem_table))?; + for filter in applicable_filters { + df = df.filter(filter.clone())?; + } + let filtered_batches = df.collect().await?; + + Ok(Box::pin(stream::iter( + batches_to_paths(&filtered_batches).into_iter().map(Ok), + ))) + } +} + +/// convert the paths of the files to a record batch with the following columns: +/// - one column for the file size named `_df_part_file_size_` +/// - one column for with the original path named `_df_part_file_path_` +/// - one column for with the last modified date named `_df_part_file_modified_` +/// - ... one column by partition ... +/// +/// Note: For the last modified date, this looses precisions higher than millisecond. +fn paths_to_batch( + table_partition_cols: &[String], + table_path: &str, + metas: &[FileMeta], +) -> Result { + let mut key_builder = StringBuilder::new(metas.len()); + let mut length_builder = UInt64Builder::new(metas.len()); + let mut modified_builder = Date64Builder::new(metas.len()); + let mut partition_builders = table_partition_cols + .iter() + .map(|_| StringBuilder::new(metas.len())) + .collect::>(); + for file_meta in metas { + if let Some(partition_values) = + parse_partitions_for_path(table_path, file_meta.path(), table_partition_cols) + { + key_builder.append_value(file_meta.path())?; + length_builder.append_value(file_meta.size())?; + match file_meta.last_modified { + Some(lm) => modified_builder.append_value(lm.timestamp_millis())?, + None => modified_builder.append_null()?, + } + for (i, part_val) in partition_values.iter().enumerate() { + partition_builders[i].append_value(part_val)?; + } + } else { + debug!("No partitioning for path {}", file_meta.path()); + } + } + + // finish all builders + let mut col_arrays: Vec = vec![ + ArrayBuilder::finish(&mut key_builder), + ArrayBuilder::finish(&mut length_builder), + ArrayBuilder::finish(&mut modified_builder), + ]; + for mut partition_builder in partition_builders { + col_arrays.push(ArrayBuilder::finish(&mut partition_builder)); + } + + // put the schema together + let mut fields = vec![ + Field::new(FILE_PATH_COLUMN_NAME, DataType::Utf8, false), + Field::new(FILE_SIZE_COLUMN_NAME, DataType::UInt64, false), + Field::new(FILE_MODIFIED_COLUMN_NAME, DataType::Date64, false), + ]; + for pn in table_partition_cols { + fields.push(Field::new(pn, DataType::Utf8, false)); + } + + let batch = RecordBatch::try_new(Arc::new(Schema::new(fields)), col_arrays)?; + Ok(batch) +} + +/// convert a set of record batches created by `paths_to_batch()` back to partitioned files. 
+fn batches_to_paths(batches: &[RecordBatch]) -> Vec { + batches + .iter() + .flat_map(|batch| { + let key_array = batch + .column(0) + .as_any() + .downcast_ref::() + .unwrap(); + let length_array = batch + .column(1) + .as_any() + .downcast_ref::() + .unwrap(); + let modified_array = batch + .column(2) + .as_any() + .downcast_ref::() + .unwrap(); + + (0..batch.num_rows()).map(move |row| PartitionedFile { + file_meta: FileMeta { + last_modified: match modified_array.is_null(row) { + false => Some(Utc.timestamp_millis(modified_array.value(row))), + true => None, + }, + sized_file: SizedFile { + path: key_array.value(row).to_owned(), + size: length_array.value(row), + }, + }, + partition_values: (3..batch.columns().len()) + .map(|col| { + ScalarValue::try_from_array(batch.column(col), row).unwrap() + }) + .collect(), + }) + }) + .collect() +} + +/// Extract the partition values for the given `file_path` (in the given `table_path`) +/// associated to the partitions defined by `table_partition_cols` +fn parse_partitions_for_path<'a>( + table_path: &str, + file_path: &'a str, + table_partition_cols: &[String], +) -> Option> { + let subpath = file_path.strip_prefix(table_path)?; + + // ignore whether table_path ended with "/" or not + let subpath = match subpath.strip_prefix('/') { + Some(subpath) => subpath, + None => subpath, + }; + + let mut part_values = vec![]; + for (path, pn) in subpath.split('/').zip(table_partition_cols) { + match path.split_once('=') { + Some((name, val)) if name == pn => part_values.push(val), + _ => return None, + } + } + Some(part_values) +} + +#[cfg(test)] +mod tests { + use crate::{ + logical_plan::{case, col, lit}, + test::object_store::TestObjectStore, + }; + + use super::*; + + #[test] + fn test_split_files() { + let new_partitioned_file = |path: &str| PartitionedFile::new(path.to_owned(), 10); + let files = vec![ + new_partitioned_file("a"), + new_partitioned_file("b"), + new_partitioned_file("c"), + new_partitioned_file("d"), + new_partitioned_file("e"), + ]; + + let chunks = split_files(files.clone(), 1); + assert_eq!(1, chunks.len()); + assert_eq!(5, chunks[0].len()); + + let chunks = split_files(files.clone(), 2); + assert_eq!(2, chunks.len()); + assert_eq!(3, chunks[0].len()); + assert_eq!(2, chunks[1].len()); + + let chunks = split_files(files.clone(), 5); + assert_eq!(5, chunks.len()); + assert_eq!(1, chunks[0].len()); + assert_eq!(1, chunks[1].len()); + assert_eq!(1, chunks[2].len()); + assert_eq!(1, chunks[3].len()); + assert_eq!(1, chunks[4].len()); + + let chunks = split_files(files, 123); + assert_eq!(5, chunks.len()); + assert_eq!(1, chunks[0].len()); + assert_eq!(1, chunks[1].len()); + assert_eq!(1, chunks[2].len()); + assert_eq!(1, chunks[3].len()); + assert_eq!(1, chunks[4].len()); + + let chunks = split_files(vec![], 2); + assert_eq!(0, chunks.len()); + } + + #[tokio::test] + async fn test_pruned_partition_list_empty() { + let store = TestObjectStore::new_arc(&[ + ("tablepath/mypartition=val1/notparquetfile", 100), + ("tablepath/file.parquet", 100), + ]); + let filter = Expr::eq(col("mypartition"), lit("val1")); + let pruned = pruned_partition_list( + store.as_ref(), + "tablepath/", + &[filter], + ".parquet", + &[String::from("mypartition")], + ) + .await + .expect("partition pruning failed") + .collect::>() + .await; + + assert_eq!(pruned.len(), 0); + } + + #[tokio::test] + async fn test_pruned_partition_list() { + let store = TestObjectStore::new_arc(&[ + ("tablepath/mypartition=val1/file.parquet", 100), + 
("tablepath/mypartition=val2/file.parquet", 100), + ("tablepath/mypartition=val1/other=val3/file.parquet", 100), + ]); + let filter = Expr::eq(col("mypartition"), lit("val1")); + let pruned = pruned_partition_list( + store.as_ref(), + "tablepath/", + &[filter], + ".parquet", + &[String::from("mypartition")], + ) + .await + .expect("partition pruning failed") + .collect::>() + .await; + + assert_eq!(pruned.len(), 2); + let f1 = pruned[0].as_ref().expect("first item not an error"); + assert_eq!( + &f1.file_meta.sized_file.path, + "tablepath/mypartition=val1/file.parquet" + ); + assert_eq!( + &f1.partition_values, + &[ScalarValue::Utf8(Some(String::from("val1"))),] + ); + let f2 = pruned[1].as_ref().expect("second item not an error"); + assert_eq!( + &f2.file_meta.sized_file.path, + "tablepath/mypartition=val1/other=val3/file.parquet" + ); + assert_eq!( + &f2.partition_values, + &[ScalarValue::Utf8(Some(String::from("val1"))),] + ); + } + + #[tokio::test] + async fn test_pruned_partition_list_multi() { + let store = TestObjectStore::new_arc(&[ + ("tablepath/part1=p1v1/file.parquet", 100), + ("tablepath/part1=p1v2/part2=p2v1/file1.parquet", 100), + ("tablepath/part1=p1v2/part2=p2v1/file2.parquet", 100), + ("tablepath/part1=p1v3/part2=p2v1/file2.parquet", 100), + ("tablepath/part1=p1v2/part2=p2v2/file2.parquet", 100), + ]); + let filter1 = Expr::eq(col("part1"), lit("p1v2")); + let filter2 = Expr::eq(col("part2"), lit("p2v1")); + // filter3 cannot be resolved at partition pruning + let filter3 = Expr::eq(col("part2"), col("other")); + let pruned = pruned_partition_list( + store.as_ref(), + "tablepath/", + &[filter1, filter2, filter3], + ".parquet", + &[String::from("part1"), String::from("part2")], + ) + .await + .expect("partition pruning failed") + .collect::>() + .await; + + assert_eq!(pruned.len(), 2); + let f1 = pruned[0].as_ref().expect("first item not an error"); + assert_eq!( + &f1.file_meta.sized_file.path, + "tablepath/part1=p1v2/part2=p2v1/file1.parquet" + ); + assert_eq!( + &f1.partition_values, + &[ + ScalarValue::Utf8(Some(String::from("p1v2"))), + ScalarValue::Utf8(Some(String::from("p2v1"))) + ] + ); + let f2 = pruned[1].as_ref().expect("second item not an error"); + assert_eq!( + &f2.file_meta.sized_file.path, + "tablepath/part1=p1v2/part2=p2v1/file2.parquet" + ); + assert_eq!( + &f2.partition_values, + &[ + ScalarValue::Utf8(Some(String::from("p1v2"))), + ScalarValue::Utf8(Some(String::from("p2v1"))) + ] + ); + } + + #[test] + fn test_parse_partitions_for_path() { + assert_eq!( + Some(vec![]), + parse_partitions_for_path("bucket/mytable", "bucket/mytable/file.csv", &[]) + ); + assert_eq!( + None, + parse_partitions_for_path( + "bucket/othertable", + "bucket/mytable/file.csv", + &[] + ) + ); + assert_eq!( + None, + parse_partitions_for_path( + "bucket/mytable", + "bucket/mytable/file.csv", + &[String::from("mypartition")] + ) + ); + assert_eq!( + Some(vec!["v1"]), + parse_partitions_for_path( + "bucket/mytable", + "bucket/mytable/mypartition=v1/file.csv", + &[String::from("mypartition")] + ) + ); + assert_eq!( + Some(vec!["v1"]), + parse_partitions_for_path( + "bucket/mytable/", + "bucket/mytable/mypartition=v1/file.csv", + &[String::from("mypartition")] + ) + ); + // Only hive style partitioning supported for now: + assert_eq!( + None, + parse_partitions_for_path( + "bucket/mytable", + "bucket/mytable/v1/file.csv", + &[String::from("mypartition")] + ) + ); + assert_eq!( + Some(vec!["v1", "v2"]), + parse_partitions_for_path( + "bucket/mytable", + 
"bucket/mytable/mypartition=v1/otherpartition=v2/file.csv", + &[String::from("mypartition"), String::from("otherpartition")] + ) + ); + assert_eq!( + Some(vec!["v1"]), + parse_partitions_for_path( + "bucket/mytable", + "bucket/mytable/mypartition=v1/otherpartition=v2/file.csv", + &[String::from("mypartition")] + ) + ); + } + + #[test] + fn test_path_batch_roundtrip_no_partiton() { + let files = vec![ + FileMeta { + sized_file: SizedFile { + path: String::from("mybucket/tablepath/part1=val1/file.parquet"), + size: 100, + }, + last_modified: Some(Utc.timestamp_millis(1634722979123)), + }, + FileMeta { + sized_file: SizedFile { + path: String::from("mybucket/tablepath/part1=val2/file.parquet"), + size: 100, + }, + last_modified: None, + }, + ]; + + let batches = paths_to_batch(&[], "mybucket/tablepath", &files) + .expect("Serialization of file list to batch failed"); + + let parsed_files = batches_to_paths(&[batches]); + assert_eq!(parsed_files.len(), 2); + assert_eq!(&parsed_files[0].partition_values, &[]); + assert_eq!(&parsed_files[1].partition_values, &[]); + + let parsed_metas = parsed_files + .into_iter() + .map(|pf| pf.file_meta) + .collect::>(); + assert_eq!(parsed_metas, files); + } + + #[test] + fn test_path_batch_roundtrip_with_partition() { + let files = vec![ + FileMeta { + sized_file: SizedFile { + path: String::from("mybucket/tablepath/part1=val1/file.parquet"), + size: 100, + }, + last_modified: Some(Utc.timestamp_millis(1634722979123)), + }, + FileMeta { + sized_file: SizedFile { + path: String::from("mybucket/tablepath/part1=val2/file.parquet"), + size: 100, + }, + last_modified: None, + }, + ]; + + let batches = + paths_to_batch(&[String::from("part1")], "mybucket/tablepath", &files) + .expect("Serialization of file list to batch failed"); + + let parsed_files = batches_to_paths(&[batches]); + assert_eq!(parsed_files.len(), 2); + assert_eq!( + &parsed_files[0].partition_values, + &[ScalarValue::Utf8(Some(String::from("val1")))] + ); + assert_eq!( + &parsed_files[1].partition_values, + &[ScalarValue::Utf8(Some(String::from("val2")))] + ); + + let parsed_metas = parsed_files + .into_iter() + .map(|pf| pf.file_meta) + .collect::>(); + assert_eq!(parsed_metas, files); + } + + #[test] + fn test_expr_applicable_for_cols() { + assert!(expr_applicable_for_cols( + &[String::from("c1")], + &Expr::eq(col("c1"), lit("value")) + )); + assert!(!expr_applicable_for_cols( + &[String::from("c1")], + &Expr::eq(col("c2"), lit("value")) + )); + assert!(!expr_applicable_for_cols( + &[String::from("c1")], + &Expr::eq(col("c1"), col("c2")) + )); + assert!(expr_applicable_for_cols( + &[String::from("c1"), String::from("c2")], + &Expr::eq(col("c1"), col("c2")) + )); + assert!(expr_applicable_for_cols( + &[String::from("c1"), String::from("c2")], + &(Expr::eq(col("c1"), col("c2").alias("c2_alias"))).not() + )); + assert!(expr_applicable_for_cols( + &[String::from("c1"), String::from("c2")], + &(case(col("c1")) + .when(lit("v1"), lit(true)) + .otherwise(lit(false)) + .expect("valid case expr")) + )); + // static expression not relvant in this context but we + // test it as an edge case anyway in case we want to generalize + // this helper function + assert!(expr_applicable_for_cols(&[], &lit(true))); + } +} diff --git a/datafusion/src/datasource/listing/mod.rs b/datafusion/src/datasource/listing/mod.rs new file mode 100644 index 000000000000..c8b92418ba2f --- /dev/null +++ b/datafusion/src/datasource/listing/mod.rs @@ -0,0 +1,24 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or 
more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +//! A table that uses the `ObjectStore` listing capability +//! to get the list of files to process. + +mod helpers; +mod table; + +pub use table::{ListingOptions, ListingTable}; diff --git a/datafusion/src/datasource/listing.rs b/datafusion/src/datasource/listing/table.rs similarity index 63% rename from datafusion/src/datasource/listing.rs rename to datafusion/src/datasource/listing/table.rs index 4af82d0c185a..aadc340b46c9 100644 --- a/datafusion/src/datasource/listing.rs +++ b/datafusion/src/datasource/listing/table.rs @@ -15,30 +15,31 @@ // specific language governing permissions and limitations // under the License. -//! A table that uses the `ObjectStore` listing capability -//! to get the list of files to process. +//! The table implementation. use std::{any::Any, sync::Arc}; -use arrow::datatypes::{DataType, Field, Schema, SchemaRef}; +use arrow::datatypes::{Field, Schema, SchemaRef}; use async_trait::async_trait; use futures::StreamExt; use crate::{ - datasource::PartitionedFile, - error::{DataFusionError, Result}, + error::Result, logical_plan::Expr, - physical_plan::{ExecutionPlan, Statistics}, + physical_plan::{ + empty::EmptyExec, + file_format::{PhysicalPlanConfig, DEFAULT_PARTITION_COLUMN_DATATYPE}, + ExecutionPlan, Statistics, + }, }; -use super::{ - datasource::TableProviderFilterPushDown, - file_format::{FileFormat, PhysicalPlanConfig}, - get_statistics_with_limit, - object_store::ObjectStore, - PartitionedFileStream, TableProvider, +use crate::datasource::{ + datasource::TableProviderFilterPushDown, file_format::FileFormat, + get_statistics_with_limit, object_store::ObjectStore, PartitionedFile, TableProvider, }; +use super::helpers::{expr_applicable_for_cols, pruned_partition_list, split_files}; + /// Options for creating a `ListingTable` pub struct ListingOptions { /// A suffix on which files should be filtered (leave empty to @@ -51,9 +52,9 @@ pub struct ListingOptions { /// partitioning expected should be named "a" and "b": /// - If there is a third level of partitioning it will be ignored. /// - Files that don't follow this partitioning will be ignored. - /// Note that only `DataType::Utf8` is supported for the column type. - /// TODO implement case where partitions.len() > 0 - pub partitions: Vec, + /// Note that only `DEFAULT_PARTITION_COLUMN_DATATYPE` is currently + /// supported for the column type. + pub table_partition_cols: Vec, /// Set true to try to guess statistics from the files. /// This can add a lot of overhead as it will usually require files /// to be opened and at least partially parsed. 
@@ -74,14 +75,14 @@ impl ListingOptions { Self { file_extension: String::new(), format, - partitions: vec![], + table_partition_cols: vec![], collect_stat: false, target_partitions: 1, } } /// Infer the schema of the files at the given path on the provided object store. - /// The inferred schema should include the partitioning columns. + /// The inferred schema does not include the partitioning columns. /// /// This method will not be called by the table itself but before creating it. /// This way when creating the logical plan we can decide to resolve the schema @@ -96,12 +97,7 @@ impl ListingOptions { .await? .map(move |file_meta| object_store.file_reader(file_meta?.sized_file)); let file_schema = self.format.infer_schema(Box::pin(file_stream)).await?; - // Add the partition columns to the file schema - let mut fields = file_schema.fields().clone(); - for part in &self.partitions { - fields.push(Field::new(part, DataType::Utf8, false)); - } - Ok(Arc::new(Schema::new(fields))) + Ok(file_schema) } } @@ -109,24 +105,40 @@ impl ListingOptions { /// or file system listing capability to get the list of files. pub struct ListingTable { object_store: Arc, - path: String, - schema: SchemaRef, + table_path: String, + /// File fields only + file_schema: SchemaRef, + /// File fields + partition columns + table_schema: SchemaRef, options: ListingOptions, } impl ListingTable { /// Create new table that lists the FS to get the files to scan. + /// The provided `schema` must be resolved before creating the table + /// and should contain the fields of the file without the table + /// partitioning columns. pub fn new( object_store: Arc, - path: String, - // the schema must be resolved before creating the table - schema: SchemaRef, + table_path: String, + file_schema: SchemaRef, options: ListingOptions, ) -> Self { + // Add the partition columns to the file schema + let mut table_fields = file_schema.fields().clone(); + for part in &options.table_partition_cols { + table_fields.push(Field::new( + part, + DEFAULT_PARTITION_COLUMN_DATATYPE.clone(), + false, + )); + } + Self { object_store, - path, - schema, + table_path, + file_schema, + table_schema: Arc::new(Schema::new(table_fields)), options, } } @@ -136,8 +148,8 @@ impl ListingTable { &self.object_store } /// Get path ref - pub fn path(&self) -> &str { - &self.path + pub fn table_path(&self) -> &str { + &self.table_path } /// Get options ref pub fn options(&self) -> &ListingOptions { @@ -152,7 +164,7 @@ impl TableProvider for ListingTable { } fn schema(&self) -> SchemaRef { - Arc::clone(&self.schema) + Arc::clone(&self.table_schema) } async fn scan( @@ -162,58 +174,76 @@ impl TableProvider for ListingTable { filters: &[Expr], limit: Option, ) -> Result> { - // TODO object_store_registry should be provided as param here - let (partitioned_file_lists, statistics) = self - .list_files_for_scan( - Arc::clone(&self.object_store), - &self.path, - filters, - limit, - ) - .await?; + let (partitioned_file_lists, statistics) = + self.list_files_for_scan(filters, limit).await?; + + // if no files need to be read, return an `EmptyExec` + if partitioned_file_lists.is_empty() { + let schema = self.schema(); + let projected_schema = match &projection { + None => schema, + Some(p) => Arc::new(Schema::new( + p.iter().map(|i| schema.field(*i).clone()).collect(), + )), + }; + return Ok(Arc::new(EmptyExec::new(false, projected_schema))); + } + // create the execution plan self.options .format - .create_physical_plan(PhysicalPlanConfig { - object_store: 
Arc::clone(&self.object_store), - schema: self.schema(), - files: partitioned_file_lists, - statistics, - projection: projection.clone(), - batch_size, - filters: filters.to_vec(), - limit, - }) + .create_physical_plan( + PhysicalPlanConfig { + object_store: Arc::clone(&self.object_store), + file_schema: Arc::clone(&self.file_schema), + file_groups: partitioned_file_lists, + statistics, + projection: projection.clone(), + batch_size, + limit, + table_partition_cols: self.options.table_partition_cols.clone(), + }, + filters, + ) .await } fn supports_filter_pushdown( &self, - _filter: &Expr, + filter: &Expr, ) -> Result { - Ok(TableProviderFilterPushDown::Inexact) + if expr_applicable_for_cols(&self.options.table_partition_cols, filter) { + // if filter can be handled by partiton pruning, it is exact + Ok(TableProviderFilterPushDown::Exact) + } else { + // otherwise, we still might be able to handle the filter with file + // level mechanisms such as Parquet row group pruning. + Ok(TableProviderFilterPushDown::Inexact) + } } } impl ListingTable { + /// Get the list of files for a scan as well as the file level statistics. + /// The list is grouped to let the execution plan know how the files should + /// be distributed to different threads / executors. async fn list_files_for_scan<'a>( &'a self, - object_store: Arc, - path: &'a str, filters: &'a [Expr], limit: Option, ) -> Result<(Vec>, Statistics)> { // list files (with partitions) let file_list = pruned_partition_list( - object_store.as_ref(), - path, + self.object_store.as_ref(), + &self.table_path, filters, &self.options.file_extension, - &self.options.partitions, + &self.options.table_partition_cols, ) .await?; // collect the statistics if required by the config + let object_store = Arc::clone(&self.object_store); let files = file_list.then(move |part_file| { let object_store = object_store.clone(); async move { @@ -232,13 +262,6 @@ impl ListingTable { let (files, statistics) = get_statistics_with_limit(files, self.schema(), limit).await?; - if files.is_empty() { - return Err(DataFusionError::Plan(format!( - "No files found at {} with file extension {}", - self.path, self.options.file_extension, - ))); - } - Ok(( split_files(files, self.options.target_partitions), statistics, @@ -246,98 +269,21 @@ impl ListingTable { } } -/// Discover the partitions on the given path and prune out files -/// relative to irrelevant partitions using `filters` expressions -async fn pruned_partition_list( - store: &dyn ObjectStore, - path: &str, - _filters: &[Expr], - file_extension: &str, - partition_names: &[String], -) -> Result { - if partition_names.is_empty() { - Ok(Box::pin( - store - .list_file_with_suffix(path, file_extension) - .await? - .map(|f| Ok(PartitionedFile { file_meta: f? 
})), - )) - } else { - todo!("use filters to prune partitions") - } -} - -fn split_files( - partitioned_files: Vec, - n: usize, -) -> Vec> { - let mut chunk_size = partitioned_files.len() / n; - if partitioned_files.len() % n > 0 { - chunk_size += 1; - } - partitioned_files - .chunks(chunk_size) - .map(|c| c.to_vec()) - .collect() -} - #[cfg(test)] mod tests { + use arrow::datatypes::DataType; + use crate::{ datasource::{ file_format::{avro::AvroFormat, parquet::ParquetFormat}, - object_store::{local::LocalFileSystem, FileMeta, ObjectStore, SizedFile}, + object_store::local::LocalFileSystem, }, - test::object_store::TestObjectStore, + logical_plan::{col, lit}, + test::{columns, object_store::TestObjectStore}, }; use super::*; - #[test] - fn test_split_files() { - let new_partitioned_file = |path: &str| PartitionedFile { - file_meta: FileMeta { - sized_file: SizedFile { - path: path.to_owned(), - size: 10, - }, - last_modified: None, - }, - }; - let files = vec![ - new_partitioned_file("a"), - new_partitioned_file("b"), - new_partitioned_file("c"), - new_partitioned_file("d"), - new_partitioned_file("e"), - ]; - - let chunks = split_files(files.clone(), 1); - assert_eq!(1, chunks.len()); - assert_eq!(5, chunks[0].len()); - - let chunks = split_files(files.clone(), 2); - assert_eq!(2, chunks.len()); - assert_eq!(3, chunks[0].len()); - assert_eq!(2, chunks[1].len()); - - let chunks = split_files(files.clone(), 5); - assert_eq!(5, chunks.len()); - assert_eq!(1, chunks[0].len()); - assert_eq!(1, chunks[1].len()); - assert_eq!(1, chunks[2].len()); - assert_eq!(1, chunks[3].len()); - assert_eq!(1, chunks[4].len()); - - let chunks = split_files(files, 123); - assert_eq!(5, chunks.len()); - assert_eq!(1, chunks[0].len()); - assert_eq!(1, chunks[1].len()); - assert_eq!(1, chunks[2].len()); - assert_eq!(1, chunks[3].len()); - assert_eq!(1, chunks[4].len()); - } - #[tokio::test] async fn read_single_file() -> Result<()> { let table = load_table("alltypes_plain.parquet").await?; @@ -358,9 +304,47 @@ mod tests { } #[tokio::test] - async fn file_listings() -> Result<()> { + async fn read_empty_table() -> Result<()> { + let store = TestObjectStore::new_arc(&[("table/p1=v1/file.avro", 100)]); + + let opt = ListingOptions { + file_extension: ".avro".to_owned(), + format: Arc::new(AvroFormat {}), + table_partition_cols: vec![String::from("p1")], + target_partitions: 4, + collect_stat: true, + }; + + let file_schema = Schema::new(vec![Field::new("a", DataType::Boolean, false)]); + + let table = + ListingTable::new(store, "table/".to_owned(), Arc::new(file_schema), opt); + assert_eq!( + columns(&table.schema()), + vec!["a".to_owned(), "p1".to_owned()] + ); + + // this will filter out the only file in the store + let filter = Expr::not_eq(col("p1"), lit("v1")); + + let scan = table + .scan(&None, 1024, &[filter], None) + .await + .expect("Empty execution plan"); + + assert!(scan.as_any().is::()); + assert_eq!( + columns(&scan.schema()), + vec!["a".to_owned(), "p1".to_owned()] + ); + + Ok(()) + } + + #[tokio::test] + async fn test_assert_list_files_for_scan_grouping() -> Result<()> { // more expected partitions than files - assert_partitioning( + assert_list_files_for_scan_grouping( &[ "bucket/key-prefix/file0", "bucket/key-prefix/file1", @@ -375,7 +359,7 @@ mod tests { .await?; // as many expected partitions as files - assert_partitioning( + assert_list_files_for_scan_grouping( &[ "bucket/key-prefix/file0", "bucket/key-prefix/file1", @@ -389,7 +373,7 @@ mod tests { .await?; // more files as expected partitions 
- assert_partitioning( + assert_list_files_for_scan_grouping( &[ "bucket/key-prefix/file0", "bucket/key-prefix/file1", @@ -403,13 +387,11 @@ mod tests { ) .await?; - // no files - assert_partitioning(&[], "bucket/key-prefix/", 2, 0) - .await - .expect_err("no files"); + // no files => no groups + assert_list_files_for_scan_grouping(&[], "bucket/key-prefix/", 2, 0).await?; // files that don't match the prefix - assert_partitioning( + assert_list_files_for_scan_grouping( &[ "bucket/key-prefix/file0", "bucket/key-prefix/file1", @@ -429,7 +411,7 @@ mod tests { let opt = ListingOptions { file_extension: "parquet".to_owned(), format: Arc::new(ParquetFormat::default()), - partitions: vec![], + table_partition_cols: vec![], target_partitions: 2, collect_stat: true, }; @@ -445,13 +427,13 @@ mod tests { /// Check that the files listed by the table match the specified `output_partitioning` /// when the object store contains `files`. - async fn assert_partitioning( + async fn assert_list_files_for_scan_grouping( files: &[&str], table_prefix: &str, target_partitions: usize, output_partitioning: usize, ) -> Result<()> { - let mock_store: Arc = + let mock_store = TestObjectStore::new_arc(&files.iter().map(|f| (*f, 10)).collect::>()); let format = AvroFormat {}; @@ -459,23 +441,17 @@ mod tests { let opt = ListingOptions { file_extension: "".to_owned(), format: Arc::new(format), - partitions: vec![], + table_partition_cols: vec![], target_partitions, collect_stat: true, }; let schema = Schema::new(vec![Field::new("a", DataType::Boolean, false)]); - let table = ListingTable::new( - Arc::clone(&mock_store), - table_prefix.to_owned(), - Arc::new(schema), - opt, - ); + let table = + ListingTable::new(mock_store, table_prefix.to_owned(), Arc::new(schema), opt); - let (file_list, _) = table - .list_files_for_scan(mock_store, table_prefix, &[], None) - .await?; + let (file_list, _) = table.list_files_for_scan(&[], None).await?; assert_eq!(file_list.len(), output_partitioning); diff --git a/datafusion/src/datasource/mod.rs b/datafusion/src/datasource/mod.rs index 2e5330f16cb7..9f4f77f7ea28 100644 --- a/datafusion/src/datasource/mod.rs +++ b/datafusion/src/datasource/mod.rs @@ -33,24 +33,25 @@ use crate::arrow::datatypes::{Schema, SchemaRef}; use crate::error::Result; use crate::physical_plan::expressions::{MaxAccumulator, MinAccumulator}; use crate::physical_plan::{Accumulator, ColumnStatistics, Statistics}; +use crate::scalar::ScalarValue; use futures::StreamExt; use std::pin::Pin; -/// Get all files as well as the summary statistic -/// if the optional `limit` is provided, includes only sufficient files -/// needed to read up to `limit` number of rows +/// Get all files as well as the file level summary statistics (no statistic for partition columns). +/// If the optional `limit` is provided, includes only sufficient files. +/// Needed to read up to `limit` number of rows. 
/// TODO fix case where `num_rows` and `total_byte_size` are not defined (stat should be None instead of Some(0)) pub async fn get_statistics_with_limit( all_files: impl Stream>, - schema: SchemaRef, + file_schema: SchemaRef, limit: Option, ) -> Result<(Vec, Statistics)> { let mut result_files = vec![]; let mut total_byte_size = 0; - let mut null_counts = vec![0; schema.fields().len()]; + let mut null_counts = vec![0; file_schema.fields().len()]; let mut has_statistics = false; - let (mut max_values, mut min_values) = create_max_min_accs(&schema); + let (mut max_values, mut min_values) = create_max_min_accs(&file_schema); let mut num_rows = 0; let mut is_exact = true; @@ -103,7 +104,7 @@ pub async fn get_statistics_with_limit( let column_stats = if has_statistics { Some(get_col_stats( - &*schema, + &*file_schema, null_counts, &mut max_values, &mut min_values, @@ -128,8 +129,8 @@ pub async fn get_statistics_with_limit( pub struct PartitionedFile { /// Path for the file (e.g. URL, filesystem path, etc) pub file_meta: FileMeta, - // Values of partition columns to be appended to each row - // pub partition_value: Option>, + /// Values of partition columns to be appended to each row + pub partition_values: Vec, // We may include row group range here for a more fine-grained parallel execution } @@ -141,6 +142,7 @@ impl PartitionedFile { sized_file: SizedFile { path, size }, last_modified: None, }, + partition_values: vec![], } } } diff --git a/datafusion/src/datasource/object_store/local.rs b/datafusion/src/datasource/object_store/local.rs index 4f4dbefbca49..b2a2ddfa950b 100644 --- a/datafusion/src/datasource/object_store/local.rs +++ b/datafusion/src/datasource/object_store/local.rs @@ -27,6 +27,7 @@ use futures::{stream, AsyncRead, StreamExt}; use crate::datasource::object_store::{ FileMeta, FileMetaStream, ListEntryStream, ObjectReader, ObjectStore, }; +use crate::datasource::PartitionedFile; use crate::error::DataFusionError; use crate::error::Result; @@ -161,19 +162,22 @@ pub fn local_object_reader_stream(files: Vec) -> ObjectReaderStream { /// Helper method to convert a file location to a `LocalFileReader` pub fn local_object_reader(file: String) -> Arc { LocalFileSystem - .file_reader(local_file_meta(file).sized_file) + .file_reader(local_unpartitioned_file(file).file_meta.sized_file) .expect("File not found") } /// Helper method to fetch the file size and date at given path and create a `FileMeta` -pub fn local_file_meta(file: String) -> FileMeta { +pub fn local_unpartitioned_file(file: String) -> PartitionedFile { let metadata = fs::metadata(&file).expect("Local file metadata"); - FileMeta { - sized_file: SizedFile { - size: metadata.len(), - path: file, + PartitionedFile { + file_meta: FileMeta { + sized_file: SizedFile { + size: metadata.len(), + path: file, + }, + last_modified: metadata.modified().map(chrono::DateTime::from).ok(), }, - last_modified: metadata.modified().map(chrono::DateTime::from).ok(), + partition_values: vec![], } } diff --git a/datafusion/src/datasource/object_store/mod.rs b/datafusion/src/datasource/object_store/mod.rs index 61bc47dc462c..59e184103d2a 100644 --- a/datafusion/src/datasource/object_store/mod.rs +++ b/datafusion/src/datasource/object_store/mod.rs @@ -70,7 +70,7 @@ pub enum ListEntry { } /// The path and size of the file. -#[derive(Debug, Clone)] +#[derive(Debug, Clone, PartialEq)] pub struct SizedFile { /// Path of the file. It is relative to the current object /// store (it does not specify the `xx://` scheme). 
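A minimal illustrative sketch (not part of the patch) of how the new `partition_values` field on `PartitionedFile` can be populated, assuming the public module paths shown elsewhere in this diff; the object-store path, size, and helper name below are made up for illustration:

use datafusion::datasource::object_store::{FileMeta, SizedFile};
use datafusion::datasource::PartitionedFile;
use datafusion::scalar::ScalarValue;

// Hypothetical example: a file found under a Hive-style partition directory,
// carrying the resolved value of its `date` partition column alongside the
// file metadata.
fn example_partitioned_file() -> PartitionedFile {
    PartitionedFile {
        file_meta: FileMeta {
            sized_file: SizedFile {
                // hypothetical object-store key, relative to the store
                path: "table/date=2021-10-26/part-0.parquet".to_owned(),
                size: 1024,
            },
            last_modified: None,
        },
        // one value per column declared in `ListingOptions::table_partition_cols`
        partition_values: vec![ScalarValue::Utf8(Some("2021-10-26".to_owned()))],
    }
}

In the patch, the listing side presumably fills these values from the partition directories it discovers, and `FileStream` then passes them to `PartitionColumnProjector::project` so that each record batch read from the file is extended with the corresponding partition columns.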
@@ -82,7 +82,7 @@ pub struct SizedFile { /// Description of a file as returned by the listing command of a /// given object store. The resulting path is relative to the /// object store that generated it. -#[derive(Debug, Clone)] +#[derive(Debug, Clone, PartialEq)] pub struct FileMeta { /// The path and size of the file. pub sized_file: SizedFile, diff --git a/datafusion/src/execution/context.rs b/datafusion/src/execution/context.rs index 01484568616b..9be5038f47c9 100644 --- a/datafusion/src/execution/context.rs +++ b/datafusion/src/execution/context.rs @@ -221,7 +221,7 @@ impl ExecutionContext { .unwrap() .config .target_partitions, - partitions: vec![], + table_partition_cols: vec![], }; // TODO make schema in CreateExternalTable optional instead of empty @@ -442,7 +442,7 @@ impl ExecutionContext { collect_stat: true, file_extension: DEFAULT_PARQUET_EXTENSION.to_owned(), target_partitions, - partitions: vec![], + table_partition_cols: vec![], }; self.register_listing_table(name, uri, listing_options, None) diff --git a/datafusion/src/execution/options.rs b/datafusion/src/execution/options.rs index f0ed6f24c325..c6b5ff646ea3 100644 --- a/datafusion/src/execution/options.rs +++ b/datafusion/src/execution/options.rs @@ -108,7 +108,7 @@ impl<'a> CsvReadOptions<'a> { collect_stat: false, file_extension: self.file_extension.to_owned(), target_partitions, - partitions: vec![], + table_partition_cols: vec![], } } } @@ -143,7 +143,7 @@ impl<'a> AvroReadOptions<'a> { collect_stat: false, file_extension: self.file_extension.to_owned(), target_partitions, - partitions: vec![], + table_partition_cols: vec![], } } } diff --git a/datafusion/src/logical_plan/builder.rs b/datafusion/src/logical_plan/builder.rs index 09c3a14513e5..693bf78fbe0e 100644 --- a/datafusion/src/logical_plan/builder.rs +++ b/datafusion/src/logical_plan/builder.rs @@ -277,7 +277,7 @@ impl LogicalPlanBuilder { collect_stat: true, file_extension: DEFAULT_PARQUET_EXTENSION.to_owned(), target_partitions, - partitions: vec![], + table_partition_cols: vec![], }; let path: String = path.into(); diff --git a/datafusion/src/physical_optimizer/repartition.rs b/datafusion/src/physical_optimizer/repartition.rs index ea7de7f39839..8ac9dadd9548 100644 --- a/datafusion/src/physical_optimizer/repartition.rs +++ b/datafusion/src/physical_optimizer/repartition.rs @@ -109,26 +109,28 @@ mod tests { use arrow::datatypes::Schema; use super::*; - use crate::datasource::object_store::local::LocalFileSystem; use crate::datasource::PartitionedFile; - use crate::physical_plan::file_format::ParquetExec; + use crate::physical_plan::file_format::{ParquetExec, PhysicalPlanConfig}; use crate::physical_plan::projection::ProjectionExec; use crate::physical_plan::Statistics; use crate::test::object_store::TestObjectStore; #[test] fn added_repartition_to_single_partition() -> Result<()> { - let schema = Arc::new(Schema::empty()); + let file_schema = Arc::new(Schema::empty()); let parquet_project = ProjectionExec::try_new( vec![], Arc::new(ParquetExec::new( - TestObjectStore::new_arc(&[("x", 100)]), - vec![vec![PartitionedFile::new("x".to_string(), 100)]], - Statistics::default(), - schema, - None, - None, - 2048, + PhysicalPlanConfig { + object_store: TestObjectStore::new_arc(&[("x", 100)]), + file_schema, + file_groups: vec![vec![PartitionedFile::new("x".to_string(), 100)]], + statistics: Statistics::default(), + projection: None, + batch_size: 2048, + limit: None, + table_partition_cols: vec![], + }, None, )), )?; @@ -152,19 +154,25 @@ mod tests { #[test] fn 
repartition_deepest_node() -> Result<()> { - let schema = Arc::new(Schema::empty()); + let file_schema = Arc::new(Schema::empty()); let parquet_project = ProjectionExec::try_new( vec![], Arc::new(ProjectionExec::try_new( vec![], Arc::new(ParquetExec::new( - Arc::new(LocalFileSystem {}), - vec![vec![PartitionedFile::new("x".to_string(), 100)]], - Statistics::default(), - schema, - None, - None, - 2048, + PhysicalPlanConfig { + object_store: TestObjectStore::new_arc(&[("x", 100)]), + file_schema, + file_groups: vec![vec![PartitionedFile::new( + "x".to_string(), + 100, + )]], + statistics: Statistics::default(), + projection: None, + batch_size: 2048, + limit: None, + table_partition_cols: vec![], + }, None, )), )?), diff --git a/datafusion/src/physical_plan/coalesce_partitions.rs b/datafusion/src/physical_plan/coalesce_partitions.rs index 1fd18d2c4f37..9c133def8209 100644 --- a/datafusion/src/physical_plan/coalesce_partitions.rs +++ b/datafusion/src/physical_plan/coalesce_partitions.rs @@ -207,7 +207,7 @@ mod tests { use super::*; use crate::datasource::object_store::local::LocalFileSystem; - use crate::physical_plan::file_format::CsvExec; + use crate::physical_plan::file_format::{CsvExec, PhysicalPlanConfig}; use crate::physical_plan::{collect, common}; use crate::test::exec::{assert_strong_count_converges_to_zero, BlockingExec}; use crate::test::{self, assert_is_pending}; @@ -220,15 +220,18 @@ mod tests { let (_, files) = test::create_partitioned_csv("aggregate_test_100.csv", num_partitions)?; let csv = CsvExec::new( - Arc::new(LocalFileSystem {}), - files, - Statistics::default(), - schema, + PhysicalPlanConfig { + object_store: Arc::new(LocalFileSystem {}), + file_schema: schema, + file_groups: files, + statistics: Statistics::default(), + projection: None, + batch_size: 1024, + limit: None, + table_partition_cols: vec![], + }, true, b',', - None, - 1024, - None, ); // input should have 4 partitions diff --git a/datafusion/src/physical_plan/file_format/avro.rs b/datafusion/src/physical_plan/file_format/avro.rs index 2420040c08af..b50c0a082686 100644 --- a/datafusion/src/physical_plan/file_format/avro.rs +++ b/datafusion/src/physical_plan/file_format/avro.rs @@ -18,13 +18,11 @@ //! 
Execution plan for reading line-delimited Avro files #[cfg(feature = "avro")] use crate::avro_to_arrow; -use crate::datasource::object_store::ObjectStore; -use crate::datasource::PartitionedFile; use crate::error::{DataFusionError, Result}; use crate::physical_plan::{ DisplayFormatType, ExecutionPlan, Partitioning, SendableRecordBatchStream, Statistics, }; -use arrow::datatypes::{Schema, SchemaRef}; +use arrow::datatypes::SchemaRef; #[cfg(feature = "avro")] use arrow::error::ArrowError; @@ -34,68 +32,30 @@ use std::sync::Arc; #[cfg(feature = "avro")] use super::file_stream::{BatchIter, FileStream}; +use super::PhysicalPlanConfig; /// Execution plan for scanning Avro data source #[derive(Debug, Clone)] pub struct AvroExec { - object_store: Arc, - file_groups: Vec>, - statistics: Statistics, - file_schema: SchemaRef, - projection: Option>, + base_config: PhysicalPlanConfig, + projected_statistics: Statistics, projected_schema: SchemaRef, - batch_size: usize, - limit: Option, } impl AvroExec { - /// Create a new Avro reader execution plan provided file list and schema - pub fn new( - object_store: Arc, - file_groups: Vec>, - statistics: Statistics, - file_schema: SchemaRef, - projection: Option>, - batch_size: usize, - limit: Option, - ) -> Self { - let projected_schema = match &projection { - None => Arc::clone(&file_schema), - Some(p) => Arc::new(Schema::new( - p.iter().map(|i| file_schema.field(*i).clone()).collect(), - )), - }; + /// Create a new Avro reader execution plan provided base configurations + pub fn new(base_config: PhysicalPlanConfig) -> Self { + let (projected_schema, projected_statistics) = base_config.project(); Self { - object_store, - file_groups, - statistics, - file_schema, - projection, + base_config, projected_schema, - batch_size, - limit, + projected_statistics, } } - /// List of data files - pub fn file_groups(&self) -> &[Vec] { - &self.file_groups - } - /// The schema before projection - pub fn file_schema(&self) -> &SchemaRef { - &self.file_schema - } - /// Optional projection for which columns to load - pub fn projection(&self) -> &Option> { - &self.projection - } - /// Batch size - pub fn batch_size(&self) -> usize { - self.batch_size - } - /// Limit in nr. of rows - pub fn limit(&self) -> Option { - self.limit + /// Ref to the base configs + pub fn base_config(&self) -> &PhysicalPlanConfig { + &self.base_config } } @@ -110,7 +70,7 @@ impl ExecutionPlan for AvroExec { } fn output_partitioning(&self) -> Partitioning { - Partitioning::UnknownPartitioning(self.file_groups.len()) + Partitioning::UnknownPartitioning(self.base_config.file_groups.len()) } fn children(&self) -> Vec> { @@ -140,15 +100,10 @@ impl ExecutionPlan for AvroExec { #[cfg(feature = "avro")] async fn execute(&self, partition: usize) -> Result { - let proj = self.projection.as_ref().map(|p| { - p.iter() - .map(|col_idx| self.file_schema.field(*col_idx).name()) - .cloned() - .collect() - }); + let proj = self.base_config.projected_file_column_names(); - let batch_size = self.batch_size; - let file_schema = Arc::clone(&self.file_schema); + let batch_size = self.base_config.batch_size; + let file_schema = Arc::clone(&self.base_config.file_schema); // The avro reader cannot limit the number of records, so `remaining` is ignored. 
let fun = move |file, _remaining: &Option| { @@ -167,11 +122,12 @@ impl ExecutionPlan for AvroExec { }; Ok(Box::pin(FileStream::new( - Arc::clone(&self.object_store), - self.file_groups[partition].clone(), + Arc::clone(&self.base_config.object_store), + self.base_config.file_groups[partition].clone(), fun, Arc::clone(&self.projected_schema), - self.limit, + self.base_config.limit, + self.base_config.table_partition_cols.clone(), ))) } @@ -185,16 +141,16 @@ impl ExecutionPlan for AvroExec { write!( f, "AvroExec: files={}, batch_size={}, limit={:?}", - super::FileGroupsDisplay(&self.file_groups), - self.batch_size, - self.limit, + super::FileGroupsDisplay(&self.base_config.file_groups), + self.base_config.batch_size, + self.base_config.limit, ) } } } fn statistics(&self) -> Statistics { - self.statistics.clone() + self.projected_statistics.clone() } } @@ -202,45 +158,59 @@ impl ExecutionPlan for AvroExec { #[cfg(feature = "avro")] mod tests { + use crate::datasource::file_format::{avro::AvroFormat, FileFormat}; use crate::datasource::object_store::local::{ - local_file_meta, local_object_reader_stream, LocalFileSystem, + local_object_reader_stream, local_unpartitioned_file, LocalFileSystem, }; + use crate::scalar::ScalarValue; + use futures::StreamExt; use super::*; #[tokio::test] - async fn test() -> Result<()> { - use futures::StreamExt; - - use crate::datasource::file_format::{avro::AvroFormat, FileFormat}; - + async fn avro_exec_without_partition() -> Result<()> { let testdata = crate::test_util::arrow_test_data(); let filename = format!("{}/avro/alltypes_plain.avro", testdata); - let avro_exec = AvroExec::new( - Arc::new(LocalFileSystem {}), - vec![vec![PartitionedFile { - file_meta: local_file_meta(filename.clone()), - }]], - Statistics::default(), - AvroFormat {} + let avro_exec = AvroExec::new(PhysicalPlanConfig { + object_store: Arc::new(LocalFileSystem {}), + file_groups: vec![vec![local_unpartitioned_file(filename.clone())]], + file_schema: AvroFormat {} .infer_schema(local_object_reader_stream(vec![filename])) .await?, - Some(vec![0, 1, 2]), - 1024, - None, - ); + statistics: Statistics::default(), + projection: Some(vec![0, 1, 2]), + batch_size: 1024, + limit: None, + table_partition_cols: vec![], + }); assert_eq!(avro_exec.output_partitioning().partition_count(), 1); - let mut results = avro_exec.execute(0).await?; - let batch = results.next().await.unwrap()?; + let mut results = avro_exec.execute(0).await.expect("plan execution failed"); + let batch = results + .next() + .await + .expect("plan iterator empty") + .expect("plan iterator returned an error"); - assert_eq!(8, batch.num_rows()); - assert_eq!(3, batch.num_columns()); + let expected = vec![ + "+----+----------+-------------+", + "| id | bool_col | tinyint_col |", + "+----+----------+-------------+", + "| 4 | true | 0 |", + "| 5 | false | 1 |", + "| 6 | true | 0 |", + "| 7 | false | 1 |", + "| 2 | true | 0 |", + "| 3 | false | 1 |", + "| 0 | true | 0 |", + "| 1 | false | 1 |", + "+----+----------+-------------+", + ]; - let schema = batch.schema(); - let field_names: Vec<&str> = - schema.fields().iter().map(|f| f.name().as_str()).collect(); - assert_eq!(vec!["id", "bool_col", "tinyint_col"], field_names); + crate::assert_batches_eq!(expected, &[batch]); + + let batch = results.next().await; + assert!(batch.is_none()); let batch = results.next().await; assert!(batch.is_none()); @@ -248,6 +218,57 @@ mod tests { let batch = results.next().await; assert!(batch.is_none()); + Ok(()) + } + + #[tokio::test] + async fn 
avro_exec_with_partition() -> Result<()> { + let testdata = crate::test_util::arrow_test_data(); + let filename = format!("{}/avro/alltypes_plain.avro", testdata); + let mut partitioned_file = local_unpartitioned_file(filename.clone()); + partitioned_file.partition_values = + vec![ScalarValue::Utf8(Some("2021-10-26".to_owned()))]; + let file_schema = AvroFormat {} + .infer_schema(local_object_reader_stream(vec![filename])) + .await?; + + let avro_exec = AvroExec::new(PhysicalPlanConfig { + // select specific columns of the files as well as the partitioning + // column which is supposed to be the last column in the table schema. + projection: Some(vec![0, 1, file_schema.fields().len(), 2]), + object_store: Arc::new(LocalFileSystem {}), + file_groups: vec![vec![partitioned_file]], + file_schema: file_schema, + statistics: Statistics::default(), + batch_size: 1024, + limit: None, + table_partition_cols: vec!["date".to_owned()], + }); + assert_eq!(avro_exec.output_partitioning().partition_count(), 1); + + let mut results = avro_exec.execute(0).await.expect("plan execution failed"); + let batch = results + .next() + .await + .expect("plan iterator empty") + .expect("plan iterator returned an error"); + + let expected = vec![ + "+----+----------+------------+-------------+", + "| id | bool_col | date | tinyint_col |", + "+----+----------+------------+-------------+", + "| 4 | true | 2021-10-26 | 0 |", + "| 5 | false | 2021-10-26 | 1 |", + "| 6 | true | 2021-10-26 | 0 |", + "| 7 | false | 2021-10-26 | 1 |", + "| 2 | true | 2021-10-26 | 0 |", + "| 3 | false | 2021-10-26 | 1 |", + "| 0 | true | 2021-10-26 | 0 |", + "| 1 | false | 2021-10-26 | 1 |", + "+----+----------+------------+-------------+", + ]; + crate::assert_batches_eq!(expected, &[batch]); + let batch = results.next().await; assert!(batch.is_none()); diff --git a/datafusion/src/physical_plan/file_format/csv.rs b/datafusion/src/physical_plan/file_format/csv.rs index fc82c8fd272e..0057e9e811ab 100644 --- a/datafusion/src/physical_plan/file_format/csv.rs +++ b/datafusion/src/physical_plan/file_format/csv.rs @@ -17,81 +17,48 @@ //! 
Execution plan for reading CSV files -use crate::datasource::object_store::ObjectStore; -use crate::datasource::PartitionedFile; use crate::error::{DataFusionError, Result}; use crate::physical_plan::{ DisplayFormatType, ExecutionPlan, Partitioning, SendableRecordBatchStream, Statistics, }; use arrow::csv; -use arrow::datatypes::{Schema, SchemaRef}; +use arrow::datatypes::SchemaRef; use std::any::Any; use std::sync::Arc; use async_trait::async_trait; use super::file_stream::{BatchIter, FileStream}; +use super::PhysicalPlanConfig; /// Execution plan for scanning a CSV file #[derive(Debug, Clone)] pub struct CsvExec { - object_store: Arc, - file_groups: Vec>, - /// Schema representing the CSV file - file_schema: SchemaRef, - /// Schema after the projection has been applied + base_config: PhysicalPlanConfig, + projected_statistics: Statistics, projected_schema: SchemaRef, - statistics: Statistics, has_header: bool, delimiter: u8, - projection: Option>, - batch_size: usize, - limit: Option, } impl CsvExec { - /// Create a new CSV reader execution plan provided file list and schema - #[allow(clippy::too_many_arguments)] - pub fn new( - object_store: Arc, - file_groups: Vec>, - statistics: Statistics, - file_schema: SchemaRef, - has_header: bool, - delimiter: u8, - projection: Option>, - batch_size: usize, - limit: Option, - ) -> Self { - let projected_schema = match &projection { - None => Arc::clone(&file_schema), - Some(p) => Arc::new(Schema::new( - p.iter().map(|i| file_schema.field(*i).clone()).collect(), - )), - }; + /// Create a new CSV reader execution plan provided base and specific configurations + pub fn new(base_config: PhysicalPlanConfig, has_header: bool, delimiter: u8) -> Self { + let (projected_schema, projected_statistics) = base_config.project(); Self { - object_store, - file_groups, - file_schema, - statistics, + base_config, + projected_schema, + projected_statistics, has_header, delimiter, - projection, - projected_schema, - batch_size, - limit, } } - /// List of data files - pub fn file_groups(&self) -> &[Vec] { - &self.file_groups - } - /// The schema before projection - pub fn file_schema(&self) -> &SchemaRef { - &self.file_schema + /// Ref to the base configs + pub fn base_config(&self) -> &PhysicalPlanConfig { + &self.base_config } /// true if the first line of each file is a header pub fn has_header(&self) -> bool { @@ -101,18 +68,6 @@ impl CsvExec { pub fn delimiter(&self) -> u8 { self.delimiter } - /// Optional projection for which columns to load - pub fn projection(&self) -> &Option> { - &self.projection - } - /// Batch size - pub fn batch_size(&self) -> usize { - self.batch_size - } - /// Limit in nr. 
of rows - pub fn limit(&self) -> Option { - self.limit - } } #[async_trait] @@ -129,7 +84,7 @@ impl ExecutionPlan for CsvExec { /// Get the output partitioning of this plan fn output_partitioning(&self) -> Partitioning { - Partitioning::UnknownPartitioning(self.file_groups.len()) + Partitioning::UnknownPartitioning(self.base_config.file_groups.len()) } fn children(&self) -> Vec> { @@ -152,9 +107,9 @@ impl ExecutionPlan for CsvExec { } async fn execute(&self, partition: usize) -> Result { - let batch_size = self.batch_size; - let file_schema = Arc::clone(&self.file_schema); - let projection = self.projection.clone(); + let batch_size = self.base_config.batch_size; + let file_schema = Arc::clone(&self.base_config.file_schema); + let file_projection = self.base_config.file_column_projection_indices(); let has_header = self.has_header; let delimiter = self.delimiter; let start_line = if has_header { 1 } else { 0 }; @@ -168,16 +123,17 @@ impl ExecutionPlan for CsvExec { Some(delimiter), batch_size, bounds, - projection.clone(), + file_projection.clone(), )) as BatchIter }; Ok(Box::pin(FileStream::new( - Arc::clone(&self.object_store), - self.file_groups[partition].clone(), + Arc::clone(&self.base_config.object_store), + self.base_config.file_groups[partition].clone(), fun, Arc::clone(&self.projected_schema), - self.limit, + self.base_config.limit, + self.base_config.table_partition_cols.clone(), ))) } @@ -191,17 +147,17 @@ impl ExecutionPlan for CsvExec { write!( f, "CsvExec: files={}, has_header={}, batch_size={}, limit={:?}", - super::FileGroupsDisplay(&self.file_groups), + super::FileGroupsDisplay(&self.base_config.file_groups), self.has_header, - self.batch_size, - self.limit, + self.base_config.batch_size, + self.base_config.limit, ) } } } fn statistics(&self) -> Statistics { - self.statistics.clone() + self.projected_statistics.clone() } } @@ -209,74 +165,153 @@ impl ExecutionPlan for CsvExec { mod tests { use super::*; use crate::{ - datasource::object_store::local::{local_file_meta, LocalFileSystem}, + datasource::object_store::local::{local_unpartitioned_file, LocalFileSystem}, + scalar::ScalarValue, test::aggr_test_schema, }; use futures::StreamExt; #[tokio::test] async fn csv_exec_with_projection() -> Result<()> { - let schema = aggr_test_schema(); + let file_schema = aggr_test_schema(); let testdata = crate::test_util::arrow_test_data(); let filename = "aggregate_test_100.csv"; let path = format!("{}/csv/{}", testdata, filename); let csv = CsvExec::new( - Arc::new(LocalFileSystem {}), - vec![vec![PartitionedFile { - file_meta: local_file_meta(path), - }]], - Statistics::default(), - schema, + PhysicalPlanConfig { + object_store: Arc::new(LocalFileSystem {}), + file_schema, + file_groups: vec![vec![local_unpartitioned_file(path)]], + statistics: Statistics::default(), + projection: Some(vec![0, 2, 4]), + batch_size: 1024, + limit: None, + table_partition_cols: vec![], + }, true, b',', - Some(vec![0, 2, 4]), - 1024, - None, ); - assert_eq!(13, csv.file_schema.fields().len()); + assert_eq!(13, csv.base_config.file_schema.fields().len()); assert_eq!(3, csv.projected_schema.fields().len()); assert_eq!(3, csv.schema().fields().len()); + let mut stream = csv.execute(0).await?; let batch = stream.next().await.unwrap()?; assert_eq!(3, batch.num_columns()); - let batch_schema = batch.schema(); - assert_eq!(3, batch_schema.fields().len()); - assert_eq!("c1", batch_schema.field(0).name()); - assert_eq!("c3", batch_schema.field(1).name()); - assert_eq!("c5", batch_schema.field(2).name()); + 
assert_eq!(100, batch.num_rows()); + + // slice of the first 5 lines + let expected = vec![ + "+----+-----+------------+", + "| c1 | c3 | c5 |", + "+----+-----+------------+", + "| c | 1 | 2033001162 |", + "| d | -40 | 706441268 |", + "| b | 29 | 994303988 |", + "| a | -85 | 1171968280 |", + "| b | -82 | 1824882165 |", + "+----+-----+------------+", + ]; + + crate::assert_batches_eq!(expected, &[batch.slice(0, 5)]); Ok(()) } #[tokio::test] - async fn csv_exec_without_projection() -> Result<()> { - let schema = aggr_test_schema(); + async fn csv_exec_with_limit() -> Result<()> { + let file_schema = aggr_test_schema(); let testdata = crate::test_util::arrow_test_data(); let filename = "aggregate_test_100.csv"; let path = format!("{}/csv/{}", testdata, filename); let csv = CsvExec::new( - Arc::new(LocalFileSystem {}), - vec![vec![PartitionedFile { - file_meta: local_file_meta(path), - }]], - Statistics::default(), - schema, + PhysicalPlanConfig { + object_store: Arc::new(LocalFileSystem {}), + file_schema, + file_groups: vec![vec![local_unpartitioned_file(path)]], + statistics: Statistics::default(), + projection: None, + batch_size: 1024, + limit: Some(5), + table_partition_cols: vec![], + }, true, b',', - None, - 1024, - None, ); - assert_eq!(13, csv.file_schema.fields().len()); + assert_eq!(13, csv.base_config.file_schema.fields().len()); assert_eq!(13, csv.projected_schema.fields().len()); assert_eq!(13, csv.schema().fields().len()); + let mut it = csv.execute(0).await?; let batch = it.next().await.unwrap()?; assert_eq!(13, batch.num_columns()); - let batch_schema = batch.schema(); - assert_eq!(13, batch_schema.fields().len()); - assert_eq!("c1", batch_schema.field(0).name()); - assert_eq!("c2", batch_schema.field(1).name()); - assert_eq!("c3", batch_schema.field(2).name()); + assert_eq!(5, batch.num_rows()); + + let expected = vec![ + "+----+----+-----+--------+------------+----------------------+-----+-------+------------+----------------------+-------------+---------------------+--------------------------------+", + "| c1 | c2 | c3 | c4 | c5 | c6 | c7 | c8 | c9 | c10 | c11 | c12 | c13 |", + "+----+----+-----+--------+------------+----------------------+-----+-------+------------+----------------------+-------------+---------------------+--------------------------------+", + "| c | 2 | 1 | 18109 | 2033001162 | -6513304855495910254 | 25 | 43062 | 1491205016 | 5863949479783605708 | 0.110830784 | 0.9294097332465232 | 6WfVFBVGJSQb7FhA7E0lBwdvjfZnSW |", + "| d | 5 | -40 | 22614 | 706441268 | -7542719935673075327 | 155 | 14337 | 3373581039 | 11720144131976083864 | 0.69632107 | 0.3114712539863804 | C2GT5KVyOPZpgKVl110TyZO0NcJ434 |", + "| b | 1 | 29 | -18218 | 994303988 | 5983957848665088916 | 204 | 9489 | 3275293996 | 14857091259186476033 | 0.53840446 | 0.17909035118828576 | AyYVExXK6AR2qUTxNZ7qRHQOVGMLcz |", + "| a | 1 | -85 | -15154 | 1171968280 | 1919439543497968449 | 77 | 52286 | 774637006 | 12101411955859039553 | 0.12285209 | 0.6864391962767343 | 0keZ5G8BffGwgF2RwQD59TFzMStxCB |", + "| b | 5 | -82 | 22080 | 1824882165 | 7373730676428214987 | 208 | 34331 | 3342719438 | 3330177516592499461 | 0.82634634 | 0.40975383525297016 | Ig1QcuKsjHXkproePdERo2w0mYzIqd |", + "+----+----+-----+--------+------------+----------------------+-----+-------+------------+----------------------+-------------+---------------------+--------------------------------+", + ]; + + crate::assert_batches_eq!(expected, &[batch]); + + Ok(()) + } + + #[tokio::test] + async fn csv_exec_with_partition() -> Result<()> { + let 
file_schema = aggr_test_schema(); + let testdata = crate::test_util::arrow_test_data(); + let filename = "aggregate_test_100.csv"; + // we don't have `/date=xx/` in the path but that is ok because + // partitions are resolved during scan anyway + let path = format!("{}/csv/{}", testdata, filename); + let mut partitioned_file = local_unpartitioned_file(path); + partitioned_file.partition_values = + vec![ScalarValue::Utf8(Some("2021-10-26".to_owned()))]; + let csv = CsvExec::new( + PhysicalPlanConfig { + // we should be able to project on the partition column + // wich is supposed to be after the file fields + projection: Some(vec![0, file_schema.fields().len()]), + object_store: Arc::new(LocalFileSystem {}), + file_schema, + file_groups: vec![vec![partitioned_file]], + statistics: Statistics::default(), + batch_size: 1024, + limit: None, + table_partition_cols: vec!["date".to_owned()], + }, + true, + b',', + ); + assert_eq!(13, csv.base_config.file_schema.fields().len()); + assert_eq!(2, csv.projected_schema.fields().len()); + assert_eq!(2, csv.schema().fields().len()); + + let mut it = csv.execute(0).await?; + let batch = it.next().await.unwrap()?; + assert_eq!(2, batch.num_columns()); + assert_eq!(100, batch.num_rows()); + + // slice of the first 5 lines + let expected = vec![ + "+----+------------+", + "| c1 | date |", + "+----+------------+", + "| c | 2021-10-26 |", + "| d | 2021-10-26 |", + "| b | 2021-10-26 |", + "| a | 2021-10-26 |", + "| b | 2021-10-26 |", + "+----+------------+", + ]; + crate::assert_batches_eq!(expected, &[batch.slice(0, 5)]); Ok(()) } } diff --git a/datafusion/src/physical_plan/file_format/file_stream.rs b/datafusion/src/physical_plan/file_format/file_stream.rs index 55a66f46cf48..958b1721bb39 100644 --- a/datafusion/src/physical_plan/file_format/file_stream.rs +++ b/datafusion/src/physical_plan/file_format/file_stream.rs @@ -23,8 +23,8 @@ use crate::{ datasource::{object_store::ObjectStore, PartitionedFile}, - error::Result as DataFusionResult, physical_plan::RecordBatchStream, + scalar::ScalarValue, }; use arrow::{ datatypes::SchemaRef, @@ -40,8 +40,9 @@ use std::{ task::{Context, Poll}, }; -pub type FileIter = - Box>> + Send + Sync>; +use super::PartitionColumnProjector; + +pub type FileIter = Box + Send + Sync>; pub type BatchIter = Box> + Send + Sync>; /// A closure that creates a file format reader (iterator over `RecordBatch`) from a `Read` object @@ -63,10 +64,13 @@ impl FormatReaderOpener for T where pub struct FileStream { /// An iterator over record batches of the last file returned by file_iter batch_iter: BatchIter, - /// An iterator over input files + /// Partitioning column values for the current batch_iter + partition_values: Vec, + /// An iterator over input files. file_iter: FileIter, - /// The stream schema (file schema after projection) - schema: SchemaRef, + /// The stream schema (file schema including partition columns and after + /// projection). + projected_schema: SchemaRef, /// The remaining number of records to parse, None if no limit remain: Option, /// A closure that takes a reader and an optional remaining number of lines @@ -74,6 +78,10 @@ pub struct FileStream { /// is not capable of limiting the number of records in the last batch, the file /// stream will take care of truncating it. file_reader: F, + /// The partition column projector + pc_projector: PartitionColumnProjector, + /// the store from which to source the files. 
+ object_store: Arc, } impl FileStream { @@ -81,34 +89,48 @@ impl FileStream { object_store: Arc, files: Vec, file_reader: F, - schema: SchemaRef, + projected_schema: SchemaRef, limit: Option, + table_partition_cols: Vec, ) -> Self { - let read_iter = files.into_iter().map(move |f| -> DataFusionResult<_> { - object_store - .file_reader(f.file_meta.sized_file)? - .sync_reader() - }); + let pc_projector = PartitionColumnProjector::new( + Arc::clone(&projected_schema), + &table_partition_cols, + ); Self { - file_iter: Box::new(read_iter), + file_iter: Box::new(files.into_iter()), batch_iter: Box::new(iter::empty()), + partition_values: vec![], remain: limit, - schema, + projected_schema, file_reader, + pc_projector, + object_store, } } - /// Acts as a flat_map of record batches over files. + /// Acts as a flat_map of record batches over files. Adds the partitioning + /// Columns to the returned record batches. fn next_batch(&mut self) -> Option> { match self.batch_iter.next() { - Some(batch) => Some(batch), + Some(Ok(batch)) => { + Some(self.pc_projector.project(batch, &self.partition_values)) + } + Some(Err(e)) => Some(Err(e)), None => match self.file_iter.next() { - Some(Ok(f)) => { - self.batch_iter = (self.file_reader)(f, &self.remain); - self.next_batch() + Some(f) => { + self.partition_values = f.partition_values; + self.object_store + .file_reader(f.file_meta.sized_file) + .and_then(|r| r.sync_reader()) + .map_err(|e| ArrowError::ExternalError(Box::new(e))) + .and_then(|f| { + self.batch_iter = (self.file_reader)(f, &self.remain); + self.next_batch().transpose() + }) + .transpose() } - Some(Err(e)) => Some(Err(ArrowError::ExternalError(Box::new(e)))), None => None, }, } @@ -157,7 +179,7 @@ impl Stream for FileStream { impl RecordBatchStream for FileStream { fn schema(&self) -> SchemaRef { - Arc::clone(&self.schema) + Arc::clone(&self.projected_schema) } } @@ -191,6 +213,7 @@ mod tests { reader, source_schema, limit, + vec![], ); file_stream diff --git a/datafusion/src/physical_plan/file_format/json.rs b/datafusion/src/physical_plan/file_format/json.rs index f9dde67fea2d..9032eb9d5e5d 100644 --- a/datafusion/src/physical_plan/file_format/json.rs +++ b/datafusion/src/physical_plan/file_format/json.rs @@ -18,61 +18,34 @@ //! 
Execution plan for reading line-delimited JSON files use async_trait::async_trait; -use crate::datasource::object_store::ObjectStore; -use crate::datasource::PartitionedFile; use crate::error::{DataFusionError, Result}; use crate::physical_plan::{ DisplayFormatType, ExecutionPlan, Partitioning, SendableRecordBatchStream, Statistics, }; -use arrow::{ - datatypes::{Schema, SchemaRef}, - json, -}; +use arrow::{datatypes::SchemaRef, json}; use std::any::Any; use std::sync::Arc; use super::file_stream::{BatchIter, FileStream}; +use super::PhysicalPlanConfig; /// Execution plan for scanning NdJson data source #[derive(Debug, Clone)] pub struct NdJsonExec { - object_store: Arc, - file_groups: Vec>, - statistics: Statistics, - file_schema: SchemaRef, - projection: Option>, + base_config: PhysicalPlanConfig, + projected_statistics: Statistics, projected_schema: SchemaRef, - batch_size: usize, - limit: Option, } impl NdJsonExec { - /// Create a new JSON reader execution plan provided file list and schema - pub fn new( - object_store: Arc, - file_groups: Vec>, - statistics: Statistics, - file_schema: SchemaRef, - projection: Option>, - batch_size: usize, - limit: Option, - ) -> Self { - let projected_schema = match &projection { - None => Arc::clone(&file_schema), - Some(p) => Arc::new(Schema::new( - p.iter().map(|i| file_schema.field(*i).clone()).collect(), - )), - }; + /// Create a new JSON reader execution plan provided base configurations + pub fn new(base_config: PhysicalPlanConfig) -> Self { + let (projected_schema, projected_statistics) = base_config.project(); Self { - object_store, - file_groups, - statistics, - file_schema, - projection, + base_config, projected_schema, - batch_size, - limit, + projected_statistics, } } } @@ -88,7 +61,7 @@ impl ExecutionPlan for NdJsonExec { } fn output_partitioning(&self) -> Partitioning { - Partitioning::UnknownPartitioning(self.file_groups.len()) + Partitioning::UnknownPartitioning(self.base_config.file_groups.len()) } fn children(&self) -> Vec> { @@ -110,15 +83,10 @@ impl ExecutionPlan for NdJsonExec { } async fn execute(&self, partition: usize) -> Result { - let proj = self.projection.as_ref().map(|p| { - p.iter() - .map(|col_idx| self.file_schema.field(*col_idx).name()) - .cloned() - .collect() - }); + let proj = self.base_config.projected_file_column_names(); - let batch_size = self.batch_size; - let file_schema = Arc::clone(&self.file_schema); + let batch_size = self.base_config.batch_size; + let file_schema = Arc::clone(&self.base_config.file_schema); // The json reader cannot limit the number of records, so `remaining` is ignored. 
let fun = move |file, _remaining: &Option| { @@ -131,11 +99,12 @@ impl ExecutionPlan for NdJsonExec { }; Ok(Box::pin(FileStream::new( - Arc::clone(&self.object_store), - self.file_groups[partition].clone(), + Arc::clone(&self.base_config.object_store), + self.base_config.file_groups[partition].clone(), fun, Arc::clone(&self.projected_schema), - self.limit, + self.base_config.limit, + self.base_config.table_partition_cols.clone(), ))) } @@ -149,16 +118,16 @@ impl ExecutionPlan for NdJsonExec { write!( f, "JsonExec: batch_size={}, limit={:?}, files={}", - self.batch_size, - self.limit, - super::FileGroupsDisplay(&self.file_groups), + self.base_config.batch_size, + self.base_config.limit, + super::FileGroupsDisplay(&self.base_config.file_groups), ) } } } fn statistics(&self) -> Statistics { - self.statistics.clone() + self.projected_statistics.clone() } } @@ -169,7 +138,7 @@ mod tests { use crate::datasource::{ file_format::{json::JsonFormat, FileFormat}, object_store::local::{ - local_file_meta, local_object_reader_stream, LocalFileSystem, + local_object_reader_stream, local_unpartitioned_file, LocalFileSystem, }, }; @@ -187,17 +156,16 @@ mod tests { async fn nd_json_exec_file_without_projection() -> Result<()> { use arrow::datatypes::DataType; let path = format!("{}/1.json", TEST_DATA_BASE); - let exec = NdJsonExec::new( - Arc::new(LocalFileSystem {}), - vec![vec![PartitionedFile { - file_meta: local_file_meta(path.clone()), - }]], - Default::default(), - infer_schema(path).await?, - None, - 1024, - Some(3), - ); + let exec = NdJsonExec::new(PhysicalPlanConfig { + object_store: Arc::new(LocalFileSystem {}), + file_groups: vec![vec![local_unpartitioned_file(path.clone())]], + file_schema: infer_schema(path).await?, + statistics: Statistics::default(), + projection: None, + batch_size: 1024, + limit: Some(3), + table_partition_cols: vec![], + }); // TODO: this is not where schema inference should be tested @@ -242,17 +210,16 @@ mod tests { #[tokio::test] async fn nd_json_exec_file_projection() -> Result<()> { let path = format!("{}/1.json", TEST_DATA_BASE); - let exec = NdJsonExec::new( - Arc::new(LocalFileSystem {}), - vec![vec![PartitionedFile { - file_meta: local_file_meta(path.clone()), - }]], - Default::default(), - infer_schema(path).await?, - Some(vec![0, 2]), - 1024, - None, - ); + let exec = NdJsonExec::new(PhysicalPlanConfig { + object_store: Arc::new(LocalFileSystem {}), + file_groups: vec![vec![local_unpartitioned_file(path.clone())]], + file_schema: infer_schema(path).await?, + statistics: Statistics::default(), + projection: Some(vec![0, 2]), + batch_size: 1024, + limit: None, + table_partition_cols: vec![], + }); let inferred_schema = exec.schema(); assert_eq!(inferred_schema.fields().len(), 2); diff --git a/datafusion/src/physical_plan/file_format/mod.rs b/datafusion/src/physical_plan/file_format/mod.rs index b0b690519eca..d460e9830fe5 100644 --- a/datafusion/src/physical_plan/file_format/mod.rs +++ b/datafusion/src/physical_plan/file_format/mod.rs @@ -24,19 +24,134 @@ mod json; mod parquet; pub use self::parquet::ParquetExec; +use arrow::{ + array::{ArrayData, ArrayRef, DictionaryArray, UInt8BufferBuilder}, + buffer::Buffer, + datatypes::{DataType, Field, Schema, SchemaRef, UInt8Type}, + error::{ArrowError, Result as ArrowResult}, + record_batch::RecordBatch, +}; pub use avro::AvroExec; pub use csv::CsvExec; pub use json::NdJsonExec; -use crate::datasource::PartitionedFile; -use std::fmt::{Display, Formatter, Result}; +use crate::{ + datasource::{object_store::ObjectStore, 
PartitionedFile}, + scalar::ScalarValue, +}; +use std::{ + collections::HashMap, + fmt::{Display, Formatter, Result as FmtResult}, + sync::Arc, + vec, +}; + +use super::{ColumnStatistics, Statistics}; + +lazy_static! { + /// The datatype used for all partitioning columns for now + pub static ref DEFAULT_PARTITION_COLUMN_DATATYPE: DataType = DataType::Dictionary(Box::new(DataType::UInt8), Box::new(DataType::Utf8)); +} + +/// The base configurations to provide when creating a physical plan for +/// any given file format. +#[derive(Debug, Clone)] +pub struct PhysicalPlanConfig { + /// Store from which the `files` should be fetched + pub object_store: Arc, + /// Schema before projection. It contains the columns that are expected + /// to be in the files without the table partition columns. + pub file_schema: SchemaRef, + /// List of files to be processed, grouped into partitions + pub file_groups: Vec>, + /// Estimated overall statistics of the files, taking `filters` into account. + pub statistics: Statistics, + /// Columns on which to project the data. Indexes that are higher than the + /// number of columns of `file_schema` refer to `table_partition_cols`. + pub projection: Option>, + /// The maximum number of records per arrow column + pub batch_size: usize, + /// The minimum number of records required from this source plan + pub limit: Option, + /// The partitioning column names + pub table_partition_cols: Vec, +} + +impl PhysicalPlanConfig { + /// Project the schema and the statistics on the given column indices + fn project(&self) -> (SchemaRef, Statistics) { + if self.projection.is_none() && self.table_partition_cols.is_empty() { + return (Arc::clone(&self.file_schema), self.statistics.clone()); + } + + let proj_iter: Box> = match &self.projection { + Some(proj) => Box::new(proj.iter().copied()), + None => Box::new( + 0..(self.file_schema.fields().len() + self.table_partition_cols.len()), + ), + }; + + let mut table_fields = vec![]; + let mut table_cols_stats = vec![]; + for idx in proj_iter { + if idx < self.file_schema.fields().len() { + table_fields.push(self.file_schema.field(idx).clone()); + if let Some(file_cols_stats) = &self.statistics.column_statistics { + table_cols_stats.push(file_cols_stats[idx].clone()) + } else { + table_cols_stats.push(ColumnStatistics::default()) + } + } else { + let partition_idx = idx - self.file_schema.fields().len(); + table_fields.push(Field::new( + &self.table_partition_cols[partition_idx], + DEFAULT_PARTITION_COLUMN_DATATYPE.clone(), + false, + )); + // TODO provide accurate stat for partition column (#1186) + table_cols_stats.push(ColumnStatistics::default()) + } + } + + let table_stats = Statistics { + num_rows: self.statistics.num_rows, + is_exact: self.statistics.is_exact, + // TODO correct byte size? 
+ total_byte_size: None, + column_statistics: Some(table_cols_stats), + }; + + let table_schema = Arc::new(Schema::new(table_fields)); + + (table_schema, table_stats) + } + + fn projected_file_column_names(&self) -> Option> { + self.projection.as_ref().map(|p| { + p.iter() + .filter(|col_idx| **col_idx < self.file_schema.fields().len()) + .map(|col_idx| self.file_schema.field(*col_idx).name()) + .cloned() + .collect() + }) + } + + fn file_column_projection_indices(&self) -> Option> { + self.projection.as_ref().map(|p| { + p.iter() + .filter(|col_idx| **col_idx < self.file_schema.fields().len()) + .copied() + .collect() + }) + } +} /// A wrapper to customize partitioned file display #[derive(Debug)] struct FileGroupsDisplay<'a>(&'a [Vec]); impl<'a> Display for FileGroupsDisplay<'a> { - fn fmt(&self, f: &mut Formatter) -> Result { + fn fmt(&self, f: &mut Formatter) -> FmtResult { let parts: Vec<_> = self .0 .iter() @@ -50,3 +165,324 @@ impl<'a> Display for FileGroupsDisplay<'a> { write!(f, "[{}]", parts.join(", ")) } } + +/// A helper that projects partition columns into the file record batches. +/// +/// One interesting trick is the usage of a cache for the key buffers of the partition column +/// dictionaries. Indeed, the partition columns are constant, so the dictionaries that represent them +/// have all their keys equal to 0. This enables us to re-use the same "all-zero" buffer across batches, +/// which makes the space consumption of the partition columns O(batch_size) instead of O(record_count). +struct PartitionColumnProjector { + /// An Arrow buffer initialized to zeros that represents the key array of all partition + /// columns (partition columns are materialized by dictionary arrays with only one + /// value in the dictionary, thus all the keys are equal to zero). + key_buffer_cache: Option, + /// Mapping between the indexes in the list of partition columns and the target + /// schema. Sorted by index in the target schema so that we can iterate on it to + /// insert the partition columns in the target record batch. + projected_partition_indexes: Vec<(usize, usize)>, + /// The schema of the table once the projection was applied. 
+ projected_schema: SchemaRef, +} + +impl PartitionColumnProjector { + // Create a projector to insert the partitioning columns into batches read from files + // - projected_schema: the target schema with both file and partitioning columns + // - table_partition_cols: all the partitioning column names + fn new(projected_schema: SchemaRef, table_partition_cols: &[String]) -> Self { + let mut idx_map = HashMap::new(); + for (partition_idx, partition_name) in table_partition_cols.iter().enumerate() { + if let Ok(schema_idx) = projected_schema.index_of(partition_name) { + idx_map.insert(partition_idx, schema_idx); + } + } + + let mut projected_partition_indexes: Vec<_> = idx_map.into_iter().collect(); + projected_partition_indexes.sort_by(|(_, a), (_, b)| a.cmp(b)); + + Self { + projected_partition_indexes, + key_buffer_cache: None, + projected_schema, + } + } + + // Transform the batch read from the file by inserting the partitioning columns + // to the right positions as deduced from `projected_schema` + // - file_batch: batch read from the file, with internal projection applied + // - partition_values: the list of partition values, one for each partition column + fn project( + &mut self, + file_batch: RecordBatch, + partition_values: &[ScalarValue], + ) -> ArrowResult { + let expected_cols = + self.projected_schema.fields().len() - self.projected_partition_indexes.len(); + + if file_batch.columns().len() != expected_cols { + return Err(ArrowError::SchemaError(format!( + "Unexpected batch schema from file, expected {} cols but got {}", + expected_cols, + file_batch.columns().len() + ))); + } + + let mut cols = file_batch.columns().to_vec(); + for &(pidx, sidx) in &self.projected_partition_indexes { + cols.insert( + sidx, + create_dict_array( + &mut self.key_buffer_cache, + &partition_values[pidx], + file_batch.num_rows(), + ), + ) + } + RecordBatch::try_new(Arc::clone(&self.projected_schema), cols) + } +} + +fn create_dict_array( + key_buffer_cache: &mut Option, + val: &ScalarValue, + len: usize, +) -> ArrayRef { + // build value dictionary + let dict_vals = val.to_array(); + + // build keys array + let sliced_key_buffer = match key_buffer_cache { + Some(buf) if buf.len() >= len => buf.slice(buf.len() - len), + _ => { + let mut key_buffer_builder = UInt8BufferBuilder::new(len); + key_buffer_builder.advance(len); // keys are all 0 + key_buffer_cache.insert(key_buffer_builder.finish()).clone() + } + }; + + // create data type + let data_type = + DataType::Dictionary(Box::new(DataType::UInt8), Box::new(val.get_datatype())); + + debug_assert_eq!(data_type, *DEFAULT_PARTITION_COLUMN_DATATYPE); + + // assemble pieces together + let mut builder = ArrayData::builder(data_type) + .len(len) + .add_buffer(sliced_key_buffer); + builder = builder.add_child_data(dict_vals.data().clone()); + Arc::new(DictionaryArray::::from(builder.build().unwrap())) +} + +#[cfg(test)] +mod tests { + use crate::test::{ + aggr_test_schema, build_table_i32, columns, object_store::TestObjectStore, + }; + + use super::*; + + #[test] + fn physical_plan_config_no_projection() { + let file_schema = aggr_test_schema(); + let conf = config_for_projection( + Arc::clone(&file_schema), + None, + Statistics::default(), + vec!["date".to_owned()], + ); + + let (proj_schema, proj_statistics) = conf.project(); + assert_eq!(proj_schema.fields().len(), file_schema.fields().len() + 1); + assert_eq!( + proj_schema.field(file_schema.fields().len()).name(), + "date", + "partition columns are the last columns" + ); + assert_eq!( + proj_statistics 
+ .column_statistics + .expect("projection creates column statistics") + .len(), + file_schema.fields().len() + 1 + ); + // TODO implement tests for partition column statistics once implemented + + let col_names = conf.projected_file_column_names(); + assert_eq!(col_names, None); + + let col_indices = conf.file_column_projection_indices(); + assert_eq!(col_indices, None); + } + + #[test] + fn physical_plan_config_with_projection() { + let file_schema = aggr_test_schema(); + let conf = config_for_projection( + Arc::clone(&file_schema), + Some(vec![file_schema.fields().len(), 0]), + Statistics { + num_rows: Some(10), + // assign the column index to distinct_count to help assert + // the source statistic after the projection + column_statistics: Some( + (0..file_schema.fields().len()) + .map(|i| ColumnStatistics { + distinct_count: Some(i), + ..Default::default() + }) + .collect(), + ), + ..Default::default() + }, + vec!["date".to_owned()], + ); + + let (proj_schema, proj_statistics) = conf.project(); + assert_eq!( + columns(&proj_schema), + vec!["date".to_owned(), "c1".to_owned()] + ); + let proj_stat_cols = proj_statistics + .column_statistics + .expect("projection creates column statistics"); + assert_eq!(proj_stat_cols.len(), 2); + // TODO implement tests for proj_stat_cols[0] once partition column + // statistics are implemented + assert_eq!(proj_stat_cols[1].distinct_count, Some(0)); + + let col_names = conf.projected_file_column_names(); + assert_eq!(col_names, Some(vec!["c1".to_owned()])); + + let col_indices = conf.file_column_projection_indices(); + assert_eq!(col_indices, Some(vec![0])); + } + + #[test] + fn partition_column_projector() { + let file_batch = build_table_i32( + ("a", &vec![0, 1, 2]), + ("b", &vec![-2, -1, 0]), + ("c", &vec![10, 11, 12]), + ); + let partition_cols = + vec!["year".to_owned(), "month".to_owned(), "day".to_owned()]; + // create a projected schema + let conf = config_for_projection( + file_batch.schema(), + // keep all cols from file and 2 from partitioning + Some(vec![ + 0, + 1, + 2, + file_batch.schema().fields().len(), + file_batch.schema().fields().len() + 2, + ]), + Statistics::default(), + partition_cols.clone(), + ); + let (proj_schema, _) = conf.project(); + // created a projector for that projected schema + let mut proj = PartitionColumnProjector::new(proj_schema, &partition_cols); + + // project first batch + let projected_batch = proj + .project( + // file_batch is ok here because we kept all the file cols in the projection + file_batch, + &[ + ScalarValue::Utf8(Some("2021".to_owned())), + ScalarValue::Utf8(Some("10".to_owned())), + ScalarValue::Utf8(Some("26".to_owned())), + ], + ) + .expect("Projection of partition columns into record batch failed"); + let expected = vec![ + "+---+----+----+------+-----+", + "| a | b | c | year | day |", + "+---+----+----+------+-----+", + "| 0 | -2 | 10 | 2021 | 26 |", + "| 1 | -1 | 11 | 2021 | 26 |", + "| 2 | 0 | 12 | 2021 | 26 |", + "+---+----+----+------+-----+", + ]; + crate::assert_batches_eq!(expected, &[projected_batch]); + + // project another batch that is larger than the previous one + let file_batch = build_table_i32( + ("a", &vec![5, 6, 7, 8, 9]), + ("b", &vec![-10, -9, -8, -7, -6]), + ("c", &vec![12, 13, 14, 15, 16]), + ); + let projected_batch = proj + .project( + // file_batch is ok here because we kept all the file cols in the projection + file_batch, + &[ + ScalarValue::Utf8(Some("2021".to_owned())), + ScalarValue::Utf8(Some("10".to_owned())), + ScalarValue::Utf8(Some("27".to_owned())), + ], 
+ ) + .expect("Projection of partition columns into record batch failed"); + let expected = vec![ + "+---+-----+----+------+-----+", + "| a | b | c | year | day |", + "+---+-----+----+------+-----+", + "| 5 | -10 | 12 | 2021 | 27 |", + "| 6 | -9 | 13 | 2021 | 27 |", + "| 7 | -8 | 14 | 2021 | 27 |", + "| 8 | -7 | 15 | 2021 | 27 |", + "| 9 | -6 | 16 | 2021 | 27 |", + "+---+-----+----+------+-----+", + ]; + crate::assert_batches_eq!(expected, &[projected_batch]); + + // project another batch that is smaller than the previous one + let file_batch = build_table_i32( + ("a", &vec![0, 1, 3]), + ("b", &vec![2, 3, 4]), + ("c", &vec![4, 5, 6]), + ); + let projected_batch = proj + .project( + // file_batch is ok here because we kept all the file cols in the projection + file_batch, + &[ + ScalarValue::Utf8(Some("2021".to_owned())), + ScalarValue::Utf8(Some("10".to_owned())), + ScalarValue::Utf8(Some("28".to_owned())), + ], + ) + .expect("Projection of partition columns into record batch failed"); + let expected = vec![ + "+---+---+---+------+-----+", + "| a | b | c | year | day |", + "+---+---+---+------+-----+", + "| 0 | 2 | 4 | 2021 | 28 |", + "| 1 | 3 | 5 | 2021 | 28 |", + "| 3 | 4 | 6 | 2021 | 28 |", + "+---+---+---+------+-----+", + ]; + crate::assert_batches_eq!(expected, &[projected_batch]); + } + + // sets default for configs that play no role in projections + fn config_for_projection( + file_schema: SchemaRef, + projection: Option>, + statistics: Statistics, + table_partition_cols: Vec, + ) -> PhysicalPlanConfig { + PhysicalPlanConfig { + batch_size: 1024, + file_schema, + file_groups: vec![vec![]], + limit: None, + object_store: TestObjectStore::new_arc(&[]), + projection, + statistics, + table_partition_cols, + } + } +} diff --git a/datafusion/src/physical_plan/file_format/parquet.rs b/datafusion/src/physical_plan/file_format/parquet.rs index d07d2a945e8e..e7980d9aa6d3 100644 --- a/datafusion/src/physical_plan/file_format/parquet.rs +++ b/datafusion/src/physical_plan/file_format/parquet.rs @@ -29,6 +29,7 @@ use crate::{ logical_plan::{Column, Expr}, physical_optimizer::pruning::{PruningPredicate, PruningStatistics}, physical_plan::{ + file_format::PhysicalPlanConfig, metrics::{self, ExecutionPlanMetricsSet, MetricBuilder, MetricsSet}, stream::RecordBatchReceiverStream, DisplayFormatType, ExecutionPlan, Partitioning, SendableRecordBatchStream, @@ -60,27 +61,18 @@ use tokio::{ use async_trait::async_trait; +use super::PartitionColumnProjector; + /// Execution plan for scanning one or more Parquet partitions #[derive(Debug, Clone)] pub struct ParquetExec { - object_store: Arc, - /// Grouped list of files. Each group will be processed together by one - /// partition of the `ExecutionPlan`. - file_groups: Vec>, - /// Schema after projection is applied - schema: SchemaRef, - /// Projection for which columns to load - projection: Vec, - /// Batch size - batch_size: usize, - /// Statistics for the data set (sum of statistics for all partitions) - statistics: Statistics, + base_config: PhysicalPlanConfig, + projected_statistics: Statistics, + projected_schema: SchemaRef, /// Execution metrics metrics: ExecutionPlanMetricsSet, /// Optional predicate builder predicate_builder: Option, - /// Optional limit of the number of rows - limit: Option, } /// Stores metrics about the parquet execution for a particular parquet file @@ -95,26 +87,19 @@ struct ParquetFileMetrics { impl ParquetExec { /// Create a new Parquet reader execution plan provided file list and schema. 
/// Even if `limit` is set, ParquetExec rounds up the number of records to the next `batch_size`. - #[allow(clippy::too_many_arguments)] - pub fn new( - object_store: Arc, - file_groups: Vec>, - statistics: Statistics, - schema: SchemaRef, - projection: Option>, - predicate: Option, - batch_size: usize, - limit: Option, - ) -> Self { + pub fn new(base_config: PhysicalPlanConfig, predicate: Option) -> Self { debug!("Creating ParquetExec, files: {:?}, projection {:?}, predicate: {:?}, limit: {:?}", - file_groups, projection, predicate, limit); + base_config.file_groups, base_config.projection, predicate, base_config.limit); let metrics = ExecutionPlanMetricsSet::new(); let predicate_creation_errors = MetricBuilder::new(&metrics).global_counter("num_predicate_creation_errors"); let predicate_builder = predicate.and_then(|predicate_expr| { - match PruningPredicate::try_new(&predicate_expr, schema.clone()) { + match PruningPredicate::try_new( + &predicate_expr, + base_config.file_schema.clone(), + ) { Ok(predicate_builder) => Some(predicate_builder), Err(e) => { debug!( @@ -127,73 +112,20 @@ impl ParquetExec { } }); - let projection = match projection { - Some(p) => p, - None => (0..schema.fields().len()).collect(), - }; - - let (projected_schema, projected_statistics) = - Self::project(&projection, schema, statistics); + let (projected_schema, projected_statistics) = base_config.project(); Self { - object_store, - file_groups, - schema: projected_schema, - projection, + base_config, + projected_schema, + projected_statistics, metrics, predicate_builder, - batch_size, - statistics: projected_statistics, - limit, } } - fn project( - projection: &[usize], - schema: SchemaRef, - statistics: Statistics, - ) -> (SchemaRef, Statistics) { - let projected_schema = Schema::new( - projection - .iter() - .map(|i| schema.field(*i).clone()) - .collect(), - ); - - let new_column_statistics = statistics.column_statistics.map(|stats| { - let mut projected_stats = Vec::with_capacity(projection.len()); - for proj in projection { - projected_stats.push(stats[*proj].clone()); - } - projected_stats - }); - - let statistics = Statistics { - num_rows: statistics.num_rows, - total_byte_size: statistics.total_byte_size, - column_statistics: new_column_statistics, - is_exact: statistics.is_exact, - }; - - (Arc::new(projected_schema), statistics) - } - - /// List of data files - pub fn file_groups(&self) -> &[Vec] { - &self.file_groups - } - /// Optional projection for which columns to load - pub fn projection(&self) -> &[usize] { - &self.projection - } - /// Batch size - pub fn batch_size(&self) -> usize { - self.batch_size - } - - /// Limit in nr. 
of rows - pub fn limit(&self) -> Option { - self.limit + /// Ref to the base configs + pub fn base_config(&self) -> &PhysicalPlanConfig { + &self.base_config } } @@ -227,7 +159,7 @@ impl ExecutionPlan for ParquetExec { } fn schema(&self) -> SchemaRef { - self.schema.clone() + Arc::clone(&self.projected_schema) } fn children(&self) -> Vec> { @@ -237,7 +169,7 @@ impl ExecutionPlan for ParquetExec { /// Get the output partitioning of this plan fn output_partitioning(&self) -> Partitioning { - Partitioning::UnknownPartitioning(self.file_groups.len()) + Partitioning::UnknownPartitioning(self.base_config.file_groups.len()) } fn with_new_children( @@ -262,13 +194,20 @@ impl ExecutionPlan for ParquetExec { Receiver>, ) = channel(2); - let partition = self.file_groups[partition_index].clone(); + let partition = self.base_config.file_groups[partition_index].clone(); let metrics = self.metrics.clone(); - let projection = self.projection.clone(); + let projection = match self.base_config.file_column_projection_indices() { + Some(proj) => proj, + None => (0..self.base_config.file_schema.fields().len()).collect(), + }; let predicate_builder = self.predicate_builder.clone(); - let batch_size = self.batch_size; - let limit = self.limit; - let object_store = Arc::clone(&self.object_store); + let batch_size = self.base_config.batch_size; + let limit = self.base_config.limit; + let object_store = Arc::clone(&self.base_config.object_store); + let partition_col_proj = PartitionColumnProjector::new( + Arc::clone(&self.projected_schema), + &self.base_config.table_partition_cols, + ); let join_handle = task::spawn_blocking(move || { if let Err(e) = read_partition( @@ -281,13 +220,14 @@ impl ExecutionPlan for ParquetExec { batch_size, response_tx, limit, + partition_col_proj, ) { println!("Parquet reader thread terminated due to error: {:?}", e); } }); Ok(RecordBatchReceiverStream::create( - &self.schema, + &self.projected_schema, response_rx, join_handle, )) @@ -303,9 +243,9 @@ impl ExecutionPlan for ParquetExec { write!( f, "ParquetExec: batch_size={}, limit={:?}, partitions={}", - self.batch_size, - self.limit, - super::FileGroupsDisplay(&self.file_groups) + self.base_config.batch_size, + self.base_config.limit, + super::FileGroupsDisplay(&self.base_config.file_groups) ) } } @@ -316,7 +256,7 @@ impl ExecutionPlan for ParquetExec { } fn statistics(&self) -> Statistics { - self.statistics.clone() + self.projected_statistics.clone() } } @@ -456,6 +396,7 @@ fn read_partition( batch_size: usize, response_tx: Sender>, limit: Option, + mut partition_column_projector: PartitionColumnProjector, ) -> Result<()> { let mut total_rows = 0; 'outer: for partitioned_file in partition { @@ -483,7 +424,10 @@ fn read_partition( match batch_reader.next() { Some(Ok(batch)) => { total_rows += batch.num_rows(); - send_result(&response_tx, Ok(batch))?; + let proj_batch = partition_column_projector + .project(batch, &partitioned_file.partition_values); + + send_result(&response_tx, proj_batch)?; if limit.map(|l| total_rows >= l).unwrap_or(false) { break 'outer; } @@ -519,7 +463,7 @@ mod tests { use crate::datasource::{ file_format::{parquet::ParquetFormat, FileFormat}, object_store::local::{ - local_file_meta, local_object_reader_stream, LocalFileSystem, + local_object_reader_stream, local_unpartitioned_file, LocalFileSystem, }, }; @@ -533,21 +477,22 @@ mod tests { }; #[tokio::test] - async fn test() -> Result<()> { + async fn parquet_exec_with_projection() -> Result<()> { let testdata = crate::test_util::parquet_test_data(); let 
filename = format!("{}/alltypes_plain.parquet", testdata); let parquet_exec = ParquetExec::new( - Arc::new(LocalFileSystem {}), - vec![vec![PartitionedFile { - file_meta: local_file_meta(filename.clone()), - }]], - Statistics::default(), - ParquetFormat::default() - .infer_schema(local_object_reader_stream(vec![filename])) - .await?, - Some(vec![0, 1, 2]), - None, - 1024, + PhysicalPlanConfig { + object_store: Arc::new(LocalFileSystem {}), + file_groups: vec![vec![local_unpartitioned_file(filename.clone())]], + file_schema: ParquetFormat::default() + .infer_schema(local_object_reader_stream(vec![filename])) + .await?, + statistics: Statistics::default(), + projection: Some(vec![0, 1, 2]), + batch_size: 1024, + limit: None, + table_partition_cols: vec![], + }, None, ); assert_eq!(parquet_exec.output_partitioning().partition_count(), 1); @@ -575,6 +520,62 @@ mod tests { Ok(()) } + #[tokio::test] + async fn parquet_exec_with_partition() -> Result<()> { + let testdata = crate::test_util::parquet_test_data(); + let filename = format!("{}/alltypes_plain.parquet", testdata); + let mut partitioned_file = local_unpartitioned_file(filename.clone()); + partitioned_file.partition_values = vec![ + ScalarValue::Utf8(Some("2021".to_owned())), + ScalarValue::Utf8(Some("10".to_owned())), + ScalarValue::Utf8(Some("26".to_owned())), + ]; + let parquet_exec = ParquetExec::new( + PhysicalPlanConfig { + object_store: Arc::new(LocalFileSystem {}), + file_groups: vec![vec![partitioned_file]], + file_schema: ParquetFormat::default() + .infer_schema(local_object_reader_stream(vec![filename])) + .await?, + statistics: Statistics::default(), + // file has 10 cols so index 12 should be month + projection: Some(vec![0, 1, 2, 12]), + batch_size: 1024, + limit: None, + table_partition_cols: vec![ + "year".to_owned(), + "month".to_owned(), + "day".to_owned(), + ], + }, + None, + ); + assert_eq!(parquet_exec.output_partitioning().partition_count(), 1); + + let mut results = parquet_exec.execute(0).await?; + let batch = results.next().await.unwrap()?; + let expected = vec![ + "+----+----------+-------------+-------+", + "| id | bool_col | tinyint_col | month |", + "+----+----------+-------------+-------+", + "| 4 | true | 0 | 10 |", + "| 5 | false | 1 | 10 |", + "| 6 | true | 0 | 10 |", + "| 7 | false | 1 | 10 |", + "| 2 | true | 0 | 10 |", + "| 3 | false | 1 | 10 |", + "| 0 | true | 0 | 10 |", + "| 1 | false | 1 | 10 |", + "+----+----------+-------------+-------+", + ]; + crate::assert_batches_eq!(expected, &[batch]); + + let batch = results.next().await; + assert!(batch.is_none()); + + Ok(()) + } + fn parquet_file_metrics() -> ParquetFileMetrics { let metrics = Arc::new(ExecutionPlanMetricsSet::new()); ParquetFileMetrics::new(0, "file.parquet", &metrics) diff --git a/datafusion/src/physical_plan/filter.rs b/datafusion/src/physical_plan/filter.rs index 79b5ebc508f5..fe0f10313451 100644 --- a/datafusion/src/physical_plan/filter.rs +++ b/datafusion/src/physical_plan/filter.rs @@ -224,10 +224,10 @@ mod tests { use super::*; use crate::datasource::object_store::local::LocalFileSystem; use crate::physical_plan::expressions::*; - use crate::physical_plan::file_format::CsvExec; + use crate::physical_plan::file_format::{CsvExec, PhysicalPlanConfig}; use crate::physical_plan::ExecutionPlan; use crate::scalar::ScalarValue; - use crate::test::{self, aggr_test_schema}; + use crate::test::{self}; use crate::{logical_plan::Operator, physical_plan::collect}; use std::iter::Iterator; @@ -240,15 +240,18 @@ mod tests { 
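[Editor's aside, not part of the patch] The `parquet_exec_with_partition` test above relies on a simple indexing convention: a projection index smaller than the width of the file schema selects a file column, while a larger index selects an entry of `table_partition_cols`, offset by that width; the file-side indices are what `file_column_projection_indices` returns. The standalone sketch below illustrates that convention only; `split_projection` is a hypothetical name used for illustration and is not an API introduced by this patch.

// Hypothetical helper illustrating the projection-index convention used by
// PhysicalPlanConfig: indices below `file_cols` select file columns, the rest
// select table partition columns (offset by `file_cols`).
fn split_projection(
    file_cols: usize,
    projection: &[usize],
) -> (Vec<usize>, Vec<usize>) {
    let mut file_indices = Vec::new();
    let mut partition_indices = Vec::new();
    for &i in projection {
        if i < file_cols {
            file_indices.push(i);
        } else {
            partition_indices.push(i - file_cols);
        }
    }
    (file_indices, partition_indices)
}

fn main() {
    // e.g. a file schema with 3 columns and partition cols ["year", "month", "day"]:
    // projection [0, 2, 4] keeps two file columns plus the "month" partition column
    let (file, part) = split_projection(3, &[0, 2, 4]);
    assert_eq!(file, vec![0, 2]);
    assert_eq!(part, vec![1]);
}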
test::create_partitioned_csv("aggregate_test_100.csv", partitions)?; let csv = CsvExec::new( - Arc::new(LocalFileSystem {}), - files, - Statistics::default(), - aggr_test_schema(), + PhysicalPlanConfig { + object_store: Arc::new(LocalFileSystem {}), + file_schema: Arc::clone(&schema), + file_groups: files, + statistics: Statistics::default(), + projection: None, + batch_size: 1024, + limit: None, + table_partition_cols: vec![], + }, true, b',', - None, - 1024, - None, ); let predicate: Arc = binary( diff --git a/datafusion/src/physical_plan/limit.rs b/datafusion/src/physical_plan/limit.rs index bd48e4d2e5d4..f9c392a9056a 100644 --- a/datafusion/src/physical_plan/limit.rs +++ b/datafusion/src/physical_plan/limit.rs @@ -387,7 +387,7 @@ mod tests { use crate::datasource::object_store::local::LocalFileSystem; use crate::physical_plan::coalesce_partitions::CoalescePartitionsExec; use crate::physical_plan::common; - use crate::physical_plan::file_format::CsvExec; + use crate::physical_plan::file_format::{CsvExec, PhysicalPlanConfig}; use crate::test; #[tokio::test] @@ -399,15 +399,18 @@ mod tests { test::create_partitioned_csv("aggregate_test_100.csv", num_partitions)?; let csv = CsvExec::new( - Arc::new(LocalFileSystem {}), - files, - Statistics::default(), - schema, + PhysicalPlanConfig { + object_store: Arc::new(LocalFileSystem {}), + file_schema: schema, + file_groups: files, + statistics: Statistics::default(), + projection: None, + batch_size: 1024, + limit: None, + table_partition_cols: vec![], + }, true, b',', - None, - 1024, - None, ); // input should have 4 partitions diff --git a/datafusion/src/physical_plan/projection.rs b/datafusion/src/physical_plan/projection.rs index 794d9a2ec68e..eb335c2100ac 100644 --- a/datafusion/src/physical_plan/projection.rs +++ b/datafusion/src/physical_plan/projection.rs @@ -261,9 +261,9 @@ mod tests { use super::*; use crate::datasource::object_store::local::LocalFileSystem; use crate::physical_plan::expressions::{self, col}; - use crate::physical_plan::file_format::CsvExec; + use crate::physical_plan::file_format::{CsvExec, PhysicalPlanConfig}; use crate::scalar::ScalarValue; - use crate::test::{self, aggr_test_schema}; + use crate::test::{self}; use futures::future; #[tokio::test] @@ -275,15 +275,18 @@ mod tests { test::create_partitioned_csv("aggregate_test_100.csv", partitions)?; let csv = CsvExec::new( - Arc::new(LocalFileSystem {}), - files, - Statistics::default(), - aggr_test_schema(), + PhysicalPlanConfig { + object_store: Arc::new(LocalFileSystem {}), + file_schema: Arc::clone(&schema), + file_groups: files, + statistics: Statistics::default(), + projection: None, + batch_size: 1024, + limit: None, + table_partition_cols: vec![], + }, true, b',', - None, - 1024, - None, ); // pick column c1 and name it column c1 in the output schema diff --git a/datafusion/src/physical_plan/sort.rs b/datafusion/src/physical_plan/sort.rs index 499d1f743844..a606906e8680 100644 --- a/datafusion/src/physical_plan/sort.rs +++ b/datafusion/src/physical_plan/sort.rs @@ -314,10 +314,13 @@ mod tests { use crate::physical_plan::coalesce_partitions::CoalescePartitionsExec; use crate::physical_plan::expressions::col; use crate::physical_plan::memory::MemoryExec; - use crate::physical_plan::{collect, file_format::CsvExec}; + use crate::physical_plan::{ + collect, + file_format::{CsvExec, PhysicalPlanConfig}, + }; use crate::test::assert_is_pending; use crate::test::exec::assert_strong_count_converges_to_zero; - use crate::test::{self, aggr_test_schema, 
exec::BlockingExec}; + use crate::test::{self, exec::BlockingExec}; use arrow::array::*; use arrow::datatypes::*; use futures::FutureExt; @@ -330,15 +333,18 @@ mod tests { test::create_partitioned_csv("aggregate_test_100.csv", partitions)?; let csv = CsvExec::new( - Arc::new(LocalFileSystem {}), - files, - Statistics::default(), - aggr_test_schema(), + PhysicalPlanConfig { + object_store: Arc::new(LocalFileSystem {}), + file_schema: Arc::clone(&schema), + file_groups: files, + statistics: Statistics::default(), + projection: None, + batch_size: 1024, + limit: None, + table_partition_cols: vec![], + }, true, b',', - None, - 1024, - None, ); let sort_exec = Arc::new(SortExec::try_new( diff --git a/datafusion/src/physical_plan/sort_preserving_merge.rs b/datafusion/src/physical_plan/sort_preserving_merge.rs index 5aaf9789f699..62f4b941f7f7 100644 --- a/datafusion/src/physical_plan/sort_preserving_merge.rs +++ b/datafusion/src/physical_plan/sort_preserving_merge.rs @@ -667,7 +667,7 @@ mod tests { use crate::assert_batches_eq; use crate::physical_plan::coalesce_partitions::CoalescePartitionsExec; use crate::physical_plan::expressions::col; - use crate::physical_plan::file_format::CsvExec; + use crate::physical_plan::file_format::{CsvExec, PhysicalPlanConfig}; use crate::physical_plan::memory::MemoryExec; use crate::physical_plan::sort::SortExec; use crate::physical_plan::{collect, common}; @@ -936,15 +936,18 @@ mod tests { test::create_partitioned_csv("aggregate_test_100.csv", partitions).unwrap(); let csv = Arc::new(CsvExec::new( - Arc::new(LocalFileSystem {}), - files, - Statistics::default(), - Arc::clone(&schema), + PhysicalPlanConfig { + object_store: Arc::new(LocalFileSystem {}), + file_schema: Arc::clone(&schema), + file_groups: files, + statistics: Statistics::default(), + projection: None, + batch_size: 1024, + limit: None, + table_partition_cols: vec![], + }, true, b',', - None, - 1024, - None, )); let sort = vec![ @@ -1016,15 +1019,18 @@ mod tests { test::create_partitioned_csv("aggregate_test_100.csv", partitions).unwrap(); let csv = Arc::new(CsvExec::new( - Arc::new(LocalFileSystem {}), - files, - Statistics::default(), - schema, + PhysicalPlanConfig { + object_store: Arc::new(LocalFileSystem {}), + file_schema: schema, + file_groups: files, + statistics: Statistics::default(), + projection: None, + batch_size: 1024, + limit: None, + table_partition_cols: vec![], + }, true, b',', - None, - 1024, - None, )); let sorted = basic_sort(csv, sort).await; diff --git a/datafusion/src/physical_plan/union.rs b/datafusion/src/physical_plan/union.rs index 43e23850b19e..418be630bed9 100644 --- a/datafusion/src/physical_plan/union.rs +++ b/datafusion/src/physical_plan/union.rs @@ -220,8 +220,12 @@ mod tests { use super::*; use crate::datasource::object_store::{local::LocalFileSystem, ObjectStore}; use crate::test; + use crate::{ - physical_plan::{collect, file_format::CsvExec}, + physical_plan::{ + collect, + file_format::{CsvExec, PhysicalPlanConfig}, + }, scalar::ScalarValue, }; use arrow::record_batch::RecordBatch; @@ -236,27 +240,33 @@ mod tests { let (_, files2) = test::create_partitioned_csv("aggregate_test_100.csv", 5)?; let csv = CsvExec::new( - Arc::clone(&fs), - files, - Statistics::default(), - Arc::clone(&schema), + PhysicalPlanConfig { + object_store: Arc::clone(&fs), + file_schema: Arc::clone(&schema), + file_groups: files, + statistics: Statistics::default(), + projection: None, + batch_size: 1024, + limit: None, + table_partition_cols: vec![], + }, true, b',', - None, - 1024, - 
None, ); let csv2 = CsvExec::new( - Arc::clone(&fs), - files2, - Statistics::default(), - schema, + PhysicalPlanConfig { + object_store: Arc::clone(&fs), + file_schema: Arc::clone(&schema), + file_groups: files2, + statistics: Statistics::default(), + projection: None, + batch_size: 1024, + limit: None, + table_partition_cols: vec![], + }, true, b',', - None, - 1024, - None, ); let union_exec = Arc::new(UnionExec::new(vec![Arc::new(csv), Arc::new(csv2)])); diff --git a/datafusion/src/physical_plan/windows/mod.rs b/datafusion/src/physical_plan/windows/mod.rs index ef420b2c8351..28bf40293612 100644 --- a/datafusion/src/physical_plan/windows/mod.rs +++ b/datafusion/src/physical_plan/windows/mod.rs @@ -178,7 +178,7 @@ mod tests { use crate::datasource::object_store::local::LocalFileSystem; use crate::physical_plan::aggregates::AggregateFunction; use crate::physical_plan::expressions::col; - use crate::physical_plan::file_format::CsvExec; + use crate::physical_plan::file_format::{CsvExec, PhysicalPlanConfig}; use crate::physical_plan::{collect, Statistics}; use crate::test::exec::{assert_strong_count_converges_to_zero, BlockingExec}; use crate::test::{self, aggr_test_schema, assert_is_pending}; @@ -192,15 +192,18 @@ mod tests { let (_, files) = test::create_partitioned_csv("aggregate_test_100.csv", partitions)?; let csv = CsvExec::new( - Arc::new(LocalFileSystem {}), - files, - Statistics::default(), - aggr_test_schema(), + PhysicalPlanConfig { + object_store: Arc::new(LocalFileSystem {}), + file_schema: aggr_test_schema(), + file_groups: files, + statistics: Statistics::default(), + projection: None, + batch_size: 1024, + limit: None, + table_partition_cols: vec![], + }, true, b',', - None, - 1024, - None, ); let input = Arc::new(csv); diff --git a/datafusion/src/test/mod.rs b/datafusion/src/test/mod.rs index f673eb065aaf..c13df55c05df 100644 --- a/datafusion/src/test/mod.rs +++ b/datafusion/src/test/mod.rs @@ -17,7 +17,7 @@ //! Common unit test utility methods -use crate::datasource::object_store::local::local_file_meta; +use crate::datasource::object_store::local::local_unpartitioned_file; use crate::datasource::{MemTable, PartitionedFile, TableProvider}; use crate::error::Result; use crate::logical_plan::{LogicalPlan, LogicalPlanBuilder}; @@ -98,11 +98,7 @@ pub fn create_partitioned_csv( let groups = files .into_iter() - .map(|f| { - vec![PartitionedFile { - file_meta: local_file_meta(f.to_str().unwrap().to_owned()), - }] - }) + .map(|f| vec![local_unpartitioned_file(f.to_str().unwrap().to_owned())]) .collect::>(); Ok((tmp_dir.into_path().to_str().unwrap().to_string(), groups)) diff --git a/datafusion/src/test/object_store.rs b/datafusion/src/test/object_store.rs index 4020b999f7d0..e93b4cd2d410 100644 --- a/datafusion/src/test/object_store.rs +++ b/datafusion/src/test/object_store.rs @@ -14,7 +14,6 @@ // KIND, either express or implied. See the License for the // specific language governing permissions and limitations // under the License. - //! Object store implem used for testing use std::{ @@ -34,14 +33,14 @@ use futures::{stream, AsyncRead, StreamExt}; #[derive(Debug)] /// An object store implem that is useful for testing. -/// The `ObjectReader`s are filled with zero bytes. +/// `ObjectReader`s are filled with zero bytes. 
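[Editor's aside, not part of the patch] The test helper above now builds file groups with `local_unpartitioned_file` instead of `local_file_meta`. Its definition is not shown in this diff; based on the `PartitionedFile` and `FileMeta` fields that do appear in it, a plausible shape is sketched below. The body is an assumption and the real helper in `datafusion::datasource::object_store::local` may obtain the file size and modification time differently.

// Hedged sketch: a PartitionedFile for a plain local file, i.e. one with no
// hive-style partition values attached.
use datafusion::datasource::object_store::{FileMeta, SizedFile};
use datafusion::datasource::PartitionedFile;

fn local_unpartitioned_file(path: String) -> PartitionedFile {
    // assumption: size read from the filesystem, missing files treated as empty
    let size = std::fs::metadata(&path).map(|m| m.len()).unwrap_or(0);
    PartitionedFile {
        file_meta: FileMeta {
            sized_file: SizedFile { path, size },
            last_modified: None,
        },
        // an unpartitioned file carries no partition values
        partition_values: vec![],
    }
}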
pub struct TestObjectStore { /// The `(path,size)` of the files that "exist" in the store - pub files: Vec<(String, u64)>, + files: Vec<(String, u64)>, } impl TestObjectStore { - pub fn new_arc(files: &[(&str, u64)]) -> Arc { + pub fn new_arc(files: &[(&str, u64)]) -> Arc { Arc::new(Self { files: files.iter().map(|f| (f.0.to_owned(), f.1)).collect(), }) diff --git a/datafusion/tests/common.rs b/datafusion/tests/common.rs new file mode 100644 index 000000000000..3490db5e091f --- /dev/null +++ b/datafusion/tests/common.rs @@ -0,0 +1,40 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +//! methods that are common to multiple integration test setups + +use std::sync::Arc; + +use arrow::datatypes::{DataType, Field, Schema, SchemaRef}; + +pub fn aggr_test_schema() -> SchemaRef { + Arc::new(Schema::new(vec![ + Field::new("c1", DataType::Utf8, false), + Field::new("c2", DataType::UInt32, false), + Field::new("c3", DataType::Int8, false), + Field::new("c4", DataType::Int16, false), + Field::new("c5", DataType::Int32, false), + Field::new("c6", DataType::Int64, false), + Field::new("c7", DataType::UInt8, false), + Field::new("c8", DataType::UInt16, false), + Field::new("c9", DataType::UInt32, false), + Field::new("c10", DataType::UInt64, false), + Field::new("c11", DataType::Float32, false), + Field::new("c12", DataType::Float64, false), + Field::new("c13", DataType::Utf8, false), + ])) +} diff --git a/datafusion/tests/path_partition.rs b/datafusion/tests/path_partition.rs new file mode 100644 index 000000000000..789511065fc8 --- /dev/null +++ b/datafusion/tests/path_partition.rs @@ -0,0 +1,392 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +//! 
Test queries on partitioned datasets + +use std::{fs, io, sync::Arc}; + +use async_trait::async_trait; +use datafusion::{ + assert_batches_sorted_eq, + datasource::{ + file_format::{csv::CsvFormat, parquet::ParquetFormat}, + listing::{ListingOptions, ListingTable}, + object_store::{ + local::LocalFileSystem, FileMeta, FileMetaStream, ListEntryStream, + ObjectReader, ObjectStore, SizedFile, + }, + }, + error::{DataFusionError, Result}, + physical_plan::ColumnStatistics, + prelude::ExecutionContext, + test_util::{arrow_test_data, parquet_test_data}, +}; +use futures::{stream, StreamExt}; + +mod common; + +#[tokio::test] +async fn csv_filter_with_file_col() -> Result<()> { + let mut ctx = ExecutionContext::new(); + + register_partitioned_aggregate_csv( + &mut ctx, + &[ + "mytable/date=2021-10-27/file.csv", + "mytable/date=2021-10-28/file.csv", + ], + &["date"], + "mytable", + ); + + let result = ctx + .sql("SELECT c1, c2 FROM t WHERE date='2021-10-27' and date!=c1 LIMIT 5") + .await? + .collect() + .await?; + + let expected = vec![ + "+----+----+", + "| c1 | c2 |", + "+----+----+", + "| a | 1 |", + "| b | 1 |", + "| b | 5 |", + "| c | 2 |", + "| d | 5 |", + "+----+----+", + ]; + assert_batches_sorted_eq!(expected, &result); + + Ok(()) +} + +#[tokio::test] +async fn csv_projection_on_partition() -> Result<()> { + let mut ctx = ExecutionContext::new(); + + register_partitioned_aggregate_csv( + &mut ctx, + &[ + "mytable/date=2021-10-27/file.csv", + "mytable/date=2021-10-28/file.csv", + ], + &["date"], + "mytable", + ); + + let result = ctx + .sql("SELECT c1, date FROM t WHERE date='2021-10-27' LIMIT 5") + .await? + .collect() + .await?; + + let expected = vec![ + "+----+------------+", + "| c1 | date |", + "+----+------------+", + "| a | 2021-10-27 |", + "| b | 2021-10-27 |", + "| b | 2021-10-27 |", + "| c | 2021-10-27 |", + "| d | 2021-10-27 |", + "+----+------------+", + ]; + assert_batches_sorted_eq!(expected, &result); + + Ok(()) +} + +#[tokio::test] +async fn csv_grouping_by_partition() -> Result<()> { + let mut ctx = ExecutionContext::new(); + + register_partitioned_aggregate_csv( + &mut ctx, + &[ + "mytable/date=2021-10-26/file.csv", + "mytable/date=2021-10-27/file.csv", + "mytable/date=2021-10-28/file.csv", + ], + &["date"], + "mytable", + ); + + let result = ctx + .sql("SELECT date, count(*), count(distinct(c1)) FROM t WHERE date<='2021-10-27' GROUP BY date") + .await? + .collect() + .await?; + + let expected = vec![ + "+------------+-----------------+----------------------+", + "| date | COUNT(UInt8(1)) | COUNT(DISTINCT t.c1) |", + "+------------+-----------------+----------------------+", + "| 2021-10-26 | 100 | 5 |", + "| 2021-10-27 | 100 | 5 |", + "+------------+-----------------+----------------------+", + ]; + assert_batches_sorted_eq!(expected, &result); + + Ok(()) +} + +#[tokio::test] +async fn parquet_multiple_partitions() -> Result<()> { + let mut ctx = ExecutionContext::new(); + + register_partitioned_alltypes_parquet( + &mut ctx, + &[ + "year=2021/month=09/day=09/file.parquet", + "year=2021/month=10/day=09/file.parquet", + "year=2021/month=10/day=28/file.parquet", + ], + &["year", "month", "day"], + "", + "alltypes_plain.parquet", + ) + .await; + + let result = ctx + .sql("SELECT id, day FROM t WHERE day=month ORDER BY id") + .await? 
+ .collect() + .await?; + + let expected = vec![ + "+----+-----+", + "| id | day |", + "+----+-----+", + "| 0 | 09 |", + "| 1 | 09 |", + "| 2 | 09 |", + "| 3 | 09 |", + "| 4 | 09 |", + "| 5 | 09 |", + "| 6 | 09 |", + "| 7 | 09 |", + "+----+-----+", + ]; + assert_batches_sorted_eq!(expected, &result); + + Ok(()) +} + +#[tokio::test] +async fn parquet_statistics() -> Result<()> { + let mut ctx = ExecutionContext::new(); + + register_partitioned_alltypes_parquet( + &mut ctx, + &[ + "year=2021/month=09/day=09/file.parquet", + "year=2021/month=10/day=09/file.parquet", + "year=2021/month=10/day=28/file.parquet", + ], + &["year", "month", "day"], + "", + // This is the only file we found in the test set with + // actual stats. It has 1 column / 1 row. + "single_nan.parquet", + ) + .await; + + //// NO PROJECTION //// + let logical_plan = ctx.sql("SELECT * FROM t").await?.to_logical_plan(); + + let physical_plan = ctx.create_physical_plan(&logical_plan).await?; + assert_eq!(physical_plan.schema().fields().len(), 4); + + let stat_cols = physical_plan + .statistics() + .column_statistics + .expect("col stats should be defined"); + assert_eq!(stat_cols.len(), 4); + // stats for the first col are read from the parquet file + assert_eq!(stat_cols[0].null_count, Some(3)); + // TODO assert partition column (1,2,3) stats once implemented (#1186) + assert_eq!(stat_cols[1], ColumnStatistics::default()); + assert_eq!(stat_cols[2], ColumnStatistics::default()); + assert_eq!(stat_cols[3], ColumnStatistics::default()); + + //// WITH PROJECTION //// + let logical_plan = ctx + .sql("SELECT mycol, day FROM t WHERE day='28'") + .await? + .to_logical_plan(); + + let physical_plan = ctx.create_physical_plan(&logical_plan).await?; + assert_eq!(physical_plan.schema().fields().len(), 2); + + let stat_cols = physical_plan + .statistics() + .column_statistics + .expect("col stats should be defined"); + assert_eq!(stat_cols.len(), 2); + // stats for the first col are read from the parquet file + assert_eq!(stat_cols[0].null_count, Some(1)); + // TODO assert partition column stats once implemented (#1186) + assert_eq!(stat_cols[1], ColumnStatistics::default()); + + Ok(()) +} + +#[tokio::test] +async fn parquet_overlapping_columns() -> Result<()> { + let mut ctx = ExecutionContext::new(); + + // `id` is both a column of the file and a partitioning col + register_partitioned_alltypes_parquet( + &mut ctx, + &[ + "id=1/file.parquet", + "id=2/file.parquet", + "id=3/file.parquet", + ], + &["id"], + "", + "alltypes_plain.parquet", + ) + .await; + + let result = ctx.sql("SELECT id FROM t WHERE id=1 ORDER BY id").await; + + assert!( + result.is_err(), + "Duplicate qualified name should raise error" + ); + Ok(()) +} + +fn register_partitioned_aggregate_csv( + ctx: &mut ExecutionContext, + store_paths: &[&str], + partition_cols: &[&str], + table_path: &str, +) { + let testdata = arrow_test_data(); + let csv_file_path = format!("{}/csv/aggregate_test_100.csv", testdata); + let file_schema = common::aggr_test_schema(); + let object_store = MirroringObjectStore::new_arc(csv_file_path, store_paths); + + let mut options = ListingOptions::new(Arc::new(CsvFormat::default())); + options.table_partition_cols = partition_cols.iter().map(|&s| s.to_owned()).collect(); + + let table = + ListingTable::new(object_store, table_path.to_owned(), file_schema, options); + + ctx.register_table("t", Arc::new(table)) + .expect("registering listing table failed"); +} + +async fn register_partitioned_alltypes_parquet( + ctx: &mut ExecutionContext, +
store_paths: &[&str], + partition_cols: &[&str], + table_path: &str, + source_file: &str, +) { + let testdata = parquet_test_data(); + let parquet_file_path = format!("{}/{}", testdata, source_file); + let object_store = + MirroringObjectStore::new_arc(parquet_file_path.clone(), store_paths); + + let mut options = ListingOptions::new(Arc::new(ParquetFormat::default())); + options.table_partition_cols = partition_cols.iter().map(|&s| s.to_owned()).collect(); + options.collect_stat = true; + + let file_schema = options + .infer_schema(Arc::clone(&object_store), store_paths[0]) + .await + .expect("Parquet schema inference failed"); + + let table = + ListingTable::new(object_store, table_path.to_owned(), file_schema, options); + + ctx.register_table("t", Arc::new(table)) + .expect("registering listing table failed"); +} + +#[derive(Debug)] +/// An object store implem that mirrors a given file to multiple paths. +pub struct MirroringObjectStore { + /// The paths of the files that "exist" in the store + files: Vec<String>, + /// The file that will be read at all paths + mirrored_file: String, + /// Size of the mirrored file + file_size: u64, +} + +impl MirroringObjectStore { + pub fn new_arc(mirrored_file: String, paths: &[&str]) -> Arc<Self> { + let metadata = fs::metadata(&mirrored_file).expect("Local file metadata"); + Arc::new(Self { + files: paths.iter().map(|&f| f.to_owned()).collect(), + mirrored_file, + file_size: metadata.len(), + }) + } +} + +#[async_trait] +impl ObjectStore for MirroringObjectStore { + async fn list_file(&self, prefix: &str) -> Result<FileMetaStream> { + let prefix = prefix.to_owned(); + let size = self.file_size; + Ok(Box::pin( + stream::iter( + self.files + .clone() + .into_iter() + .filter(move |f| f.starts_with(&prefix)), + ) + .map(move |f| { + Ok(FileMeta { + sized_file: SizedFile { path: f, size }, + last_modified: None, + }) + }), + )) + } + + async fn list_dir( + &self, + _prefix: &str, + _delimiter: Option<String>, + ) -> Result<ListEntryStream> { + unimplemented!() + } + + fn file_reader(&self, file: SizedFile) -> Result<Arc<dyn ObjectReader>> { + assert_eq!( + self.file_size, file.size, + "Requested files should have the same size as the mirrored file" + ); + match self.files.iter().find(|&item| &file.path == item) { + Some(_) => Ok(LocalFileSystem {}.file_reader(SizedFile { + path: self.mirrored_file.clone(), + size: self.file_size, + })?), + None => Err(DataFusionError::IoError(io::Error::new( + io::ErrorKind::NotFound, + "not in provided test list", + ))), + } + } +} diff --git a/datafusion/tests/sql.rs b/datafusion/tests/sql.rs index f1e988814add..cf099193085e 100644 --- a/datafusion/tests/sql.rs +++ b/datafusion/tests/sql.rs @@ -49,6 +49,8 @@ use datafusion::{ }; use datafusion::{execution::context::ExecutionContext, physical_plan::displayable}; +mod common; + #[tokio::test] async fn nyc() -> Result<()> { // schema for nyxtaxi csv files @@ -3195,24 +3197,6 @@ async fn explain_analyze_runs_optimizers() { assert_contains!(actual, expected); } -fn aggr_test_schema() -> SchemaRef { - Arc::new(Schema::new(vec![ - Field::new("c1", DataType::Utf8, false), - Field::new("c2", DataType::UInt32, false), - Field::new("c3", DataType::Int8, false), - Field::new("c4", DataType::Int16, false), - Field::new("c5", DataType::Int32, false), - Field::new("c6", DataType::Int64, false), - Field::new("c7", DataType::UInt8, false), - Field::new("c8", DataType::UInt16, false), - Field::new("c9", DataType::UInt32, false), - Field::new("c10", DataType::UInt64, false), - Field::new("c11", DataType::Float32, false), - Field::new("c12",
DataType::Float64, false), - Field::new("c13", DataType::Utf8, false), - ])) -} - async fn register_aggregate_csv_by_sql(ctx: &mut ExecutionContext) { let testdata = datafusion::test_util::arrow_test_data(); @@ -3256,7 +3240,7 @@ async fn register_aggregate_csv_by_sql(ctx: &mut ExecutionContext) { async fn register_aggregate_csv(ctx: &mut ExecutionContext) -> Result<()> { let testdata = datafusion::test_util::arrow_test_data(); - let schema = aggr_test_schema(); + let schema = common::aggr_test_schema(); ctx.register_csv( "aggregate_test_100", &format!("{}/csv/aggregate_test_100.csv", testdata),