-
Notifications
You must be signed in to change notification settings - Fork 1.3k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
File partitioning for ListingTable #1141
Changes from 15 commits
c184efa
2e82902
cbbf347
f74ee67
fb15b3b
95216d8
26bf105
c34193d
32c5bf6
10b52eb
f207b92
dfa6d0e
f8edb38
c5cfcfb
cb0789e
0303913
5d34be6
a4ec801
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -274,6 +274,7 @@ message PartitionedFile { | |
string path = 1; | ||
uint64 size = 2; | ||
uint64 last_modified_ns = 3; | ||
repeated ScalarValue partition_values = 4; | ||
} | ||
|
||
message CsvFormat { | ||
|
@@ -294,7 +295,7 @@ message ListingTableScanNode { | |
ProjectionColumns projection = 4; | ||
Schema schema = 5; | ||
repeated LogicalExprNode filters = 6; | ||
repeated string partitions = 7; | ||
repeated string table_partition_cols = 7; | ||
bool collect_stat = 8; | ||
uint32 target_partitions = 9; | ||
oneof FileFormatType { | ||
|
@@ -613,33 +614,28 @@ message ScanLimit { | |
uint32 limit = 1; | ||
} | ||
|
||
message ParquetScanExecNode { | ||
message FileScanExecConf { | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Promoted the shared scan settings to a dedicated `FileScanExecConf` message. The rationale for doing this here is that clippy was complaining more and more often that methods had too many arguments 😉 Suggestions on the naming of the config entity (`FileScanExecConf`) are welcome. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Makes sense to me. |
||
repeated FileGroup file_groups = 1; | ||
Schema schema = 2; | ||
uint32 batch_size = 4; | ||
repeated uint32 projection = 6; | ||
ScanLimit limit = 7; | ||
Statistics statistics = 8; | ||
uint32 batch_size = 3; | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Would this be backward-compatible? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. As long as you don't have DataFusion nodes with different versions, it should be OK! |
||
repeated uint32 projection = 4; | ||
ScanLimit limit = 5; | ||
Statistics statistics = 6; | ||
repeated string table_partition_cols = 7; | ||
} | ||
|
||
message ParquetScanExecNode { | ||
FileScanExecConf base_conf = 1; | ||
rdettai marked this conversation as resolved.
Show resolved
Hide resolved
|
||
} | ||
|
||
message CsvScanExecNode { | ||
repeated FileGroup file_groups = 1; | ||
Schema schema = 2; | ||
bool has_header = 3; | ||
uint32 batch_size = 4; | ||
string delimiter = 5; | ||
repeated uint32 projection = 6; | ||
ScanLimit limit = 7; | ||
Statistics statistics = 8; | ||
FileScanExecConf base_conf = 1; | ||
bool has_header = 2; | ||
string delimiter = 3; | ||
} | ||
|
||
message AvroScanExecNode { | ||
repeated FileGroup file_groups = 1; | ||
Schema schema = 2; | ||
uint32 batch_size = 4; | ||
repeated uint32 projection = 6; | ||
ScanLimit limit = 7; | ||
Statistics statistics = 8; | ||
FileScanExecConf base_conf = 1; | ||
} | ||
|
||
enum PartitionMode { | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Renamed the `partitions` field to `table_partition_cols` to make it more explicit (cf. #1141 (comment)).