chore: fix typos of common and core packages (apache#11520)
JasonLi-cn authored Jul 17, 2024
1 parent 0021356 commit 81a6094
Showing 33 changed files with 65 additions and 65 deletions.
4 changes: 2 additions & 2 deletions datafusion/common/src/column.rs
@@ -214,7 +214,7 @@ impl Column {
for using_col in using_columns {
let all_matched = columns.iter().all(|f| using_col.contains(f));
// All matched fields belong to the same using column set, in orther words
// the same join clause. We simply pick the qualifer from the first match.
// the same join clause. We simply pick the qualifier from the first match.
if all_matched {
return Ok(columns[0].clone());
}
@@ -303,7 +303,7 @@ impl Column {
for using_col in using_columns {
let all_matched = columns.iter().all(|c| using_col.contains(c));
// All matched fields belong to the same using column set, in orther words
// the same join clause. We simply pick the qualifer from the first match.
// the same join clause. We simply pick the qualifier from the first match.
if all_matched {
return Ok(columns[0].clone());
}
2 changes: 1 addition & 1 deletion datafusion/common/src/dfschema.rs
@@ -1310,7 +1310,7 @@ mod tests {
])
}
#[test]
fn test_dfschema_to_schema_convertion() {
fn test_dfschema_to_schema_conversion() {
let mut a_metadata = HashMap::new();
a_metadata.insert("key".to_string(), "value".to_string());
let a_field = Field::new("a", DataType::Int64, false).with_metadata(a_metadata);
2 changes: 1 addition & 1 deletion datafusion/common/src/functional_dependencies.rs
@@ -433,7 +433,7 @@ impl FunctionalDependencies {
}

/// This function ensures that functional dependencies involving uniquely
/// occuring determinant keys cover their entire table in terms of
/// occurring determinant keys cover their entire table in terms of
/// dependent columns.
pub fn extend_target_indices(&mut self, n_out: usize) {
self.deps.iter_mut().for_each(
2 changes: 1 addition & 1 deletion datafusion/common/src/table_reference.rs
@@ -62,7 +62,7 @@ impl std::fmt::Display for ResolvedTableReference {
/// assert_eq!(table_reference, TableReference::bare("mytable"));
///
/// // Get a table reference to 'MyTable' (note the capitalization) using double quotes
/// // (programatically it is better to use `TableReference::bare` for this)
/// // (programmatically it is better to use `TableReference::bare` for this)
/// let table_reference = TableReference::from(r#""MyTable""#);
/// assert_eq!(table_reference, TableReference::bare("MyTable"));
///
4 changes: 2 additions & 2 deletions datafusion/core/src/catalog/information_schema.rs
@@ -314,7 +314,7 @@ impl InformationSchemaTablesBuilder {
table_name: impl AsRef<str>,
table_type: TableType,
) {
// Note: append_value is actually infallable.
// Note: append_value is actually infallible.
self.catalog_names.append_value(catalog_name.as_ref());
self.schema_names.append_value(schema_name.as_ref());
self.table_names.append_value(table_name.as_ref());
@@ -405,7 +405,7 @@ impl InformationSchemaViewBuilder {
table_name: impl AsRef<str>,
definition: Option<impl AsRef<str>>,
) {
// Note: append_value is actually infallable.
// Note: append_value is actually infallible.
self.catalog_names.append_value(catalog_name.as_ref());
self.schema_names.append_value(schema_name.as_ref());
self.table_names.append_value(table_name.as_ref());
4 changes: 2 additions & 2 deletions datafusion/core/src/datasource/file_format/arrow.rs
@@ -353,7 +353,7 @@ async fn infer_schema_from_file_stream(
// Expected format:
// <magic number "ARROW1"> - 6 bytes
// <empty padding bytes [to 8 byte boundary]> - 2 bytes
// <continutation: 0xFFFFFFFF> - 4 bytes, not present below v0.15.0
// <continuation: 0xFFFFFFFF> - 4 bytes, not present below v0.15.0
// <metadata_size: int32> - 4 bytes
// <metadata_flatbuffer: bytes>
// <rest of file bytes>
@@ -365,7 +365,7 @@ async fn infer_schema_from_file_stream(
// Files should start with these magic bytes
if bytes[0..6] != ARROW_MAGIC {
return Err(ArrowError::ParseError(
"Arrow file does not contian correct header".to_string(),
"Arrow file does not contain correct header".to_string(),
))?;
}
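For orientation, the header layout described in the comment above (magic bytes, padding, optional continuation marker, metadata length) can be validated with a few byte comparisons. The sketch below is illustrative only and assumes a local ARROW_MAGIC constant and a plain string error type; it is not DataFusion's implementation.

```rust
// Illustrative sketch of the header check described above; the constant and the
// string error type are assumptions, not the project's actual code.
const ARROW_MAGIC: [u8; 6] = *b"ARROW1";

fn read_metadata_len(bytes: &[u8]) -> Result<i32, String> {
    // <magic "ARROW1"> - 6 bytes, then <padding to 8-byte boundary> - 2 bytes
    if bytes.len() < 12 || bytes[0..6] != ARROW_MAGIC {
        return Err("Arrow file does not contain correct header".to_string());
    }
    // <continuation: 0xFFFFFFFF> - 4 bytes, only present in v0.15.0 and later
    let offset = if bytes[8..12] == [0xFF; 4] { 12 } else { 8 };
    // <metadata_size: int32> - 4 bytes, little-endian
    let len_bytes: [u8; 4] = bytes
        .get(offset..offset + 4)
        .ok_or_else(|| "Arrow file header is truncated".to_string())?
        .try_into()
        .map_err(|_| "Arrow file header is truncated".to_string())?;
    Ok(i32::from_le_bytes(len_bytes))
}
```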

2 changes: 1 addition & 1 deletion datafusion/core/src/datasource/file_format/csv.rs
@@ -645,7 +645,7 @@ mod tests {
let session_ctx = SessionContext::new_with_config(config);
let state = session_ctx.state();
let task_ctx = state.task_ctx();
// skip column 9 that overflows the automaticly discovered column type of i64 (u64 would work)
// skip column 9 that overflows the automatically discovered column type of i64 (u64 would work)
let projection = Some(vec![0, 1, 2, 3, 4, 5, 6, 7, 8, 10, 11, 12]);
let exec =
get_exec(&state, "aggregate_test_100.csv", projection, None, true).await?;
12 changes: 6 additions & 6 deletions datafusion/core/src/datasource/file_format/parquet.rs
@@ -854,14 +854,14 @@ fn spawn_column_parallel_row_group_writer(
let mut col_array_channels = Vec::with_capacity(num_columns);
for writer in col_writers.into_iter() {
// Buffer size of this channel limits the number of arrays queued up for column level serialization
let (send_array, recieve_array) =
let (send_array, receive_array) =
mpsc::channel::<ArrowLeafColumn>(max_buffer_size);
col_array_channels.push(send_array);

let reservation =
MemoryConsumer::new("ParquetSink(ArrowColumnWriter)").register(pool);
let task = SpawnedTask::spawn(column_serializer_task(
recieve_array,
receive_array,
writer,
reservation,
));
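As an aside, the fan-out shape in the hunk above is one bounded channel plus one spawned task per column. A stripped-down sketch of that pattern with plain tokio follows; the buffer type and task body are placeholders, not the ParquetSink internals.

```rust
use tokio::sync::mpsc;
use tokio::task::JoinHandle;

// Placeholder pattern: one bounded channel and one serializer task per column.
fn spawn_column_tasks(
    num_columns: usize,
    max_buffer_size: usize,
) -> (Vec<mpsc::Sender<Vec<u8>>>, Vec<JoinHandle<usize>>) {
    let mut senders = Vec::with_capacity(num_columns);
    let mut tasks = Vec::with_capacity(num_columns);
    for _ in 0..num_columns {
        // The channel capacity bounds how many buffers can queue up per column.
        let (send_array, mut receive_array) = mpsc::channel::<Vec<u8>>(max_buffer_size);
        senders.push(send_array);
        tasks.push(tokio::spawn(async move {
            let mut bytes_seen = 0usize;
            while let Some(buf) = receive_array.recv().await {
                // A real implementation would hand `buf` to a column writer here.
                bytes_seen += buf.len();
            }
            bytes_seen
        }));
    }
    (senders, tasks)
}
```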
@@ -936,7 +936,7 @@ fn spawn_rg_join_and_finalize_task(
/// row group is reached, the parallel tasks are joined on another separate task
/// and sent to a concatenation task. This task immediately continues to work
/// on the next row group in parallel. So, parquet serialization is parallelized
/// accross both columns and row_groups, with a theoretical max number of parallel tasks
/// across both columns and row_groups, with a theoretical max number of parallel tasks
/// given by n_columns * num_row_groups.
fn spawn_parquet_parallel_serialization_task(
mut data: Receiver<RecordBatch>,
@@ -1560,7 +1560,7 @@ mod tests {
// . batch1 written into first file and includes:
// - column c1 that has 3 rows with one null. Stats min and max of string column is missing for this test even the column has values
// . batch2 written into second file and includes:
// - column c2 that has 3 rows with one null. Stats min and max of int are avaialble and 1 and 2 respectively
// - column c2 that has 3 rows with one null. Stats min and max of int are available and 1 and 2 respectively
let store = Arc::new(LocalFileSystem::new()) as _;
let (files, _file_names) = store_parquet(vec![batch1, batch2], false).await?;

@@ -2112,7 +2112,7 @@ mod tests {
let path_parts = path.parts().collect::<Vec<_>>();
assert_eq!(path_parts.len(), 1, "should not have path prefix");

assert_eq!(num_rows, 2, "file metdata to have 2 rows");
assert_eq!(num_rows, 2, "file metadata to have 2 rows");
assert!(
schema.iter().any(|col_schema| col_schema.name == "a"),
"output file metadata should contain col a"
@@ -2208,7 +2208,7 @@ mod tests {
);
expected_partitions.remove(prefix);

assert_eq!(num_rows, 1, "file metdata to have 1 row");
assert_eq!(num_rows, 1, "file metadata to have 1 row");
assert!(
!schema.iter().any(|col_schema| col_schema.name == "a"),
"output file metadata will not contain partitioned col a"
2 changes: 1 addition & 1 deletion datafusion/core/src/datasource/file_format/write/demux.rs
@@ -54,7 +54,7 @@ type DemuxedStreamReceiver = UnboundedReceiver<(Path, RecordBatchReceiver)>;
/// which should be contained within the same output file. The outer channel
/// is used to send a dynamic number of inner channels, representing a dynamic
/// number of total output files. The caller is also responsible to monitor
/// the demux task for errors and abort accordingly. The single_file_ouput parameter
/// the demux task for errors and abort accordingly. The single_file_output parameter
/// overrides all other settings to force only a single file to be written.
/// partition_by parameter will additionally split the input based on the unique
/// values of a specific column `<https://github.com/apache/datafusion/issues/7744>``
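For readers unfamiliar with the "channel of channels" wording in the comment above, a minimal sketch of that shape with tokio mpsc is shown below; the String key and row types are placeholders, not the real demux task.

```rust
use std::collections::HashMap;
use tokio::sync::mpsc::{unbounded_channel, UnboundedReceiver, UnboundedSender};

// Placeholder demux: route each (partition, row) pair to a per-partition inner
// channel, announcing every newly created inner channel on the outer channel.
async fn demux(
    mut input: UnboundedReceiver<(String, String)>,
    outer_tx: UnboundedSender<(String, UnboundedReceiver<String>)>,
) {
    let mut partitions: HashMap<String, UnboundedSender<String>> = HashMap::new();
    while let Some((partition, row)) = input.recv().await {
        let tx = partitions.entry(partition.clone()).or_insert_with(|| {
            let (tx, rx) = unbounded_channel();
            // A new partition value means a new output file: hand its receiver
            // to the consumer side via the outer channel.
            let _ = outer_tx.send((partition.clone(), rx));
            tx
        });
        let _ = tx.send(row);
    }
}
```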
@@ -141,7 +141,7 @@ pub(crate) async fn stateless_serialize_and_write_files(
// tracks the specific error triggering abort
let mut triggering_error = None;
// tracks if any errors were encountered in the process of aborting writers.
// if true, we may not have a guarentee that all written data was cleaned up.
// if true, we may not have a guarantee that all written data was cleaned up.
let mut any_abort_errors = false;
let mut join_set = JoinSet::new();
while let Some((data_rx, serializer, writer)) = rx.recv().await {
@@ -188,7 +188,7 @@ pub(crate) async fn stateless_serialize_and_write_files(
true => return internal_err!("Error encountered during writing to ObjectStore and failed to abort all writers. Partial result may have been written."),
false => match triggering_error {
Some(e) => return Err(e),
None => return internal_err!("Unknown Error encountered during writing to ObjectStore. All writers succesfully aborted.")
None => return internal_err!("Unknown Error encountered during writing to ObjectStore. All writers successfully aborted.")
}
}
}
@@ -268,7 +268,7 @@ pub(crate) async fn stateless_multipart_put(
r2?;

let total_count = rx_row_cnt.await.map_err(|_| {
internal_datafusion_err!("Did not receieve row count from write coordinater")
internal_datafusion_err!("Did not receive row count from write coordinator")
})?;

Ok(total_count)
2 changes: 1 addition & 1 deletion datafusion/core/src/datasource/listing/helpers.rs
@@ -759,7 +759,7 @@ mod tests {
.otherwise(lit(false))
.expect("valid case expr"))
));
// static expression not relvant in this context but we
// static expression not relevant in this context but we
// test it as an edge case anyway in case we want to generalize
// this helper function
assert!(expr_applicable_for_cols(&[], &lit(true)));
4 changes: 2 additions & 2 deletions datafusion/core/src/datasource/listing/table.rs
@@ -695,8 +695,8 @@ impl ListingTable {
}

/// Specify the SQL definition for this table, if any
pub fn with_definition(mut self, defintion: Option<String>) -> Self {
self.definition = defintion;
pub fn with_definition(mut self, definition: Option<String>) -> Self {
self.definition = definition;
self
}

2 changes: 1 addition & 1 deletion datafusion/core/src/datasource/listing/url.rs
@@ -53,7 +53,7 @@ impl ListingTableUrl {
/// subdirectories.
///
/// Similarly `s3://BUCKET/blob.csv` refers to `blob.csv` in the S3 bucket `BUCKET`,
/// wherease `s3://BUCKET/foo/` refers to all objects with the prefix `foo/` in the
/// whereas `s3://BUCKET/foo/` refers to all objects with the prefix `foo/` in the
/// S3 bucket `BUCKET`
///
/// # URL Encoding
2 changes: 1 addition & 1 deletion datafusion/core/src/datasource/memory.rs
@@ -644,7 +644,7 @@ mod tests {
Ok(partitions)
}

/// Returns the value of results. For example, returns 6 given the follwing
/// Returns the value of results. For example, returns 6 given the following
///
/// ```text
/// +-------+,
@@ -56,7 +56,7 @@ pub fn wrap_partition_type_in_dict(val_type: DataType) -> DataType {
}

/// Convert a [`ScalarValue`] of partition columns to a type, as
/// decribed in the documentation of [`wrap_partition_type_in_dict`],
/// described in the documentation of [`wrap_partition_type_in_dict`],
/// which can wrap the types.
pub fn wrap_partition_value_in_dict(val: ScalarValue) -> ScalarValue {
ScalarValue::Dictionary(Box::new(DataType::UInt16), Box::new(val))
@@ -682,7 +682,7 @@ mod tests {
vec![table_partition_col.clone()],
);

// verify the proj_schema inlcudes the last column and exactly the same the field it is defined
// verify the proj_schema includes the last column and exactly the same the field it is defined
let (proj_schema, _proj_statistics, _) = conf.project();
assert_eq!(proj_schema.fields().len(), file_schema.fields().len() + 1);
assert_eq!(
@@ -1123,7 +1123,7 @@ mod tests {
}

#[tokio::test]
async fn test_row_group_bloom_filter_pruning_predicate_mutiple_expr() {
async fn test_row_group_bloom_filter_pruning_predicate_multiple_expr() {
BloomFilterTest::new_data_index_bloom_encoding_stats()
.with_expect_all_pruned()
// generate pruning predicate `(String = "Hello_Not_exists" OR String = "Hello_Not_exists2")`
@@ -2532,43 +2532,43 @@ mod test {

fn timestamp_seconds_array(
input: impl IntoIterator<Item = Option<i64>>,
timzezone: Option<&str>,
timezone: Option<&str>,
) -> ArrayRef {
let array: TimestampSecondArray = input.into_iter().collect();
match timzezone {
match timezone {
Some(tz) => Arc::new(array.with_timezone(tz)),
None => Arc::new(array),
}
}

fn timestamp_milliseconds_array(
input: impl IntoIterator<Item = Option<i64>>,
timzezone: Option<&str>,
timezone: Option<&str>,
) -> ArrayRef {
let array: TimestampMillisecondArray = input.into_iter().collect();
match timzezone {
match timezone {
Some(tz) => Arc::new(array.with_timezone(tz)),
None => Arc::new(array),
}
}

fn timestamp_microseconds_array(
input: impl IntoIterator<Item = Option<i64>>,
timzezone: Option<&str>,
timezone: Option<&str>,
) -> ArrayRef {
let array: TimestampMicrosecondArray = input.into_iter().collect();
match timzezone {
match timezone {
Some(tz) => Arc::new(array.with_timezone(tz)),
None => Arc::new(array),
}
}

fn timestamp_nanoseconds_array(
input: impl IntoIterator<Item = Option<i64>>,
timzezone: Option<&str>,
timezone: Option<&str>,
) -> ArrayRef {
let array: TimestampNanosecondArray = input.into_iter().collect();
match timzezone {
match timezone {
Some(tz) => Arc::new(array.with_timezone(tz)),
None => Arc::new(array),
}
2 changes: 1 addition & 1 deletion datafusion/core/src/datasource/schema_adapter.rs
@@ -251,7 +251,7 @@ mod tests {

#[tokio::test]
async fn can_override_schema_adapter() {
// Test shows that SchemaAdapter can add a column that doesn't existin in the
// Test shows that SchemaAdapter can add a column that doesn't existing in the
// record batches returned from parquet. This can be useful for schema evolution
// where older files may not have all columns.
let tmp_dir = TempDir::new().unwrap();
2 changes: 1 addition & 1 deletion datafusion/core/src/datasource/streaming.rs
@@ -50,7 +50,7 @@ impl StreamingTable {
if !schema.contains(partition_schema) {
debug!(
"target schema does not contain partition schema. \
Target_schema: {schema:?}. Partiton Schema: {partition_schema:?}"
Target_schema: {schema:?}. Partition Schema: {partition_schema:?}"
);
return plan_err!("Mismatch between schema and batches");
}
4 changes: 2 additions & 2 deletions datafusion/core/src/execution/context/mod.rs
@@ -500,7 +500,7 @@ impl SessionContext {
self.execute_logical_plan(plan).await
}

/// Creates logical expresssions from SQL query text.
/// Creates logical expressions from SQL query text.
///
/// # Example: Parsing SQL queries
///
@@ -1352,7 +1352,7 @@ impl SessionContext {
self.state.write().register_catalog_list(catalog_list)
}

/// Registers a [`ConfigExtension`] as a table option extention that can be
/// Registers a [`ConfigExtension`] as a table option extension that can be
/// referenced from SQL statements executed against this context.
pub fn register_table_options_extension<T: ConfigExtension>(&self, extension: T) {
self.state
2 changes: 1 addition & 1 deletion datafusion/core/src/execution/session_state.rs
@@ -1019,7 +1019,7 @@ impl SessionStateBuilder {
self
}

/// Set tje [`PhysicalOptimizerRule`]s used to optimize plans.
/// Set the [`PhysicalOptimizerRule`]s used to optimize plans.
pub fn with_physical_optimizer_rules(
mut self,
physical_optimizers: Vec<Arc<dyn PhysicalOptimizerRule + Send + Sync>>,
@@ -354,7 +354,7 @@ mod tests {
PhysicalGroupBy::default(),
aggr_expr,
);
// should combine the Partial/Final AggregateExecs to tne Single AggregateExec
// should combine the Partial/Final AggregateExecs to the Single AggregateExec
let expected = &[
"AggregateExec: mode=Single, gby=[], aggr=[COUNT(1)]",
"ParquetExec: file_groups={1 group: [[x]]}, projection=[a, b, c]",
@@ -394,7 +394,7 @@ mod tests {
let final_group_by = PhysicalGroupBy::new_single(groups);

let plan = final_aggregate_exec(partial_agg, final_group_by, aggr_expr);
// should combine the Partial/Final AggregateExecs to tne Single AggregateExec
// should combine the Partial/Final AggregateExecs to the Single AggregateExec
let expected = &[
"AggregateExec: mode=Single, gby=[c@2 as c], aggr=[Sum(b)]",
"ParquetExec: file_groups={1 group: [[x]]}, projection=[a, b, c]",
@@ -392,7 +392,7 @@ fn adjust_input_keys_ordering(
let expr = proj.expr();
// For Projection, we need to transform the requirements to the columns before the Projection
// And then to push down the requirements
// Construct a mapping from new name to the orginal Column
// Construct a mapping from new name to the original Column
let new_required = map_columns_before_projection(&requirements.data, expr);
if new_required.len() == requirements.data.len() {
requirements.children[0].data = new_required;
@@ -566,7 +566,7 @@ fn shift_right_required(
})
.collect::<Vec<_>>();

// if the parent required are all comming from the right side, the requirements can be pushdown
// if the parent required are all coming from the right side, the requirements can be pushdown
(new_right_required.len() == parent_required.len()).then_some(new_right_required)
}
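The shift in the hunk above is easiest to see on plain column indices: a parent requirement refers to the right join child only if its index is at least the width of the left child, and pushing it down means subtracting that width. Below is a sketch under that simplification (indices instead of Column expressions), not the actual DataFusion function.

```rust
// Simplified sketch: requirements as plain column indices rather than Columns.
fn shift_right_required(
    parent_required: &[usize],
    left_columns_len: usize,
) -> Option<Vec<usize>> {
    let new_right_required: Vec<usize> = parent_required
        .iter()
        .filter(|&&idx| idx >= left_columns_len)
        .map(|&idx| idx - left_columns_len)
        .collect();
    // If the parent requirements all come from the right side, they can be pushed down.
    (new_right_required.len() == parent_required.len()).then_some(new_right_required)
}
```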
