Skip to content

Commit

Permalink
add UUID statistics handling
Browse files Browse the repository at this point in the history
  • Loading branch information
atefsaw committed Jun 21, 2023
1 parent e5dd8e2 commit e8d14e3
Showing 1 changed file with 48 additions and 1 deletion.
49 changes: 48 additions & 1 deletion rust/src/writer/stats.rs
Original file line number Diff line number Diff line change
Expand Up @@ -122,6 +122,7 @@ enum StatsScalar {
Decimal(f64),
String(String),
Bytes(Vec<u8>),
Uuid(uuid::Uuid),
}

impl StatsScalar {
Expand Down Expand Up @@ -241,6 +242,26 @@ impl StatsScalar {
let val = val / 10.0_f64.powi(*scale);
Ok(Self::Decimal(val))
}
(Statistics::FixedLenByteArray(v), Some(LogicalType::Uuid)) => {
    // Select the raw bound (min or max) requested by the caller.
    let raw = if use_min { v.min_bytes() } else { v.max_bytes() };

    // `Uuid::from_slice` only accepts exactly 16 bytes; any other length is
    // reported as a stats parsing failure carrying the offending bytes.
    match uuid::Uuid::from_slice(raw) {
        Ok(parsed) => Ok(Self::Uuid(parsed)),
        Err(_) => Err(DeltaWriterError::StatsParsingFailed {
            debug_value: format!("{raw:?}"),
            logical_type: Some(LogicalType::Uuid),
        }),
    }
}
(stats, _) => Err(DeltaWriterError::StatsParsingFailed {
debug_value: format!("{stats:?}"),
logical_type: logical_type.clone(),
Expand Down Expand Up @@ -271,6 +292,7 @@ impl From<StatsScalar> for serde_json::Value {
let escaped_string = String::from_utf8(escaped_bytes).unwrap();
serde_json::Value::from(escaped_string)
}
StatsScalar::Uuid(v) => serde_json::Value::from(v.hyphenated().to_string()),
}
}
}
Expand Down Expand Up @@ -585,6 +607,20 @@ mod tests {
}),
Value::from(1243124142314.423),
),
(
simple_parquet_stat!(
Statistics::FixedLenByteArray,
FixedLenByteArray::from(
[
0xc2, 0xe8, 0xc7, 0xf7, 0xd1, 0xf9, 0x4b, 0x49, 0xa5, 0xd9, 0x4b, 0xfe,
0x75, 0xc3, 0x17, 0xe2
]
.to_vec()
)
),
Some(LogicalType::Uuid),
Value::from("c2e8c7f7-d1f9-4b49-a5d9-4bfe75c317e2"),
),
];

for (stats, logical_type, expected) in cases {
Expand Down Expand Up @@ -614,7 +650,7 @@ mod tests {
assert_eq!(add.len(), 1);
let stats = add[0].get_stats().unwrap().unwrap();

let min_max_keys = vec!["meta", "some_int", "some_string", "some_bool"];
let min_max_keys = vec!["meta", "some_int", "some_string", "some_bool", "uuid"];
let mut null_count_keys = vec!["some_list", "some_nested_list"];
null_count_keys.extend_from_slice(min_max_keys.as_slice());

Expand Down Expand Up @@ -646,6 +682,9 @@ mod tests {
("date", ColumnValueStat::Value(v)) => {
assert_eq!("2021-06-22", v.as_str().unwrap())
}
("uuid", ColumnValueStat::Value(v)) => {
assert_eq!("176c770d-92af-4a21-bf76-5d8c5261d659", v.as_str().unwrap())
}
_ => panic!("Key should not be present"),
}
}
Expand Down Expand Up @@ -674,6 +713,9 @@ mod tests {
("date", ColumnValueStat::Value(v)) => {
assert_eq!("2021-06-22", v.as_str().unwrap())
}
("uuid", ColumnValueStat::Value(v)) => {
assert_eq!("a98bea04-d119-4f21-8edc-eb218b5849af", v.as_str().unwrap())
}
_ => panic!("Key should not be present"),
}
}
Expand All @@ -700,6 +742,7 @@ mod tests {
("some_list", ColumnCountStat::Value(v)) => assert_eq!(100, *v),
("some_nested_list", ColumnCountStat::Value(v)) => assert_eq!(100, *v),
("date", ColumnCountStat::Value(v)) => assert_eq!(0, *v),
("uuid", ColumnCountStat::Value(v)) => assert_eq!(0, *v),
_ => panic!("Key should not be present"),
}
}
Expand Down Expand Up @@ -803,6 +846,7 @@ mod tests {
"nullable": true, "metadata": {}
},
{ "name": "date", "type": "string", "nullable": true, "metadata": {} },
{ "name": "uuid", "type": "string", "nullable": true, "metadata": {} },
]
});
static ref V0_COMMIT: String = {
Expand Down Expand Up @@ -847,6 +891,7 @@ mod tests {
"some_list": ["a", "b", "c"],
"some_nested_list": [[42], [84]],
"date": "2021-06-22",
"uuid": "176c770d-92af-4a21-bf76-5d8c5261d659",
}))
.take(100)
.chain(
Expand All @@ -867,6 +912,7 @@ mod tests {
"some_list": ["x", "y", "z"],
"some_nested_list": [[42], [84]],
"date": "2021-06-22",
"uuid": "54f3e867-3f7b-4122-a452-9d74fb4fe1ba",
}))
.take(100),
)
Expand All @@ -884,6 +930,7 @@ mod tests {
},
"some_nested_list": [[42], null],
"date": "2021-06-22",
"uuid": "a98bea04-d119-4f21-8edc-eb218b5849af",
}))
.take(100),
)
Expand Down

0 comments on commit e8d14e3

Please sign in to comment.