diff --git a/csrc/cutlass_extensions/vllm_numeric_conversion.cuh b/csrc/cutlass_extensions/vllm_numeric_conversion.cuh index 49c08b0ec5824..4ab75dd081a5b 100644 --- a/csrc/cutlass_extensions/vllm_numeric_conversion.cuh +++ b/csrc/cutlass_extensions/vllm_numeric_conversion.cuh @@ -21,7 +21,7 @@ struct InterleavedNumericArrayConverter { CUTLASS_DEVICE static result_type convert(source_type const& source) { CUTE_INVALID_CONTROL_PATH( - "InterleavedNumericArrayConverter not impleted\n"); + "InterleavedNumericArrayConverter not implemented\n"); return {}; } @@ -73,7 +73,7 @@ struct ArrayConverterPacked32Bit { // Maybe not Valid,. ScalarConverter will not actually work unless // NumericConverter is implemented - // but it won't be used since we assert N % 2 == 0, jus here for compliance + // but it won't be used since we assert N % 2 == 0, just here for compliance // with VectorizedConverter using ScalarConverter = NumericConverter; diff --git a/csrc/quantization/machete/machete_mainloop.cuh b/csrc/quantization/machete/machete_mainloop.cuh index 7654634fc22ab..f11105c041f70 100644 --- a/csrc/quantization/machete/machete_mainloop.cuh +++ b/csrc/quantization/machete/machete_mainloop.cuh @@ -1343,7 +1343,7 @@ struct MacheteCollectiveMma { // We need to cast to nv_bfloat16 for the multiply since // `cutlass::bfloat16_t` has an overloaded operator* that upconverts to - // float, which nvcc will not optimize to useing vectorized fma + // float, which nvcc will not optimize to using vectorized fma // instructions (i.e. hfma.bf16_v2) if constexpr (std::is_same_v) { cute::transform(