diff --git a/csrc/cutlass_extensions/vllm_numeric_conversion.cuh b/csrc/cutlass_extensions/vllm_numeric_conversion.cuh
index 49c08b0ec5824..4ab75dd081a5b 100644
--- a/csrc/cutlass_extensions/vllm_numeric_conversion.cuh
+++ b/csrc/cutlass_extensions/vllm_numeric_conversion.cuh
@@ -21,7 +21,7 @@ struct InterleavedNumericArrayConverter {
   CUTLASS_DEVICE
   static result_type convert(source_type const& source) {
     CUTE_INVALID_CONTROL_PATH(
-        "InterleavedNumericArrayConverter not impleted\n");
+        "InterleavedNumericArrayConverter not implemented\n");
     return {};
   }
 
@@ -73,7 +73,7 @@ struct ArrayConverterPacked32Bit {
 
   // Maybe not Valid,. ScalarConverter will not actually work unless
   //  NumericConverter<T, S, Round> is implemented
-  // but it won't be used since we assert N % 2 == 0, jus here for compliance
+  // but it won't be used since we assert N % 2 == 0, just here for compliance
   // with VectorizedConverter
   using ScalarConverter = NumericConverter<T, S>;
 
diff --git a/csrc/quantization/machete/machete_mainloop.cuh b/csrc/quantization/machete/machete_mainloop.cuh
index 7654634fc22ab..f11105c041f70 100644
--- a/csrc/quantization/machete/machete_mainloop.cuh
+++ b/csrc/quantization/machete/machete_mainloop.cuh
@@ -1343,7 +1343,7 @@ struct MacheteCollectiveMma {
 
       // We need to cast to nv_bfloat16 for the multiply since
       // `cutlass::bfloat16_t` has an overloaded operator* that upconverts to
-      // float, which nvcc will not optimize to useing vectorized fma
+      // float, which nvcc will not optimize into vectorized fma
       // instructions (i.e. hfma.bf16_v2)
       if constexpr (std::is_same_v<ElementScale, cutlass::bfloat16_t>) {
         cute::transform(