Skip to content

Commit

Permalink
update
Browse files Browse the repository at this point in the history
  • Loading branch information
rcoh committed Oct 6, 2023
1 parent 99c5a57 commit 2cdf652
Show file tree
Hide file tree
Showing 3 changed files with 63,140 additions and 5 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -8,20 +8,108 @@ package software.amazon.smithy.rust.codegen.core.util
import software.amazon.smithy.utils.CaseUtils
import software.amazon.smithy.utils.StringUtils

/**
 * Escapes this string with [StringUtils.escapeJavaString] (using an empty indent) and then
 * rewrites every `\uXXXX` escape made of four lowercase hex digits as `\u{XXXX}`
 * (the brace form used by Rust string literals).
 *
 * NOTE(review): the surrounding double quotes are presumably added by `escapeJavaString`
 * itself — confirm against the Smithy utility.
 */
fun String.doubleQuote(): String =
    StringUtils.escapeJavaString(this, "").replace(Regex("""\\u([0-9a-f]{4})""")) { matchResult ->
        // groupValues[1] is the four hex digits captured after `\u`
        "\\u{" + matchResult.groupValues[1] + "}"
    }

/**
 * Shorthand for [doubleQuote]: double quote a string, e.g. "abc" -> "\"abc\""
 */
fun String.dq(): String {
    return doubleQuote()
}

/**
 * Splits this string into words by applying a fixed sequence of regex rewrites that insert
 * spaces at word boundaries, then splitting on single spaces. The original casing of each
 * word is preserved.
 *
 * Because the final step is `split(" ")`, an empty input (or one made only of separators)
 * yields a list containing a single empty string.
 */
private fun String.splitOnWordBoundaries(): List<String> {
    // adapted from Java v2 SDK CodegenNamingUtils.splitOnWordBoundaries
    var result = this

    // every run of non-alphanumeric characters becomes a space: "acm-success" -> "acm success"
    // (the quantifier sits outside the character class so that a literal '+' is a separator too)
    result = result.replace(Regex("[^A-Za-z0-9]+"), " ")

    // if a number has a standalone v in front of it, separate it out
    result = result.replace(Regex("([^a-z]{2,})v([0-9]+)"), "$1 v$2 ") // TESTv4 -> "TEST v4 "
        .replace(Regex("([^A-Z]{2,})V([0-9]+)"), "$1 V$2 ") // TestV4 -> "Test V4 "

    // add a space between camelCased words
    result = result.split(Regex("(?<=[a-z])(?=[A-Z]([a-zA-Z]|[0-9]))"))
        .joinToString(separator = " ") // AcmSuccess -> "Acm Success"

    // add a space after acronyms: the last capital of an uppercase run starts the next word
    result = result.replace(Regex("([A-Z]+)([A-Z][a-z])"), "$1 $2") // "(ACM)(Success)" -> "ACM Success"

    // add space after a number in the middle of a word
    result = result.replace(Regex("([0-9])([a-zA-Z])"), "$1 $2") // "s3ec2" -> "s3 ec2"

    // remove extra spaces - multiple consecutive ones or those at the beginning/end of words
    result = result.replace(Regex("\\s+"), " ") // "Foo  Bar" -> "Foo Bar"
        .trim() // " Foo " -> "Foo"

    return result.split(" ")
}

/**
 * Splits this string into lowercase words with a single left-to-right scan.
 *
 * For every character the scan decides between three actions:
 *  - a character that is not a letter or digit ends the current word and is dropped;
 *  - a character that starts a new word ends the current word and becomes the first
 *    character of the next one;
 *  - anything else is appended to the current word.
 *
 * A fixed list of "complete words" (`ipv4`, `ipv6`, `sigv4`, `emv2000`, `mib`, `gib`, `ttl`)
 * suppresses splits that would otherwise break those words apart.
 *
 * @return the words in order, lowercased, with empty words removed; an empty receiver
 * yields an empty list.
 */
fun String.splitOnWordBoundariesV2(): List<String> {
    val completeWords = listOf("ipv4", "ipv6", "sigv4", "emv2000", "mib", "gib", "ttl")
    // Does not change while scanning, so compute it once instead of once per character.
    val allLowerCase = this.lowercase() == this
    val out = mutableListOf<String>()
    var current = ""

    for (index in indices) {
        val c = this[index]
        val prev = getOrNull(index - 1)
        val peek = getOrNull(index + 1)
        val doublePeek = getOrNull(index + 2)
        val last = current.lastOrNull()

        // True while the word being built plus the remaining input still spells out one of
        // the complete words, and the word being built is not yet that complete word.
        val candidate = (current + substring(index)).lowercase()
        val completeWordInProgress =
            completeWords.any { candidate.startsWith(it) } && !completeWords.contains(current.lowercase())

        // current: sha || sha256, c == 'Sum'
        // lower/digit -> upper always splits; digit -> lower only splits when the whole input is lowercase
        val caseOrDigitBoundary = last != null &&
            (last.isLowerCase() || last.isDigit()) &&
            (c.isUpperCase() || (c.isLowerCase() && last.isDigit() && allLowerCase)) &&
            !completeWordInProgress

        // e.g. DB[P]roxy but not `AR[N]s` or `I[P]V4`
        // the last capital of an uppercase run starts a new word, unless it is followed by a
        // lone plural `s` or by a `v<digit>` version marker
        val acronymEnd = c.isUpperCase() &&
            prev?.isUpperCase() == true &&
            peek?.isLowerCase() == true &&
            (peek != 's' || doublePeek?.isLowerCase() == true) &&
            !(peek == 'v' && doublePeek?.isDigit() == true)

        // a lowercase `v` directly followed by a digit starts its own word (e.g. DB[v]2)
        val versionMarker = c == 'v' && peek?.isDigit() == true && !completeWordInProgress

        when {
            !c.isLetterOrDigit() -> {
                // separator: close the current word and discard the separator itself
                out += current
                current = ""
            }
            caseOrDigitBoundary || acronymEnd || versionMarker -> {
                out += current
                current = c.toString()
            }
            else -> current += c
        }
    }

    if (current.isNotEmpty()) {
        out += current
    }
    // consecutive separators and leading boundaries leave empty entries behind; drop them
    return out.filter { it.isNotEmpty() }.map { it.lowercase() }
}

// String extensions
/**
 * Converts this string to snake_case by joining the words produced by
 * [splitOnWordBoundariesV2] with underscores.
 */
fun String.toSnakeCaseV3(): String {
    // splitOnWordBoundariesV2 already returns lowercase words, so no per-word transform is needed
    return this.splitOnWordBoundariesV2().joinToString("_")
}

/**
 * Converts this string to snake_case using the regex-based [splitOnWordBoundaries]:
 * each word is lowercased and the words are joined with underscores.
 */
fun String.toSnakeCaseV2(): String =
    splitOnWordBoundaries().joinToString(separator = "_") { word -> word.lowercase() }

/** Converts this string to snake_case by delegating directly to Smithy's [CaseUtils.toSnakeCase]. */
fun String.toSnakeCaseLegacy(): String {
    return CaseUtils.toSnakeCase(this)
}

/** Default snake_case conversion; currently delegates to [toSnakeCaseV3]. */
fun String.toSnakeCase(): String = toSnakeCaseV3()

/**
 * Converts this string to PascalCase by first running it through [CaseUtils.toSnakeCase]
 * and then feeding that result to [CaseUtils.toPascalCase].
 */
fun String.toPascalCase(): String {
    val snakeCased = CaseUtils.toSnakeCase(this)
    return CaseUtils.toPascalCase(snakeCased)
}
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,12 @@ package software.amazon.smithy.rust.codegen.core.util

import io.kotest.matchers.shouldBe
import org.junit.jupiter.api.Test
import org.junit.jupiter.api.extension.ExtensionContext
import org.junit.jupiter.params.ParameterizedTest
import org.junit.jupiter.params.provider.Arguments
import org.junit.jupiter.params.provider.ArgumentsProvider
import org.junit.jupiter.params.provider.ArgumentsSource
import java.util.stream.Stream

internal class StringsTest {

Expand All @@ -18,4 +24,57 @@ internal class StringsTest {
"{\"nested\": \"{\\\"nested\\\": 5}\"}\"}"
""".trimIndent().trim()
}

// A trailing plural `s` must stay attached to the acronym it follows.
@Test
fun correctlyConvertToSnakeCase() {
    val converted = "NotificationARNs".toSnakeCase()
    converted shouldBe "notification_arns"
}

// Diagnostic test: prints every name in /allNames.txt whose V2 or V3 snake_case form differs
// from the legacy conversion. It makes no assertions beyond requiring the resource to exist.
@Test
fun testAllNames() {
    val resource = requireNotNull(this::class.java.getResource("/allNames.txt")) {
        "test resource /allNames.txt was not found on the classpath"
    }
    resource.readText().lines().forEach { name ->
        // Compare against the legacy conversion: toSnakeCase() is now an alias for
        // toSnakeCaseV3(), so using it as the baseline would make the V3 comparison vacuous.
        val before = name.toSnakeCaseLegacy()
        val candidates = listOf(name.toSnakeCaseV2(), name.toSnakeCaseV3())
        if (candidates.any { candidate -> candidate != before }) {
            println("$name before: $before => ${candidates.toSet()}")
        }
    }
}

// Checks every (input, expected) pair supplied by TestCasesProvider against the default
// snake_case conversion.
@ParameterizedTest
@ArgumentsSource(TestCasesProvider::class)
fun testSnakeCase(input: String, output: String) {
    // The expectations (e.g. "sha256sum" -> "sha256_sum", "ipv4Address" -> "ipv4_address")
    // describe the current default algorithm, so assert against toSnakeCase() rather than
    // the legacy CaseUtils-based conversion.
    input.toSnakeCase() shouldBe output
}
}

/**
 * Supplies the (input, expected snake_case) pairs consumed by the parameterized
 * `testSnakeCase` test via `@ArgumentsSource`.
 */
class TestCasesProvider : ArgumentsProvider {
    override fun provideArguments(context: ExtensionContext?): Stream<out Arguments> {
        // Each pair is: original name to expected snake_case form.
        val cases = listOf(
            "ACLs" to "acls",
            "ACLsUpdateStatus" to "acls_update_status",
            "AllowedAllVPCs" to "allowed_all_vpcs",
            "BluePrimaryX" to "blue_primary_x",
            "CIDRs" to "cidrs",
            "AuthTtL" to "auth_ttl",
            "CNAMEPrefix" to "cname_prefix",
            "S3Location" to "s3_location",
            "signatureS" to "signature_s",
            "signatureR" to "signature_r",
            "M3u8Settings" to "m3u8_settings",
            "IAMUser" to "iam_user",
            "OtaaV1_0_x" to "otaa_v1_0_x",
            "DynamoDBv2Action" to "dynamo_db_v2_action",
            "SessionKeyEmv2000" to "session_key_emv2000",
            "SupportsClassB" to "supports_class_b",
            "UnassignIpv6AddressesRequest" to "unassign_ipv6_addresses_request",
            "TotalGpuMemoryInMiB" to "total_gpu_memory_in_mib",
            "WriteIOs" to "write_ios",
            "dynamoDBv2" to "dynamo_db_v2",
            "ipv4Address" to "ipv4_address",
            "sigv4" to "sigv4",
            "s3key" to "s3_key",
            "sha256sum" to "sha256_sum",
        )
        return cases.map { (input, expected) -> Arguments.of(input, expected) }.stream()
    }
}
Loading

0 comments on commit 2cdf652

Please sign in to comment.