Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Character map #1

Closed
wants to merge 5 commits into from
Closed

Character map #1

wants to merge 5 commits into from

Conversation

MihaZupan
Copy link
Owner

@MihaZupan MihaZupan commented Apr 7, 2020

A quick benchmark on the dataset I use (https://github.com/dotnet/docs/tree/master/docs/core) shows an improvement from 35.9 ms to 34.7 ms, i.e. roughly 3–3.5%.

The codegen diff for IndexOfOpeningCharacter is shown below: the existing codegen first, followed by the codegen with this PR.

IndexOfOpeningCharacter(System.String, Int32, Int32)
    L0000: push rdi
    L0001: push rsi
    L0002: push rbp
    L0003: push rbx
    L0004: sub rsp, 0x48
    L0008: vzeroupper
    L000b: mov rsi, rcx
    L000e: lea rdi, [rsp+0x20]
    L0013: mov ecx, 0xa
    L0018: xor eax, eax
    L001a: rep stosd
    L001c: mov rcx, rsi
    L001f: mov rax, 0xb55cfaa848e5
    L0029: mov [rsp+0x40], rax
    L002e: mov esi, r9d
    L0031: mov eax, r8d
    L0034: mov rdi, rcx
    L0037: lea rcx, [rdi+0x20]
    L003b: vmovdqu xmm0, [rcx]
    L003f: vmovdqu [rsp+0x20], xmm0
    L0045: test rdx, rdx
    L0048: jnz L004e
    L004a: xor ebx, ebx
    L004c: jmp L005c
    L004e: add rdx, 0xc
    L0052: mov [rsp+0x38], rdx
    L0057: mov rbx, [rsp+0x38]
    L005c: cmp qword [rdi+0x10], 0x0
    L0061: jnz L00ba
    L0063: cmp eax, esi
    L0065: jg L010b
    L006b: movsxd rcx, eax
    L006e: movzx ecx, word [rbx+rcx*2]
    L0072: cmp ecx, 0x80
    L0078: jge L0092
    L007a: lea rdx, [rsp+0x20]
    L007f: mov r8d, ecx
    L0082: sar r8d, 0x5
    L0086: movsxd r8, r8d
    L0089: mov edx, [rdx+r8*4]
    L008d: bt edx, ecx
    L0090: jb L009a
    L0092: inc eax
    L0094: cmp eax, esi
    L0096: jle L006b
    L0098: jmp L010b
    L009a: mov rcx, 0xb55cfaa848e5
    L00a4: cmp [rsp+0x40], rcx
    L00a9: jz L00b0
    L00ab: call 0x7ffa70dc0780
    L00b0: nop
    L00b1: add rsp, 0x48
    L00b5: pop rbx
    L00b6: pop rbp
    L00b7: pop rsi
    L00b8: pop rdi
    L00b9: ret
    L00ba: mov ebp, eax
    L00bc: cmp ebp, esi
    L00be: jg L010b
    L00c0: movsxd rcx, ebp
    L00c3: movzx edx, word [rbx+rcx*2]
    L00c7: cmp edx, 0x80
    L00cd: jl L00e8
    L00cf: mov rcx, [rdi+0x10]
    L00d3: mov eax, [rcx]
    L00d5: call System.Collections.Generic.Dictionary`2[[System.Char, System.Private.CoreLib],[System.__Canon, System.Private.CoreLib]].FindEntry(Char)
    L00da: test eax, eax
    L00dc: setge al
    L00df: movzx eax, al
    L00e2: test eax, eax
    L00e4: jz L0105
    L00e6: jmp L0137
    L00e8: lea rax, [rsp+0x20]
    L00ed: mov ecx, edx
    L00ef: sar ecx, 0x5
    L00f2: movsxd rcx, ecx
    L00f5: mov eax, [rax+rcx*4]
    L00f8: bt eax, edx
    L00fb: setb al
    L00fe: movzx eax, al
    L0101: test eax, eax
    L0103: jnz L0137
    L0105: inc ebp
    L0107: cmp ebp, esi
    L0109: jle L00c0
    L010b: xor eax, eax
    L010d: mov [rsp+0x38], rax
    L0112: mov eax, 0xffffffff
    L0117: mov rcx, 0xb55cfaa848e5
    L0121: cmp [rsp+0x40], rcx
    L0126: jz L012d
    L0128: call 0x7ffa70dc0780
    L012d: nop
    L012e: add rsp, 0x48
    L0132: pop rbx
    L0133: pop rbp
    L0134: pop rsi
    L0135: pop rdi
    L0136: ret
    L0137: mov eax, ebp
    L0139: mov rcx, 0xb55cfaa848e5
    L0143: cmp [rsp+0x40], rcx
    L0148: jz L014f
    L014a: call 0x7ffa70dc0780
    L014f: nop
    L0150: add rsp, 0x48
    L0154: pop rbx
    L0155: pop rbp
    L0156: pop rsi
    L0157: pop rdi
    L0158: ret

PR

IndexOfOpeningCharacter(System.String, Int32, Int32)
    L0000: cmp qword [rcx+0x10], 0x0
    L0005: jnz L0041
    L0007: mov eax, [rdx]
    L0009: add rdx, 0xc
    L000d: cmp r8d, r9d
    L0010: jg L0037
    L0012: movsxd rax, r8d
    L0015: movzx eax, word [rdx+rax*2]
    L0019: cmp rax, 0x80
    L001f: jae L002f
    L0021: lea r10, [rcx+0x20]
    L0025: cmp [r10], r10d
    L0028: cmp byte [r10+rax], 0x0
    L002d: jnz L003d
    L002f: inc r8d
    L0032: cmp r8d, r9d
    L0035: jle L0012
    L0037: mov eax, 0xffffffff
    L003c: ret
    L003d: mov eax, r8d
    L0040: ret
    L0041: mov rax, 0x7ffa18c500f8
    L004b: jmp rax

IndexOfOpeningCharacterNonAscii(System.String, Int32, Int32)
    L0000: push rdi
    L0001: push rsi
    L0002: push rbp
    L0003: push rbx
    L0004: sub rsp, 0x28
    L0008: mov rdi, rcx
    L000b: mov esi, r9d
    L000e: mov ecx, [rdx]
    L0010: add rdx, 0xc
    L0014: mov rbx, rdx
    L0017: mov ebp, r8d
    L001a: cmp ebp, esi
    L001c: jg L005c
    L001e: movsxd rcx, ebp
    L0021: movzx edx, word [rbx+rcx*2]
    L0025: cmp edx, 0x80
    L002b: jl L0046
    L002d: mov rcx, [rdi+0x10]
    L0031: mov eax, [rcx]
    L0033: call System.Collections.Generic.Dictionary`2[[System.UInt32, System.Private.CoreLib],[System.__Canon, System.Private.CoreLib]].FindEntry(UInt32)
    L0038: test eax, eax
    L003a: setge al
    L003d: movzx eax, al
    L0040: test eax, eax
    L0042: jz L0056
    L0044: jmp L006a
    L0046: lea rax, [rdi+0x20]
    L004a: cmp [rax], eax
    L004c: mov edx, edx
    L004e: movzx eax, byte [rax+rdx]
    L0052: test eax, eax
    L0054: jnz L006a
    L0056: inc ebp
    L0058: cmp ebp, esi
    L005a: jle L001e
    L005c: mov eax, 0xffffffff
    L0061: add rsp, 0x28
    L0065: pop rbx
    L0066: pop rbp
    L0067: pop rsi
    L0068: pop rdi
    L0069: ret
    L006a: mov eax, ebp
    L006c: add rsp, 0x28
    L0070: pop rbx
    L0071: pop rbp
    L0072: pop rsi
    L0073: pop rdi
    L0074: ret

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
None yet
Projects
None yet
Development

Successfully merging this pull request may close these issues.

1 participant