Skip to content
This repository has been archived by the owner on Jan 23, 2023. It is now read-only.

Expose a Utf8String type. #17872

Closed
wants to merge 58 commits into from
Closed
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
58 commits
Select commit Hold shift + click to select a range
6b90660
Expose a Utf8String type.
atsushikan May 3, 2018
fabbe5d
Fix Unix build breaks
atsushikan May 3, 2018
f8639fa
Incorporate PR feedback
atsushikan May 3, 2018
bcbbbb9
Fix allocation size & remove the manual managed code trappings
atsushikan May 3, 2018
e4f9a43
GetPinnableReference and fix x86 build
atsushikan May 3, 2018
b5f7f59
Resolve merge conflicts
atsushikan May 4, 2018
9e13d11
Merge with master
atsushikan May 7, 2018
624897f
Realign with System.String changes
atsushikan May 7, 2018
78e15a2
Merge with master
atsushikan May 8, 2018
5e800cd
Merge with master
atsushikan May 9, 2018
10db0e3
Merge from master
atsushikan May 10, 2018
3618be1
Merge from master
atsushikan May 11, 2018
6cbb3f8
Merge from master
atsushikan May 14, 2018
2cff568
Faster Utf8 string allocator.
atsushikan May 14, 2018
7758492
Merge from master
atsushikan May 15, 2018
663fe01
Merge with master
atsushikan May 16, 2018
4f78043
Merge with master
atsushikan May 17, 2018
e8e82db
Merge with master
atsushikan May 18, 2018
a00b751
Bring up the Utf8 constructors
atsushikan May 18, 2018
bbdcae4
Block RuntimeHelpers.GetUninitializedObject()
atsushikan May 18, 2018
a382e1d
Incorporate PR feedback
atsushikan May 18, 2018
56ea91c
No longer need private constructor
atsushikan May 18, 2018
4279139
Put the readonly back.
atsushikan May 18, 2018
a3ce65a
Make MethodBase.Invoke aware of Utf8String's specialness
atsushikan May 18, 2018
24a8071
Merge with master
atsushikan May 21, 2018
e16f421
Merge with master
atsushikan May 22, 2018
5633b36
Merge with master
atsushikan May 23, 2018
0711ac1
Merge with master
atsushikan May 24, 2018
1119a96
Merge with master
atsushikan May 25, 2018
3c85d80
Merge with master
atsushikan May 29, 2018
5907d1c
Merge with master
atsushikan May 30, 2018
049f0ed
Merge with master
atsushikan May 31, 2018
603193a
Merge with master
atsushikan Jun 1, 2018
f6d811e
Merge with master
atsushikan Jun 4, 2018
6825173
Merge with master
atsushikan Jun 5, 2018
b9b8040
Fix build break
atsushikan Jun 5, 2018
103fce3
Merge with master
atsushikan Jun 6, 2018
f997aaa
Merge with master
atsushikan Jun 7, 2018
d026198
Merge with master
atsushikan Jun 8, 2018
493c27f
Merge with master
atsushikan Jun 11, 2018
5eb53cc
Merge with master
atsushikan Jun 12, 2018
6cf128f
Merge with master
atsushikan Jun 13, 2018
dd11e73
Merge with master
atsushikan Jun 14, 2018
82ec9cb
Merge with master
atsushikan Jun 15, 2018
4a01601
Merge with master
atsushikan Jun 18, 2018
4ff38a1
Merge with master
atsushikan Jun 19, 2018
fab65e9
Merge with master
atsushikan Jun 20, 2018
d6258a4
Merge with master
atsushikan Jun 21, 2018
21f47d3
Merge with master
atsushikan Jun 22, 2018
4d60318
Merge with master
atsushikan Jun 25, 2018
39a4e2e
Merge with master
atsushikan Jun 26, 2018
279a4c5
Merge with master
atsushikan Jun 27, 2018
b247f32
Merge with master
atsushikan Jun 28, 2018
cfe461b
Merge with master
atsushikan Jun 29, 2018
2f4ad81
Merge with master
atsushikan Jul 2, 2018
11af9eb
Merge with master
atsushikan Jul 3, 2018
00e7d26
Merge with master
atsushikan Jul 5, 2018
5e9651f
Merge with master
atsushikan Jul 9, 2018
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions src/classlibnative/bcltype/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ set(BCLTYPE_SOURCES
objectnative.cpp
stringnative.cpp
system.cpp
utf8stringnative.cpp
varargsnative.cpp
variant.cpp
)
Expand Down
51 changes: 51 additions & 0 deletions src/classlibnative/bcltype/utf8stringnative.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.
//
// File: StringNative.cpp
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Does not match the actual name. Better to just delete - it is useless comment.

//

//
// Purpose: The implementation of the Utf8String class.
//

//

#include "common.h"

#include "object.h"
#include "utilcode.h"
#include "excep.h"
#include "frames.h"
#include "field.h"
#include "vars.hpp"
#include "utf8stringnative.h"
#include "comutilnative.h"
#include "metasig.h"
#include "excep.h"

// Compile the string functionality with these pragma flags (equivalent of the command line /Ox flag)
// Compiling this functionality differently gives us significant throughout gain in some cases.
#if defined(_MSC_VER) && defined(_TARGET_X86_)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is copy&paste that does not make sense given what's in this file.

#pragma optimize("tgy", on)
#endif

FCIMPL1(Utf8StringObject *, Utf8StringFastAllocate, DWORD length)
{
FCALL_CONTRACT;

UTF8STRINGREF rv = NULL; // not protected

HELPER_METHOD_FRAME_BEGIN_RET_1(rv);
rv = SlowAllocateUtf8String(length);
HELPER_METHOD_FRAME_END();

return UTF8STRINGREFToObject(rv);
}
FCIMPLEND


// Revert to command line compilation flags
#if defined(_MSC_VER) && defined(_TARGET_X86_)
#pragma optimize ("", on)
#endif
24 changes: 24 additions & 0 deletions src/classlibnative/bcltype/utf8stringnative.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.
//
// File: StringNative.h
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Same here

//

//
// Purpose: Contains types and method signatures for the Utf8String class
//

//

#include "fcall.h"
#include "qcall.h"
#include "excep.h"

#ifndef _UTF8STRINGNATIVE_H_
#define _UTF8STRINGNATIVE_H_

FCDECL1(Utf8StringObject *, Utf8StringFastAllocate, DWORD length);

#endif // _UTF8STRINGNATIVE_H_

1 change: 1 addition & 0 deletions src/inc/dacvars.h
Original file line number Diff line number Diff line change
Expand Up @@ -172,6 +172,7 @@ DEFINE_DACVAR(ULONG, UNKNOWN_POINTER_TYPE, dac__g_pObjectClass, ::g_pObjectClass
DEFINE_DACVAR(ULONG, UNKNOWN_POINTER_TYPE, dac__g_pRuntimeTypeClass, ::g_pRuntimeTypeClass)
DEFINE_DACVAR(ULONG, UNKNOWN_POINTER_TYPE, dac__g_pCanonMethodTableClass, ::g_pCanonMethodTableClass)
DEFINE_DACVAR(ULONG, UNKNOWN_POINTER_TYPE, dac__g_pStringClass, ::g_pStringClass)
DEFINE_DACVAR(ULONG, UNKNOWN_POINTER_TYPE, dac__g_pUtf8StringClass, ::g_pUtf8StringClass)
DEFINE_DACVAR(ULONG, UNKNOWN_POINTER_TYPE, dac__g_pArrayClass, ::g_pArrayClass)
DEFINE_DACVAR(ULONG, UNKNOWN_POINTER_TYPE, dac__g_pSZArrayHelperClass, ::g_pSZArrayHelperClass)
DEFINE_DACVAR(ULONG, UNKNOWN_POINTER_TYPE, dac__g_pNullableClass, ::g_pNullableClass)
Expand Down
3 changes: 2 additions & 1 deletion src/mscorlib/System.Private.CoreLib.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -357,6 +357,7 @@
<Compile Include="$(BclSourcesRoot)\System\TypeNameParser.cs" />
<Compile Include="$(BclSourcesRoot)\System\TypedReference.cs" />
<Compile Include="$(BclSourcesRoot)\System\TypeLoadException.cs" />
<Compile Include="$(BclSourcesRoot)\System\Utf8String.cs" />
<Compile Include="$(BclSourcesRoot)\System\ValueType.cs" />
<Compile Include="$(BclSourcesRoot)\System\WeakReference.cs" />
<Compile Include="$(BclSourcesRoot)\System\WeakReferenceOfT.cs" />
Expand Down Expand Up @@ -667,4 +668,4 @@
</ItemGroup>
<Import Project="ILLink.targets" />
<Import Project="GenerateCompilerResponseFile.targets" />
</Project>
</Project>
26 changes: 26 additions & 0 deletions src/mscorlib/src/System/Utf8String.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.

using System.Runtime.CompilerServices;

namespace System
{
// This is an experimental type and not referenced from CoreFx but needs to exists and be public so we can prototype in CoreFxLab.
public sealed class Utf8String
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

As we add interfaces (IEnumerable, IComparable, ...) to this during development, will we need to touch the VM code at all? That is, does the VM need to know about every possible member that sits on this type?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not necessarily about interfaces. But the VM and JIT will certainly need to know more about this type to get decent performance. It goes back to my question on what you expect to get by adding this stub to CoreLib.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@jkotas You're right in that we're not going to see full performance benchmarks until JIT support comes online, but this does get us at least partway there. Even when not JIT-optimized, the supplied allocation routine will still be faster and allocate less overall heap memory than calling two different ctors for a container + separate array.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could just be a struct wrapper with array as its single field?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@benaadams We considered this for prototype purposes. But the end result is to have the final type be a class, and any data we'd collect in the meantime from a struct stand-in would be suspect.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

the supplied allocation routine will still be faster

That is not correct. The allocation routine in this PR is super slow naive implementation. I just tried:

class MyUtf8String
{
   byte[] _payload;

   static public MyUtf8String FastAllocate(int length) { var ret = new MyUtf8String(); ret._payload = new byte[length+1]; return ret; }
}

Run Utf8String.FastAllocate vs. MyUtf8String.FastAllocate in a loop. MyUtf8String loop is faster.

I am with Stephen that it would be best for a rough early prototype code like this to stay in a branch and not be merged into master.

FWIW, Span<T> was also developed in a branch and only integrated to master once it was sufficiently along and we agreed to ship it for real: #7886.

{
// Do not reorder these fields. Must match layout of Utf8StringObject in object.h.
private int _length;
[CLSCompliant(false)]
public byte _firstByte; // TODO: Is public for experimentation in CoreFxLab. Will be private in its ultimate form.
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do you really need this to be public? It would be better to have readonly byte GetPinnableReference() method - that is the proper public method we will want to have eventually.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It allows the code in CoreFxLab to be written as instance members of Utf8String itself would be.

Though if GetPinnableReference() adds no overhead, I'm fine with it. This PR is unblock @GrabYourPitchforks so I'll let him make the call here.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sure, use GetPinnableReference instead if you wish. The only overhead I can think of is that an implicit null check may be emitted, but the JIT is generally good about eliding these where possible. If we find stray null checks we can open JIT bugs.


private Utf8String() { } // Suppress creation of the public constructor. No one actually calls this.

public int Length => _length;

// Creates a new zero-initialized instance of the specified length. Actual storage allocated is "length + 1" bytes (the extra
// +1 is for the NUL terminator.)
[MethodImpl(MethodImplOptions.InternalCall)]
public static extern Utf8String FastAllocate(int length); //TODO: Is public for experimentation in CoreFxLab. Will be private in its ultimate form.
}
}
1 change: 1 addition & 0 deletions src/strongname/api/common.h
Original file line number Diff line number Diff line change
Expand Up @@ -146,6 +146,7 @@ typedef DPTR(class ReJitManager) PTR_ReJitManager;
typedef DPTR(struct ReJitInfo) PTR_ReJitInfo;
typedef DPTR(struct SharedReJitInfo) PTR_SharedReJitInfo;
typedef DPTR(class StringObject) PTR_StringObject;
typedef DPTR(class Utf8StringObject) PTR_Utf8StringObject;
typedef DPTR(class TypeHandle) PTR_TypeHandle;
#ifdef STUB_DISPATCH
typedef VPTR(class VirtualCallStubManager) PTR_VirtualCallStubManager;
Expand Down
5 changes: 5 additions & 0 deletions src/vm/appdomain.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2762,6 +2762,11 @@ void SystemDomain::LoadBaseSystemClasses()
_ASSERTE(g_pStringClass->GetBaseSize() == ObjSizeOf(StringObject)+sizeof(WCHAR));
_ASSERTE(g_pStringClass->GetComponentSize() == 2);

// Load Utf8String
g_pUtf8StringClass = MscorlibBinder::GetClass(CLASS__UTF8_STRING);
_ASSERTE(g_pUtf8StringClass->GetBaseSize() == ObjSizeOf(Utf8StringObject)+sizeof(BYTE));
_ASSERTE(g_pUtf8StringClass->GetComponentSize() == 1);

// Used by Buffer::BlockCopy
g_pByteArrayMT = ClassLoader::LoadArrayTypeThrowing(
TypeHandle(MscorlibBinder::GetElementType(ELEMENT_TYPE_U1))).AsArray()->GetMethodTable();
Expand Down
2 changes: 2 additions & 0 deletions src/vm/classnames.h
Original file line number Diff line number Diff line change
Expand Up @@ -149,6 +149,8 @@
#define g_TransparentProxyName "__TransparentProxy"
#define g_TypeClassName "System.Type"

#define g_Utf8StringName "Utf8String"

#define g_VariantClassName "System.Variant"
#define g_GuidClassName "System.Guid"

Expand Down
1 change: 1 addition & 0 deletions src/vm/common.h
Original file line number Diff line number Diff line change
Expand Up @@ -168,6 +168,7 @@ typedef DPTR(struct ReJitInfo) PTR_ReJitInfo;
typedef DPTR(struct SharedReJitInfo) PTR_SharedReJitInfo;
typedef DPTR(class StringObject) PTR_StringObject;
typedef DPTR(class TypeHandle) PTR_TypeHandle;
typedef DPTR(class Utf8StringObject) PTR_Utf8StringObject;
typedef VPTR(class VirtualCallStubManager) PTR_VirtualCallStubManager;
typedef VPTR(class VirtualCallStubManagerManager) PTR_VirtualCallStubManagerManager;
typedef VPTR(class IGCHeap) PTR_IGCHeap;
Expand Down
5 changes: 5 additions & 0 deletions src/vm/ecalllist.h
Original file line number Diff line number Diff line change
Expand Up @@ -1055,6 +1055,10 @@ FCFuncStart(gUriMarshalerFuncs)
FCFuncElement("CreateNativeUriInstanceHelper", StubHelpers::UriMarshaler__CreateNativeUriInstance)
FCFuncEnd()

FCFuncStart(gUtf8StringFuncs)
FCFuncElement("FastAllocate", Utf8StringFastAllocate)
FCFuncEnd()

FCFuncStart(gEventArgsMarshalerFuncs)
QCFuncElement("CreateNativeNCCEventArgsInstanceHelper", StubHelpers::EventArgsMarshaler__CreateNativeNCCEventArgsInstance)
QCFuncElement("CreateNativePCEventArgsInstance", StubHelpers::EventArgsMarshaler__CreateNativePCEventArgsInstance)
Expand Down Expand Up @@ -1384,6 +1388,7 @@ FCClassElement("TypedReference", "System", gTypedReferenceFuncs)
#ifdef FEATURE_COMINTEROP
FCClassElement("UriMarshaler", "System.StubHelpers", gUriMarshalerFuncs)
#endif
FCClassElement("Utf8String", "System", gUtf8StringFuncs)
FCClassElement("ValueClassMarshaler", "System.StubHelpers", gValueClassMarshalerFuncs)
FCClassElement("ValueType", "System", gValueTypeFuncs)
#ifdef FEATURE_COMINTEROP
Expand Down
67 changes: 67 additions & 0 deletions src/vm/gchelpers.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1059,6 +1059,73 @@ STRINGREF SlowAllocateString( DWORD cchStringLength )
return( ObjectToSTRINGREF(orObject) );
}

UTF8STRINGREF SlowAllocateUtf8String(DWORD cchStringLength)
{
CONTRACTL{
THROWS;
GC_TRIGGERS;
MODE_COOPERATIVE; // returns an objref without pinning it => cooperative
} CONTRACTL_END;

Utf8StringObject *orObject = NULL;

#ifdef _DEBUG
if (g_pConfig->ShouldInjectFault(INJECTFAULT_GCHEAP))
{
char *a = new char;
delete a;
}
#endif

// Limit the maximum string size to <2GB to mitigate risk of security issues caused by 32-bit integer
// overflows in buffer size calculations.
if (cchStringLength > 0x7FFFFFDF)
ThrowOutOfMemory();

SIZE_T ObjectSize = PtrAlign(Utf8StringObject::GetSize(cchStringLength));
Copy link
Member

@jkotas jkotas May 3, 2018

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think this allocates more than required.

I have noticed that we may have the same problem in regular string. For example:
new string('a', 5) allocates 0x28 bytes objects on x64 today. But it should be enough for it to allocate just 0x20 bytes:

8 syncblock
8 vtable
4 size
5*2 content
2 zero terminator

Do you know why it is the case?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I have checked CoreRT. CoreRT allocates 0x20 bytes in this case, so there is definitely something pretty broken.

Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Isn't there some padding between size and content?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There is padding for regular arrays (so that all arrays have same layout even when elements are 8-byte aligned). There is no padding for strings. The content starts right after length. The extra unnecessary bytes are at the end.

I remember folks went to a great length to ensure that strings do not pay the extra 4 bytes during the initial 64-bit ports. It looks like it has regressed.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's coming from sizeof(Utf8StringObject) - there should be a pragma pack 4 around that declaration.

Presumably the same for StringObject, though I want to keep anything like that separate from this PR. We haven't yet agreed to merge this into master.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

#17876 has the fix for the String overallocation problem.

_ASSERTE(ObjectSize > cchStringLength);

SetTypeHandleOnThreadForAlloc(TypeHandle(g_pUtf8StringClass));

orObject = (Utf8StringObject *)Alloc(ObjectSize, FALSE, FALSE);

// Object is zero-init already
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This includes the null terminator being zero-inited, I suppose?

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yep.

_ASSERTE(orObject->HasEmptySyncBlockInfo());

// Initialize Object
orObject->SetMethodTable(g_pUtf8StringClass);
orObject->SetUtf8StringLength(cchStringLength);

if (ObjectSize >= LARGE_OBJECT_SIZE)
{
GCHeapUtilities::GetGCHeap()->PublishObject((BYTE*)orObject);
}

// Notify the profiler of the allocation
if (TrackAllocations())
{
OBJECTREF objref = ObjectToOBJECTREF((Object*)orObject);
GCPROTECT_BEGIN(objref);
ProfilerObjectAllocatedCallback(objref, (ClassID)orObject->GetTypeHandle().AsPtr());
GCPROTECT_END();

orObject = (Utf8StringObject *)OBJECTREFToObject(objref);
}

#ifdef FEATURE_EVENT_TRACE
// Send ETW event for allocation
if (ETW::TypeSystemLog::IsHeapAllocEventEnabled())
{
ETW::TypeSystemLog::SendObjectAllocatedEvent(orObject);
}
#endif // FEATURE_EVENT_TRACE

LogAlloc(ObjectSize, g_pUtf8StringClass, orObject);

return(ObjectToUTF8STRINGREF(orObject));
}


#ifdef FEATURE_COMINTEROP_UNMANAGED_ACTIVATION
// OBJECTREF AllocateComClassObject(ComClassFactory* pComClsFac)
void AllocateComClassObject(ComClassFactory* pComClsFac, OBJECTREF* ppRefClass)
Expand Down
4 changes: 4 additions & 0 deletions src/vm/gchelpers.h
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,8 @@ STRINGREF AllocateString( DWORD cchStringLength );
// The slow version, implemented in gcscan.cpp
STRINGREF SlowAllocateString( DWORD cchStringLength );

UTF8STRINGREF SlowAllocateUtf8String( DWORD cchStringLength );

#else

// On other platforms, go to the (somewhat less efficient) implementations in gcscan.cpp
Expand All @@ -83,6 +85,8 @@ OBJECTREF AllocateObjectArray(DWORD cElements, TypeHandle ElementType, BOOL bAll

STRINGREF SlowAllocateString( DWORD cchStringLength );

UTF8STRINGREF SlowAllocateUtf8String( DWORD cchStringLength );

inline STRINGREF AllocateString( DWORD cchStringLength )
{
WRAPPER_NO_CONTRACT;
Expand Down
2 changes: 1 addition & 1 deletion src/vm/methodtable.h
Original file line number Diff line number Diff line change
Expand Up @@ -1882,7 +1882,7 @@ class MethodTable
BOOL IsString()
{
LIMITED_METHOD_DAC_CONTRACT;
return HasComponentSize() && !IsArray();
return HasComponentSize() && !IsArray() && RawGetComponentSize() == 2;
}

BOOL HasComponentSize() const
Expand Down
11 changes: 11 additions & 0 deletions src/vm/methodtablebuilder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -9744,6 +9744,17 @@ void MethodTableBuilder::CheckForSystemTypes()

pMT->SetComponentSize(2);
}
else if (strcmp(name, g_Utf8StringName) == 0 && strcmp(nameSpace, g_SystemNS) == 0)
{
// Utf8Strings are not "normal" objects, so we need to mess with their method table a bit
// so that the GC can figure out how big each string is...
DWORD baseSize = ObjSizeOf(Utf8StringObject) + sizeof(BYTE);
pMT->SetBaseSize(baseSize); // NULL character included

GetHalfBakedClass()->SetBaseSizePadding(baseSize - bmtFP->NumInstanceFieldBytes);
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

GetHalfBakedClass is this a real method 🤣

Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

method function (it's c++) 😉


pMT->SetComponentSize(1);
}
else if (strcmp(name, g_CriticalFinalizerObjectName) == 0 && strcmp(nameSpace, g_ConstrainedExecutionNS) == 0)
{
// To introduce a class with a critical finalizer,
Expand Down
1 change: 1 addition & 0 deletions src/vm/mscorlib.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
//
#include "arraynative.h"
#include "stringnative.h"
#include "utf8stringnative.h"
#include "objectnative.h"
#include "comdelegate.h"
#include "customattribute.h"
Expand Down
2 changes: 2 additions & 0 deletions src/vm/mscorlib.h
Original file line number Diff line number Diff line change
Expand Up @@ -884,6 +884,8 @@ DEFINE_METHOD(STRING, INTERNAL_COPY, InternalCopy,
DEFINE_METHOD(STRING, WCSLEN, wcslen, SM_PtrChar_RetInt)
DEFINE_PROPERTY(STRING, LENGTH, Length, Int)

DEFINE_CLASS(UTF8_STRING, System, Utf8String)

DEFINE_CLASS(STRING_BUILDER, Text, StringBuilder)
DEFINE_PROPERTY(STRING_BUILDER, LENGTH, Length, Int)
DEFINE_PROPERTY(STRING_BUILDER, CAPACITY, Capacity, Int)
Expand Down
30 changes: 30 additions & 0 deletions src/vm/object.h
Original file line number Diff line number Diff line change
Expand Up @@ -866,6 +866,7 @@ typedef DPTR(U8Array) PTR_U8Array;
typedef DPTR(PTRArray) PTR_PTRArray;

class StringObject;
class Utf8StringObject;

#ifdef USE_CHECKED_OBJECTREFS
typedef REF<ArrayBase> BASEARRAYREF;
Expand All @@ -883,6 +884,7 @@ typedef REF<U8Array> U8ARRAYREF;
typedef REF<CHARArray> CHARARRAYREF;
typedef REF<PTRArray> PTRARRAYREF; // Warning: Use PtrArray only for single dimensional arrays, not multidim arrays.
typedef REF<StringObject> STRINGREF;
typedef REF<Utf8StringObject> UTF8STRINGREF;

#else // USE_CHECKED_OBJECTREFS

Expand All @@ -901,6 +903,7 @@ typedef PTR_U8Array U8ARRAYREF;
typedef PTR_CHARArray CHARARRAYREF;
typedef PTR_PTRArray PTRARRAYREF; // Warning: Use PtrArray only for single dimensional arrays, not multidim arrays.
typedef PTR_StringObject STRINGREF;
typedef PTR_Utf8StringObject UTF8STRINGREF;

#endif // USE_CHECKED_OBJECTREFS

Expand Down Expand Up @@ -935,6 +938,7 @@ typedef PTR_StringObject STRINGREF;
#ifdef _MSC_VER
#pragma warning(disable : 4200) // disable zero-sized array warning
#endif

class StringObject : public Object
{
#ifdef DACCESS_COMPILE
Expand Down Expand Up @@ -1205,6 +1209,32 @@ class ReflectClassBaseObject : public BaseObjectWithCachedData

};

class Utf8StringObject : public Object
{
#ifdef DACCESS_COMPILE
friend class ClrDataAccess;
#endif

private:
DWORD m_StringLength;
BYTE m_Characters[0];
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For the String over-allocation fix, I have changed this to m_FirstChar to make it look like the managed side, and not depend on pragmas that control packing and non-standard extensions.

// GC will see a Utf8StringObject like this:
// DWORD m_StringLength
// BYTE m_Characters[0]
// DWORD m_OptionalPadding (this is an optional field and will appear based on need)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This comment about m_OptionalPadding does not make sense to me. Looks like left over from times when System.String was done differently.


public:
VOID SetUtf8StringLength(DWORD len) { LIMITED_METHOD_CONTRACT; _ASSERTE(len >= 0); m_StringLength = len; }
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This should be called just SetLength, I think. The managed side has Length property, not Utf8Length property.


protected:
Utf8StringObject() { LIMITED_METHOD_CONTRACT; }
~Utf8StringObject() { LIMITED_METHOD_CONTRACT; }

public:
static SIZE_T GetSize(DWORD stringLength);
};


// This is the Method version of the Reflection object.
// A Method has adddition information.
// m_pMD - A pointer to the actual MethodDesc of the method.
Expand Down
Loading
You are viewing a condensed version of this merge commit. You can view the full changes here.