Skip to content

Commit

Permalink
Add caml_stat_char_array_{to,of}_os
Browse files Browse the repository at this point in the history
Allows conversion of arbitrary char/wchar_t sequences by allowing the
number of characters copied to be specified. The output length of the
encoding can also be returned by these functions.
  • Loading branch information
dra27 committed Oct 22, 2024
1 parent 20e2242 commit ca2f53f
Show file tree
Hide file tree
Showing 6 changed files with 102 additions and 15 deletions.
12 changes: 7 additions & 5 deletions Changes
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,13 @@ Working version

### Runtime system:

* #11449, #13497: Add caml_stat_char_array_{to,of}_os functions allowing
conversion of string data which may contain NUL characters. Correct
implementation of caml_stat_strdup_to_utf16 to raise Out_of_memory instead of
returning NULL (the behaviour of caml_stat_strdup_to_os was inconsistent
between Unix/Windows).
(David Allsopp, review by Nick Barnes, Antonin Décimo and Miod Vallat)

- #13352: Concurrency refactors and cleanups.
(Antonin Décimo, review by Gabriel Scherer, David Allsopp, and Miod Vallat)

Expand All @@ -30,11 +37,6 @@ Working version
entries found in ld.conf.
(David Allsopp, review by Stephen Dolan)

* #13497: Correct implementation of caml_stat_strdup_to_utf16 to raise
Out_of_memory instead of returning NULL (the behaviour of
caml_stat_strdup_to_os was inconsistent between Unix/Windows).
(David Allsopp, review by Nick Barnes, Antonin Décimo and Miod Vallat)

- #13500: Add frame pointers support for ARM64 on Linux and macOS.
(Tim McGilchrist, review by KC Sivaramakrishnan, Fabrice Buoro
and Miod Vallat)
Expand Down
8 changes: 8 additions & 0 deletions runtime/caml/memory.h
Original file line number Diff line number Diff line change
Expand Up @@ -166,6 +166,14 @@ CAMLalloc(caml_stat_free, 1)
CAMLextern wchar_t* caml_stat_wcsdup_noexc(const wchar_t *s);
#endif

/* [caml_stat_memdup(s, size, &out_size)] returns a copy of the first [size]
bytes of [s]. If [out_size] is not [NULL], then [size] is stored in
[*out_size]. This function is the "dummy" Unix implementation of the
Windows-only functions caml_stat_char_array_{to,of}_utf16.
[size] must be non-zero (this is checked with [CAMLassert]).
The returned buffer is allocated with [caml_stat_alloc], so it should be
freed using [caml_stat_free].
*/
CAMLextern caml_stat_string caml_stat_memdup(const char *s, asize_t size,
asize_t *out_size);

/* [caml_stat_strconcat(nargs, strings)] concatenates null-terminated [strings]
(an array of [char*] of size [nargs]) into a new string, dropping all NULs,
except for the very last one. It throws an OCaml exception in case the
Expand Down
4 changes: 4 additions & 0 deletions runtime/caml/misc.h
Original file line number Diff line number Diff line change
Expand Up @@ -526,6 +526,8 @@ extern double caml_log1p(double);
#define caml_stat_strdup_noexc_to_os caml_stat_strdup_noexc_to_utf16
#define caml_stat_strdup_of_os caml_stat_strdup_of_utf16
#define caml_stat_strdup_noexc_of_os caml_stat_strdup_noexc_of_utf16
#define caml_stat_char_array_to_os caml_stat_char_array_to_utf16
#define caml_stat_char_array_of_os caml_stat_char_array_of_utf16
#define caml_copy_string_of_os caml_copy_string_of_utf16

#else /* _WIN32 */
Expand Down Expand Up @@ -569,6 +571,8 @@ extern double caml_log1p(double);
#define caml_stat_strdup_noexc_to_os caml_stat_strdup_noexc
#define caml_stat_strdup_of_os caml_stat_strdup
#define caml_stat_strdup_noexc_of_os caml_stat_strdup_noexc
#define caml_stat_char_array_to_os caml_stat_memdup
#define caml_stat_char_array_of_os caml_stat_memdup
#define caml_copy_string_of_os caml_copy_string

#endif /* _WIN32 */
Expand Down
25 changes: 25 additions & 0 deletions runtime/caml/osdeps.h
Original file line number Diff line number Diff line change
Expand Up @@ -208,6 +208,31 @@ CAMLextern char* caml_stat_strdup_noexc_of_utf16(const wchar_t *s);
*/
CAMLextern char* caml_stat_strdup_of_utf16(const wchar_t *s);

/* [caml_stat_char_array_to_utf16(s, size, &out_size)] returns a copy of the
first [size] bytes of [s] re-encoded in UTF-16. The encoding of [s] is
assumed to be UTF-8 if [caml_windows_unicode_runtime_enabled] is non-zero
**and** [s] is valid UTF-8, or the current Windows code page otherwise. If
[out_size] is not [NULL], then the number of UTF-16 code units in the result
is recorded in [*out_size].
The returned buffer is allocated with [caml_stat_alloc], so it should be
freed using [caml_stat_free].
Raises [Out_of_memory] if the buffer cannot be allocated.
NOTE(review): the implementation hands [size] to a helper whose length
parameter is an [int], so [size] should fit in an [int] - confirm whether
larger inputs need to be rejected explicitly.
*/
CAMLextern wchar_t *caml_stat_char_array_to_utf16(const char *s, size_t size,
size_t *out_size);

/* [caml_stat_char_array_of_utf16(s, size, &out_size)] returns a copy of the
first [size] UTF-16 code units of [s] re-encoded in UTF-8 if
[caml_windows_unicode_runtime_enabled] is non-zero or the current Windows
code page otherwise. If [out_size] is not [NULL], then the size of the result
in bytes is recorded in [*out_size].
The returned buffer is allocated with [caml_stat_alloc], so it should be
freed using [caml_stat_free].
Raises [Out_of_memory] if the buffer cannot be allocated.
NOTE(review): the implementation hands [size] to a helper whose length
parameter is an [int], so [size] should fit in an [int] - confirm whether
larger inputs need to be rejected explicitly.
*/
CAMLextern char *caml_stat_char_array_of_utf16(const wchar_t *s, size_t size,
size_t *out_size);

/* [caml_copy_string_of_utf16(s)] returns an OCaml string containing a copy of
[s] re-encoded in UTF-8 if [caml_windows_unicode_runtime_enabled] is non-zero
or in the current code page otherwise.
Expand Down
11 changes: 11 additions & 0 deletions runtime/memory.c
Original file line number Diff line number Diff line change
Expand Up @@ -729,6 +729,17 @@ CAMLexport caml_stat_string caml_stat_strdup(const char *s)
return result;
}

/* Duplicate the first [size] bytes of [s] into a buffer obtained from
   [caml_stat_alloc]. [size] is asserted to be non-zero. When [out_size] is
   not [NULL], the number of bytes copied (i.e. [size]) is written to it. */
CAMLexport caml_stat_string caml_stat_memdup(const char *s, asize_t size,
                                             asize_t *out_size)
{
  caml_stat_block copy;

  CAMLassert(size > 0);
  copy = caml_stat_alloc(size);
  memcpy(copy, s, size);

  /* The output length is optional; on this path it always equals [size]. */
  if (out_size != NULL) {
    *out_size = size;
  }

  return copy;
}

#ifdef _WIN32

CAMLexport wchar_t * caml_stat_wcsdup_noexc(const wchar_t *s)
Expand Down
57 changes: 47 additions & 10 deletions runtime/win32.c
Original file line number Diff line number Diff line change
Expand Up @@ -951,41 +951,68 @@ CAMLexport value caml_copy_string_of_utf16(const wchar_t *s)
return v;
}

/* [char_array_to_utf16_noexc(s, slen, out_size)] is the non-raising helper
   shared by the char -> UTF-16 conversion functions.
   [caml_win32_multi_byte_to_wide_char] is called twice: first with a NULL
   output buffer, whose return value is used as the number of [wchar_t]
   elements to allocate, then again to fill the buffer obtained from
   [caml_stat_alloc_noexc]. [slen] is forwarded unchanged to both calls (the
   strdup-style wrapper passes -1).
   Returns NULL if the allocation fails; in that case [*out_size] is left
   untouched. Otherwise, when [out_size] is not NULL, the length returned by
   the first call is stored in [*out_size].
   NOTE(review): this span is a rendered diff hunk. The first signature line
   and the statements that pass -1 / use sizeof(*ws) appear to be the
   pre-commit lines shown alongside their replacements - confirm against the
   actual file before compiling. */
CAMLexport wchar_t* caml_stat_strdup_noexc_to_utf16(const char *s)
Caml_inline wchar_t *char_array_to_utf16_noexc(const char *s,
int slen, size_t *out_size)
{
wchar_t * ws;
int retcode;

retcode = caml_win32_multi_byte_to_wide_char(s, -1, NULL, 0);
ws = caml_stat_alloc_noexc(retcode * sizeof(*ws));
if (ws != NULL)
caml_win32_multi_byte_to_wide_char(s, -1, ws, retcode);
/* Sizing pass: no output buffer, only the required length is returned. */
retcode = caml_win32_multi_byte_to_wide_char(s, slen, NULL, 0);
ws = caml_stat_alloc_noexc(retcode * sizeof(wchar_t));
if (ws != NULL) {
/* Conversion pass into the freshly allocated buffer. */
caml_win32_multi_byte_to_wide_char(s, slen, ws, retcode);
if (out_size != NULL)
*out_size = retcode;
}

return ws;
}

CAMLexport wchar_t* caml_stat_strdup_to_utf16(const char *s)
/* Non-raising conversion of the string [s] to UTF-16. Delegates to
   [char_array_to_utf16_noexc] with a length of -1 and no [out_size]; the
   helper's result (which may be NULL) is returned unchanged. */
CAMLexport wchar_t *caml_stat_strdup_noexc_to_utf16(const char *s)
{
  wchar_t *ws = char_array_to_utf16_noexc(s, -1, NULL);

  return ws;
}

/* Raising counterpart of [caml_stat_strdup_noexc_to_utf16]: returns the
   converted string, or calls [caml_raise_out_of_memory] when the non-raising
   variant returns NULL. */
CAMLexport wchar_t *caml_stat_strdup_to_utf16(const char *s)
{
/* NOTE(review): two declarations of [out] are visible because this is a
   rendered diff; the one without '*' appears to be the line removed by the
   commit and the pointer declaration its replacement - confirm. */
wchar_t out = caml_stat_strdup_noexc_to_utf16(s);
wchar_t *out = caml_stat_strdup_noexc_to_utf16(s);
if (out == NULL)
caml_raise_out_of_memory();
return out;
}

CAMLexport caml_stat_string caml_stat_strdup_noexc_of_utf16(const wchar_t *s)
/* Convert the first [size] bytes of [s] to UTF-16 via
   [char_array_to_utf16_noexc], forwarding [out_size] to the helper.
   Calls [caml_raise_out_of_memory] if the helper returns NULL.
   Note that the helper's length parameter is an [int], so [size] is
   implicitly converted at the call. */
CAMLexport wchar_t *caml_stat_char_array_to_utf16(const char *s, size_t size,
                                                  size_t *out_size)
{
  wchar_t *converted = char_array_to_utf16_noexc(s, size, out_size);

  if (converted == NULL) {
    caml_raise_out_of_memory();
  }

  return converted;
}

/* [char_array_of_utf16_noexc(s, slen, out_size)] is the non-raising helper
   shared by the UTF-16 -> char conversion functions.
   [caml_win32_wide_char_to_multi_byte] is called twice: first with a NULL
   output buffer, whose return value is used as the number of bytes to
   allocate, then again to fill the buffer obtained from
   [caml_stat_alloc_noexc]. [slen] is forwarded unchanged to both calls (the
   strdup-style wrapper passes -1).
   Returns NULL if the allocation fails; in that case [*out_size] is left
   untouched. Otherwise, when [out_size] is not NULL, the length returned by
   the first call is stored in [*out_size].
   NOTE(review): this span is a rendered diff hunk. The calls that pass -1
   appear to be the pre-commit lines shown alongside their [slen]
   replacements - confirm against the actual file before compiling. */
Caml_inline caml_stat_string char_array_of_utf16_noexc(const wchar_t *s,
int slen,
size_t *out_size)
{
caml_stat_string out;
int retcode;

retcode = caml_win32_wide_char_to_multi_byte(s, -1, NULL, 0);
/* Sizing pass: no output buffer, only the required length is returned. */
retcode = caml_win32_wide_char_to_multi_byte(s, slen, NULL, 0);
out = caml_stat_alloc_noexc(retcode);
if (out != NULL) {
caml_win32_wide_char_to_multi_byte(s, -1, out, retcode);
/* Conversion pass into the freshly allocated buffer. */
caml_win32_wide_char_to_multi_byte(s, slen, out, retcode);
if (out_size != NULL)
*out_size = retcode;
}

return out;
}

/* Non-raising conversion of the UTF-16 string [s] to a [caml_stat_string].
   Delegates to [char_array_of_utf16_noexc] with a length of -1 and no
   [out_size]; the helper's result (which may be NULL) is returned
   unchanged. */
CAMLexport caml_stat_string caml_stat_strdup_noexc_of_utf16(const wchar_t *s)
{
  caml_stat_string converted = char_array_of_utf16_noexc(s, -1, NULL);

  return converted;
}

CAMLexport caml_stat_string caml_stat_strdup_of_utf16(const wchar_t *s)
{
caml_stat_string out = caml_stat_strdup_noexc_of_utf16(s);
Expand All @@ -994,6 +1021,16 @@ CAMLexport caml_stat_string caml_stat_strdup_of_utf16(const wchar_t *s)
return out;
}

/* Convert the first [size] UTF-16 code units of [s] via
   [char_array_of_utf16_noexc], forwarding [out_size] to the helper.
   Calls [caml_raise_out_of_memory] if the helper returns NULL.
   Note that the helper's length parameter is an [int], so [size] is
   implicitly converted at the call. */
CAMLexport caml_stat_string caml_stat_char_array_of_utf16(const wchar_t *s,
                                                          size_t size,
                                                          size_t *out_size)
{
  caml_stat_string converted = char_array_of_utf16_noexc(s, size, out_size);

  if (converted == NULL) {
    caml_raise_out_of_memory();
  }

  return converted;
}

void caml_probe_win32_version(void)
{
/* Determine the version of Windows we're running, and cache it */
Expand Down

0 comments on commit ca2f53f

Please sign in to comment.