Skip to content

Commit

Permalink
Merge pull request #13 from plos-clan/dev-copi143
Browse files Browse the repository at this point in the history
修改magic的实现
  • Loading branch information
copi143 authored Nov 6, 2024
2 parents 683603b + 7d7a85e commit b0e8c5e
Show file tree
Hide file tree
Showing 7 changed files with 515 additions and 777 deletions.
24 changes: 12 additions & 12 deletions include/libc-base/string/str.h
Original file line number Diff line number Diff line change
Expand Up @@ -84,18 +84,18 @@ OAPI char *strerror_r(int e, char *buf, size_t n) __THROW; // GNU 版本

//; 自定义函数

OAPI u32 utf8to32c(cstr8 *sp);
OAPI size_t utf8to32s(u32 *d, cstr8 s);
OAPI u32 *utf8to32a(cstr8 s);
OAPI u32 utf16to32c(cstr16 *sp);
OAPI size_t utf16to32s(u32 *d, cstr16 s);
OAPI u32 *utf16to32a(cstr16 s);
OAPI size_t utf32to8c(u32 c, u8 *s);
OAPI size_t utf32to8s(u8 *d, cstr32 s);
OAPI u8 *utf32to8a(cstr32 s);
OAPI size_t utf32to16c(u32 c, u16 *s);
OAPI size_t utf32to16s(u16 *d, cstr32 s);
OAPI u16 *utf32to16a(cstr32 s);
// Unicode transcoding helpers.
//
// Naming: the suffix tells the granularity of the conversion.
//   *c - one code point
//   *s - a whole NUL-terminated string written into a caller-supplied buffer
//   *a - a whole NUL-terminated string; the implementations visible in
//        charset.c return a realloc()'d buffer (presumably owned by the
//        caller - TODO confirm who frees it)

// Decode one code point from *sp. The string converters call this in a loop
// with the address of their cursor, so it is expected to advance *sp.
OAPI u32 utf8to32c(cstr8 *sp);
// Convert the UTF-8 string s into d (presumably returns the number of u32
// units written, as the *a variants size their buffer from it - TODO confirm).
OAPI size_t utf8to32s(u32 *d, cstr8 s);
OAPI u32 *utf8to32a(cstr8 s);
// UTF-16 counterparts of the three functions above.
OAPI u32 utf16to32c(cstr16 *sp);
OAPI size_t utf16to32s(u32 *d, cstr16 s);
OAPI u32 *utf16to32a(cstr16 s);
// Encode code point c at s and return the number of u8 units written.
// The return type is signed: the string converters test for a negative
// result and emit a replacement sequence instead.
OAPI ssize_t utf32to8c(u32 c, u8 *s);
OAPI size_t utf32to8s(u8 *d, cstr32 s);
OAPI u8 *utf32to8a(cstr32 s);
// Encode code point c at s and return the number of u16 units written.
// Signed for the same reason as utf32to8c: callers check for n < 0.
OAPI ssize_t utf32to16c(u32 c, u16 *s);
OAPI size_t utf32to16s(u16 *d, cstr32 s);
OAPI u16 *utf32to16a(cstr32 s);

#undef IAPI
#undef OAPI
Expand Down
22 changes: 22 additions & 0 deletions include/magic.h
Original file line number Diff line number Diff line change
@@ -1,4 +1,26 @@
#pragma once
#include <define.h>

// Coarse file-type categories. The names follow the MIME top-level media
// types; MIME_UNKNOWN is the zero value. Values are spelled out so the
// numbering stays stable if entries are ever reordered.
enum {
  MIME_UNKNOWN     = 0,
  MIME_TEXT        = 1,
  MIME_IMAGE       = 2,
  MIME_AUDIO       = 3,
  MIME_VIDEO       = 4,
  MIME_APPLICATION = 5,
  MIME_FONT        = 6,
  MIME_MESSAGE     = 7,
  MIME_MODEL       = 8,
  MIME_MULTIPART   = 9,
};

// One file-type record. All fields are const, so a record cannot be modified
// after initialization. Note that filetype_t is a POINTER to struct filetype.
typedef struct filetype {
const i32 id; // ID
const i32 type; // Type (presumably one of the MIME_* constants - TODO confirm)
const cstr mime; // MIME type
const cstr desc; // Description
} *filetype_t;

// Takes a buffer of `size` bytes at `data` and returns a string; presumably
// it identifies the file type from the content (the exact string returned -
// MIME type or description - is not visible here; TODO confirm).
// NOTE(review): this prototype is marked dlimport while filetype_by_ext
// below is marked dlexport - confirm the asymmetry is intentional.
dlimport cstr filetype(const void *data, size_t size);

// Takes a file-extension string and returns a string; presumably the same
// kind of result as filetype(), looked up by extension - TODO confirm.
dlexport cstr filetype_by_ext(cstr ext);
89 changes: 81 additions & 8 deletions src/libc-base/string/charset.c
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ u32 utf8to32c(cstr8 *sp) {
}

size_t utf8to32s(u32 *d, cstr8 s) {
u32 *_d = d;
const u32 *_d = d;
while (*s != '\0') {
*d++ = utf8to32c(&s);
}
Expand Down Expand Up @@ -73,7 +73,7 @@ u32 utf16to32c(cstr16 *sp) {
}

size_t utf16to32s(u32 *d, cstr16 s) {
u32 *_d = d;
const u32 *_d = d;
while (*s != '\0') {
*d++ = utf16to32c(&s);
}
Expand All @@ -88,7 +88,7 @@ u32 *utf16to32a(cstr16 s) {
return realloc(r, (n + 1) * 4);
}

size_t utf32to8c(u32 c, u8 *s) {
ssize_t utf32to8c(u32 c, u8 *s) {
if (c <= 0x7f) {
s[0] = c;
return 1;
Expand All @@ -113,9 +113,9 @@ size_t utf32to8c(u32 c, u8 *s) {
}

size_t utf32to8s(u8 *d, cstr32 s) {
u8 *_d = d;
const u8 *_d = d;
while (*s != '\0') {
size_t n = utf32to8c(*s++, d);
ssize_t n = utf32to8c(*s++, d);
if (n < 0) {
*d++ = 0xef;
*d++ = 0xbf;
Expand All @@ -135,7 +135,7 @@ u8 *utf32to8a(cstr32 s) {
return realloc(r, n + 1);
}

size_t utf32to16c(u32 c, u16 *s) {
ssize_t utf32to16c(u32 c, u16 *s) {
if (c <= 0xffff) {
s[0] = c;
return 1;
Expand All @@ -150,9 +150,9 @@ size_t utf32to16c(u32 c, u16 *s) {
}

size_t utf32to16s(u16 *d, cstr32 s) {
u16 *_d = d;
const u16 *_d = d;
while (*s != '\0') {
size_t n = utf32to16c(*s++, d);
ssize_t n = utf32to16c(*s++, d);
if (n < 0) {
*d++ = 0xfffd;
} else {
Expand All @@ -169,3 +169,76 @@ u16 *utf32to16a(cstr32 s) {
size_t n = utf32to16s(r, s);
return realloc(r, (n + 1) * 2);
}

// Returns true when c is a Unicode scalar value: at most U+10FFFF and not in
// the surrogate range U+D800..U+DFFF.
bool is_vaild_utf32(u32 c) {
  if (c > 0x10ffff) return false;               // beyond the last code point
  if (c >= 0xd800 && c <= 0xdfff) return false; // surrogate range
  return true;
}

// Returns true when every unit of the NUL-terminated UTF-32 string s passes
// is_vaild_utf32(); stops at the first one that does not. An empty string is
// valid.
bool is_vaild_utf32s(cstr32 s) {
  for (; *s != '\0'; s++) {
    if (!is_vaild_utf32(*s)) return false;
  }
  return true;
}

// Counts the units of the NUL-terminated UTF-32 string s that fail
// is_vaild_utf32().
size_t count_invalid_utf32s(cstr32 s) {
  size_t invalid = 0;
  for (; *s != '\0'; s++) {
    if (is_vaild_utf32(*s)) continue;
    invalid++;
  }
  return invalid;
}

// Copies the NUL-terminated UTF-32 string s into d, dropping every unit that
// fails is_vaild_utf32(), then NUL-terminates d.
// d must have room for as many units as s holds plus the terminator, since
// nothing is dropped when s is fully valid.
// Returns the number of units written, not counting the terminator.
size_t remove_invalid_utf32s(u32 *d, cstr32 s) {
  u32 *out = d;
  for (; *s != '\0'; s++) {
    u32 cp = *s; // read once before any write to the output
    if (!is_vaild_utf32(cp)) continue;
    *out++ = cp;
  }
  *out = '\0';
  return out - d;
}

// Decodes the NUL-terminated UTF-16 string s with utf16to32c() and counts the
// decoded values that fail is_vaild_utf32().
size_t count_invalid_utf16s(cstr16 s) {
  size_t invalid = 0;
  while (*s != '\0') {
    // utf16to32c() is handed the cursor's address and moves it forward.
    if (is_vaild_utf32(utf16to32c(&s))) continue;
    invalid++;
  }
  return invalid;
}

// Decodes the NUL-terminated UTF-16 string s one code point at a time,
// re-encodes into d only those that pass is_vaild_utf32(), and NUL-terminates
// d. Returns the number of u16 units written, not counting the terminator.
size_t remove_invalid_utf16s(u16 *d, cstr16 s) {
  u16 *out = d;
  while (*s != '\0') {
    u32 cp = utf16to32c(&s);
    if (!is_vaild_utf32(cp)) continue;
    ssize_t written = utf32to16c(cp, out);
    // Advance only when the encoder reports that it wrote something.
    if (written > 0) out += written;
  }
  *out = '\0';
  return out - d;
}

// Decodes the NUL-terminated UTF-8 string s with utf8to32c() and counts the
// decoded values that fail is_vaild_utf32().
size_t count_invalid_utf8s(cstr8 s) {
  size_t invalid = 0;
  while (*s != '\0') {
    // utf8to32c() is handed the cursor's address and moves it forward.
    if (is_vaild_utf32(utf8to32c(&s))) continue;
    invalid++;
  }
  return invalid;
}

// Decodes the NUL-terminated UTF-8 string s one code point at a time,
// re-encodes into d only those that pass is_vaild_utf32(), and NUL-terminates
// d. Returns the number of bytes written, not counting the terminator.
size_t remove_invalid_utf8s(u8 *d, cstr8 s) {
  u8 *out = d;
  while (*s != '\0') {
    u32 cp = utf8to32c(&s);
    if (!is_vaild_utf32(cp)) continue;
    ssize_t written = utf32to8c(cp, out);
    // Advance only when the encoder reports that it wrote something.
    if (written > 0) out += written;
  }
  *out = '\0';
  return out - d;
}
Loading

0 comments on commit b0e8c5e

Please sign in to comment.