From 79f28d452ccc2321e2eb56e36100e5981f3e0233 Mon Sep 17 00:00:00 2001 From: Takashi Suwa Date: Tue, 26 Mar 2024 22:44:19 +0900 Subject: [PATCH 01/13] make `ScriptDataMap` and `LineBreakDataMap` tracked by contexts (midway) --- src/backend/horzBox.ml | 40 +------- src/backend/lineBreak.ml | 18 ++-- src/chardecoder/convertText.ml | 14 +-- src/chardecoder/lineBreakDataMap.ml | 33 ++++--- src/chardecoder/lineBreakDataMap.mli | 10 +- src/chardecoder/scriptDataMap.ml | 119 +++-------------------- src/chardecoder/scriptDataMap.mli | 9 +- src/chardecoder/scriptHandler.ml | 140 +++++++++++++++++++++++++++ src/chardecoder/scriptHandler.mli | 12 +++ src/frontend/primitives.cppo.ml | 2 + tools/gencode/vminst.ml | 4 +- 11 files changed, 219 insertions(+), 182 deletions(-) create mode 100644 src/chardecoder/scriptHandler.ml create mode 100644 src/chardecoder/scriptHandler.mli diff --git a/src/backend/horzBox.ml b/src/backend/horzBox.ml index 81c4168e1..02b224970 100644 --- a/src/backend/horzBox.ml +++ b/src/backend/horzBox.ml @@ -158,8 +158,12 @@ type context_main = { [@printer (fun fmt _ -> Format.fprintf fmt "")] dominant_wide_script : CharBasis.script; dominant_narrow_script : CharBasis.script; + script_map : ScriptDataMap.t; + [@printer (fun fmt _ -> Format.fprintf fmt "")] script_space_map : (float * float * float) CharBasis.ScriptSpaceMap.t; [@printer (fun fmt _ -> Format.fprintf fmt "")] + line_break_map : LineBreakDataMap.t; + [@printer (fun fmt _ -> Format.fprintf fmt "")] space_natural : float; space_shrink : float; space_stretch : float; @@ -508,42 +512,6 @@ and evaled_row = type column = cell list -let normalize_script ctx script_raw = - match script_raw with - | CharBasis.CommonNarrow - | CharBasis.Inherited -> - ctx.dominant_narrow_script - - | CharBasis.CommonWide -> - ctx.dominant_wide_script - - | _ -> script_raw - - -let get_font_with_ratio ctx script_raw = - let script = normalize_script ctx script_raw in - match ctx.font_scheme |> CharBasis.ScriptSchemeMap.find_opt script with - | None -> raise (FontIsNotSet{ raw = script_raw; normalized = script }) - | Some(fontsch) -> fontsch - - -let get_language_system ctx script_raw = - let script = normalize_script ctx script_raw in - match ctx.langsys_scheme |> CharBasis.ScriptSchemeMap.find_opt script with - | None -> CharBasis.NoLanguageSystem - | Some(langsys) -> langsys - - -let get_string_info ctx script_raw = - let (fontkey, ratio, rising_ratio) = get_font_with_ratio ctx script_raw in - { - font_key = fontkey; - text_font_size = ctx.font_size *% ratio; - text_color = ctx.text_color; - rising = ctx.manual_rising +% ctx.font_size *% rising_ratio; - } - - let get_metrics_of_evaled_horz_box ((wid, evhbmain) : evaled_horz_box) : length * length * length = let (hgt, dpt) = match evhbmain with diff --git a/src/backend/lineBreak.ml b/src/backend/lineBreak.ml index 64ce9f190..2f95217e3 100644 --- a/src/backend/lineBreak.ml +++ b/src/backend/lineBreak.ml @@ -251,14 +251,16 @@ let can_break_before tail = let is_whitespace_character (uch : Uchar.t) : bool = - match LineBreakDataMap.find uch with - | SP | INBR -> true - | _ -> false - (* -- - needs re-consideration: - it may be better to use the criterion of whether - the general category of the given character is `Zs` or not. - -- *) + match Uchar.to_int uch with + | 0x000A + | 0x000B + | 0x000C + | 0x000D + | 0x0020 + | 0x0085 -> true + | _ -> false + (* TODO: re-consider this; it may be better to use the criterion of whether + the general category of the given character is `Zs` or not. *) let rec omit_space_uchars (uchlst : Uchar.t list) : Uchar.t list = diff --git a/src/chardecoder/convertText.ml b/src/chardecoder/convertText.ml index da66288b3..970406ea7 100644 --- a/src/chardecoder/convertText.ml +++ b/src/chardecoder/convertText.ml @@ -6,8 +6,8 @@ open LineBreakBox let to_chunks (ctx : context_main) (uchs : Uchar.t list) (alw_last : break_opportunity) : break_opportunity * line_break_chunk list = - let (alw_first, tris) = LineBreakDataMap.append_break_opportunity uchs alw_last in - let scrs = ScriptDataMap.divide_by_script ctx tris in + let (alw_first, tris) = LineBreakDataMap.append_break_opportunity ctx.line_break_map uchs alw_last in + let scrs = ScriptHandler.divide_by_script ctx tris in let chunks = scrs |> List.map (fun chunkmain -> (ctx, chunkmain)) in (alw_first, chunks) @@ -56,7 +56,7 @@ let pure_space ctx : lb_pure_box = let get_corrected_font_size ctx script = - let (_, font_ratio, _) = get_font_with_ratio ctx script in + let (_, font_ratio, _) = ScriptHandler.get_font_with_ratio ctx script in ctx.font_size *% font_ratio @@ -136,7 +136,7 @@ let make_string_atom (hsinfo : horz_string_info) (uchsegs : uchar_segment list) (* Makes an alphabetic word or a CJK character. *) let inner_string (ctx : context_main) (script : script) (uchsegs : uchar_segment list) : lb_box list = - let hsinfo = get_string_info ctx script in + let hsinfo = ScriptHandler.get_string_info ctx script in match LoadHyph.lookup ctx.left_hyphen_min ctx.right_hyphen_min ctx.hyphen_dictionary uchsegs with | LoadHyph.Single(uchsegs) -> [ LBPure(make_string_atom hsinfo uchsegs) ] @@ -250,7 +250,7 @@ let space_between_chunks_pure info1 info2 : lb_pure_box list = (* Converts single CJK character, not depending on adjacent characters. *) let ideographic_single ctx script lbc (uchseg : uchar_segment) : lb_box list = let lphb_raw = - let hsinfo = get_string_info ctx script in + let hsinfo = ScriptHandler.get_string_info ctx script in LBPure(make_string_atom hsinfo [ uchseg ]) in let hwkern = halfwidth_kern ctx script in @@ -382,7 +382,7 @@ let chunks_to_boxes_pure (script_before : script) (chunklst : line_break_chunk l | AlphabeticChunk(script, lbcfirst, lbclast, uchsegs, alw) -> let opt = AccSome(((ctx, script, lbclast), alw)) in let lphblstmain = - let hsinfo = get_string_info ctx script in + let hsinfo = ScriptHandler.get_string_info ctx script in [ make_string_atom hsinfo uchsegs ] in begin @@ -403,7 +403,7 @@ let chunks_to_boxes_pure (script_before : script) (chunklst : line_break_chunk l | IdeographicChunk(script, lbc, uchseg, alw) -> let opt = AccSome(((ctx, script, lbc), alw)) in let lphblstmain = - let hsinfo = get_string_info ctx script in + let hsinfo = ScriptHandler.get_string_info ctx script in [ make_string_atom hsinfo [ uchseg ] ] in begin diff --git a/src/chardecoder/lineBreakDataMap.ml b/src/chardecoder/lineBreakDataMap.ml index 99ff2a984..ef3a6863b 100644 --- a/src/chardecoder/lineBreakDataMap.ml +++ b/src/chardecoder/lineBreakDataMap.ml @@ -103,10 +103,13 @@ let line_break_class_overriding_list = ] -let line_break_map_ref : (line_break_class BatIMap.t) ref = ref (BatIMap.empty ~eq:(=)) +type t = line_break_class BatIMap.t -let set_from_file (abspath : abs_path) = +let empty : t = BatIMap.empty ~eq:(=) + + +let make_from_file (abspath : abs_path) : t = let channel = open_in_abs abspath in let line_break_list = DataParser.main DataLexer.expr (Lexing.from_channel channel) in let line_break_map_raw = line_break_list |> CharBasis.map_of_list class_of_string in @@ -115,14 +118,14 @@ let set_from_file (abspath : abs_path) = mapacc |> BatIMap.add cp lbc ) line_break_map_raw line_break_class_overriding_list in - begin - line_break_map_ref := line_break_map; - end + line_break_map -let find uch = - try (!line_break_map_ref) |> BatIMap.find (Uchar.to_int uch) - with Not_found -> XX (* temporary *) +let find (uch : Uchar.t) (line_break_map : t) = + try + line_break_map |> BatIMap.find (Uchar.to_int uch) + with + | Not_found -> XX (* TODO: reconsider this *) let set lbclst = LBRESet(lbclst) @@ -312,8 +315,7 @@ let proj_bi (_, lbc) = lbc let proj_tri (_, lbc, _) = lbc -let append_property (uchlst : Uchar.t list) : (Uchar.t * line_break_class) list = - +let append_property (line_break_map : t) (uchlst : Uchar.t list) : (Uchar.t * line_break_class) list = let rec normalize biacc bilst = match bilst with | [] -> @@ -327,9 +329,8 @@ let append_property (uchlst : Uchar.t list) : (Uchar.t * line_break_class) list | Some(repl) -> normalize (Alist.append biacc repl) bitail end in - - let bilst = uchlst |> List.map (fun uch -> (uch, find uch)) in - normalize Alist.empty bilst + let bilst = uchlst |> List.map (fun uch -> (uch, line_break_map |> find uch)) in + normalize Alist.empty bilst type segment_record = { @@ -429,7 +430,7 @@ let cut_into_segment_record (bilst : (Uchar.t * line_break_class) list) : segmen let proj_segrcd segrcd = segrcd.line_break_class -let append_break_opportunity (uchlst : Uchar.t list) (alwlast : break_opportunity) : break_opportunity * line_break_element list = +let append_break_opportunity (line_break_map : t) (uchlst : Uchar.t list) (alwlast : break_opportunity) : break_opportunity * line_break_element list = let should_prevent_break (trirev : line_break_element list) segrcdlst = let alwopt = find_first_match line_break_rule proj_tri proj_segrcd trirev segrcdlst in @@ -479,7 +480,7 @@ let append_break_opportunity (uchlst : Uchar.t list) (alwlast : break_opportunit aux (Alist.extend triacc (uchseg, lbc, alw)) bitail end in - let bilst = append_property uchlst in + let bilst = append_property line_break_map uchlst in let segrcdlst = cut_into_segment_record bilst in let alw_first = @@ -487,7 +488,7 @@ let append_break_opportunity (uchlst : Uchar.t list) (alwlast : break_opportunit if b_first then PreventBreak else AllowBreak in let lst = aux Alist.empty segrcdlst in - (alw_first, lst) + (alw_first, lst) (* diff --git a/src/chardecoder/lineBreakDataMap.mli b/src/chardecoder/lineBreakDataMap.mli index 2c394922b..2a9f4258b 100644 --- a/src/chardecoder/lineBreakDataMap.mli +++ b/src/chardecoder/lineBreakDataMap.mli @@ -2,8 +2,12 @@ open MyUtil open CharBasis -val set_from_file : abs_path -> unit +type t -val find : Uchar.t -> line_break_class +val empty : t -val append_break_opportunity : Uchar.t list -> break_opportunity -> break_opportunity * line_break_element list +val make_from_file : abs_path -> t + +val find : Uchar.t -> t -> line_break_class + +val append_break_opportunity : t -> Uchar.t list -> break_opportunity -> break_opportunity * line_break_element list diff --git a/src/chardecoder/scriptDataMap.ml b/src/chardecoder/scriptDataMap.ml index 36c3f6cca..8710a7933 100644 --- a/src/chardecoder/scriptDataMap.ml +++ b/src/chardecoder/scriptDataMap.ml @@ -1,8 +1,6 @@ open MyUtil open CharBasis -open LineBreakBox -open HorzBox let read_east_asian_width _ data = @@ -43,10 +41,13 @@ let read_script eaw_map cp data = | _ -> OtherScript -let script_map_ref : (script BatIMap.t) ref = ref (BatIMap.empty ~eq:(=)) +type t = script BatIMap.t -let set_from_file (abspath_S : abs_path) (abspath_EAW : abs_path) = +let empty : t = BatIMap.empty ~eq:(=) + + +let make_from_file ~script:(abspath_S : abs_path) ~east_asian_width:(abspath_EAW : abs_path) : t = let eaw_map = let channel_EAW = open_in_abs abspath_EAW in let eaw_list = DataParser.main DataLexer.expr (Lexing.from_channel channel_EAW) in @@ -59,109 +60,13 @@ let set_from_file (abspath_S : abs_path) (abspath_EAW : abs_path) = close_in channel_S; script_list |> CharBasis.map_of_list (read_script eaw_map) in - begin - script_map_ref := script_map; - end + script_map -let find (ctx : context_main) ((uch, _) : uchar_segment) = +let find (uch : Uchar.t) (script_map : t) : script option = try - (!script_map_ref) |> BatIMap.find (Uchar.to_int uch) - |> normalize_script ctx - with Not_found -> OtherScript - - -let divide_by_script (ctx : context_main) (trilst : line_break_element list) : LineBreakBox.line_break_chunk_main list = - - let ideographic script lbc uchseg alw = - IdeographicChunk(script, lbc, uchseg, alw) - in - - let preword script lbcfirst lbclast uchseglst alw = - AlphabeticChunk(script, lbcfirst, lbclast, uchseglst, alw) - in - - let rec aux resacc (scraccopt : (line_break_class * script * line_break_class * uchar_segment list) option) trilst = - match trilst with - | [] -> - begin - match scraccopt with - | None -> - Alist.to_list resacc - - | Some((lbcfirst, scriptprev, lbcprev, uchsegacc)) -> - let chunk = preword scriptprev lbcfirst lbcprev (List.rev uchsegacc) PreventBreak in - Alist.to_list (Alist.extend resacc chunk) - end - - | (_uch, SP, alw) :: tritail -> - let chunkspace = - match alw with - | AllowBreak -> Space - | PreventBreak -> UnbreakableSpace - in - begin - match scraccopt with - | None -> - aux (Alist.extend resacc chunkspace) None tritail - - | Some((lbcfirst, scriptprev, lbcprev, uchsegacc)) -> - let chunkprev = preword scriptprev lbcfirst lbcprev (List.rev uchsegacc) PreventBreak in - aux (Alist.append resacc [chunkprev; chunkspace]) None tritail - end - - | (uchseg, lbc, alw) :: tritail -> - let script = find ctx uchseg in - if is_ideographic_class lbc then - (* temporary; whether 'AI' is ideographic or not should depend on the context *) - (* -- if the spotted character is ideographic -- *) - begin - match scraccopt with - | None -> - let chunkideo = ideographic script lbc uchseg alw in - aux (Alist.extend resacc chunkideo) None tritail - - | Some((lbcfirst, scriptprev, lbcprev, uchacc)) -> - (* -- if there accumulate some characters before the spotted character -- *) - let chunkideo = ideographic script lbc uchseg alw in - let chunkprev = preword scriptprev lbcfirst lbcprev (List.rev uchacc) PreventBreak in - aux (Alist.append resacc [chunkprev; chunkideo]) None tritail - end - else - begin - match alw with - | AllowBreak -> - (* -- if the spotted non-ideographic character allows line break after it -- *) - begin - match scraccopt with - | None -> - let chunk = preword script lbc lbc [uchseg] AllowBreak in - aux (Alist.extend resacc chunk) None tritail - - | Some((lbcfirst, scriptprev, lbcprev, uchsegacc)) -> - if script_equal scriptprev script then - let chunk = preword script lbcfirst lbc (List.rev (uchseg :: uchsegacc)) AllowBreak in - aux (Alist.extend resacc chunk) None tritail - else - let chunkprev = preword scriptprev lbcfirst lbcprev (List.rev uchsegacc) PreventBreak in - let chunk = preword script lbc lbc [uchseg] AllowBreak in - aux (Alist.append resacc [chunkprev; chunk]) None tritail - end - - | PreventBreak -> - begin - match scraccopt with - | None -> - aux resacc (Some((lbc, script, lbc, [uchseg]))) tritail - - | Some((lbcfirst, scriptprev, lbcprev, uchsegacc)) -> - if script_equal scriptprev script then - aux resacc (Some((lbcfirst, script, lbc, uchseg :: uchsegacc))) tritail - else - let chunkprev = preword scriptprev lbcfirst lbcprev (List.rev uchsegacc) PreventBreak in - aux (Alist.extend resacc chunkprev) (Some((lbc, script, lbc, [uchseg]))) tritail - end - end - in - - aux Alist.empty None trilst + let script = script_map |> BatIMap.find (Uchar.to_int uch) in + Some(script) + with + | Not_found -> + None diff --git a/src/chardecoder/scriptDataMap.mli b/src/chardecoder/scriptDataMap.mli index d3f57086f..d7982ec1c 100644 --- a/src/chardecoder/scriptDataMap.mli +++ b/src/chardecoder/scriptDataMap.mli @@ -1,8 +1,11 @@ open MyUtil open CharBasis -open HorzBox -val set_from_file : abs_path -> abs_path -> unit +type t -val divide_by_script : context_main -> line_break_element list -> LineBreakBox.line_break_chunk_main list +val empty : t + +val make_from_file : script:abs_path -> east_asian_width:abs_path -> t + +val find : Uchar.t -> t -> script option diff --git a/src/chardecoder/scriptHandler.ml b/src/chardecoder/scriptHandler.ml new file mode 100644 index 000000000..79964ce3c --- /dev/null +++ b/src/chardecoder/scriptHandler.ml @@ -0,0 +1,140 @@ + +open CharBasis +open LengthInterface +open LineBreakBox +open HorzBox + + +let normalize_script (ctx : context_main) (script_raw : script) : script = + match script_raw with + | CharBasis.CommonNarrow + | CharBasis.Inherited -> ctx.dominant_narrow_script + | CharBasis.CommonWide -> ctx.dominant_wide_script + | _ -> script_raw + + + +let find (ctx : context_main) ((uch, _) : uchar_segment) : script = + match ctx.script_map |> ScriptDataMap.find uch with + | None -> OtherScript + | Some(script) -> normalize_script ctx script + + +let divide_by_script (ctx : context_main) (trilst : line_break_element list) : line_break_chunk_main list = + + let ideographic script lbc uchseg alw = + IdeographicChunk(script, lbc, uchseg, alw) + in + + let preword script lbcfirst lbclast uchseglst alw = + AlphabeticChunk(script, lbcfirst, lbclast, uchseglst, alw) + in + + let rec aux resacc (scraccopt : (line_break_class * script * line_break_class * uchar_segment list) option) trilst = + match trilst with + | [] -> + begin + match scraccopt with + | None -> + Alist.to_list resacc + + | Some((lbcfirst, scriptprev, lbcprev, uchsegacc)) -> + let chunk = preword scriptprev lbcfirst lbcprev (List.rev uchsegacc) PreventBreak in + Alist.to_list (Alist.extend resacc chunk) + end + + | (_uch, SP, alw) :: tritail -> + let chunkspace = + match alw with + | AllowBreak -> Space + | PreventBreak -> UnbreakableSpace + in + begin + match scraccopt with + | None -> + aux (Alist.extend resacc chunkspace) None tritail + + | Some((lbcfirst, scriptprev, lbcprev, uchsegacc)) -> + let chunkprev = preword scriptprev lbcfirst lbcprev (List.rev uchsegacc) PreventBreak in + aux (Alist.append resacc [chunkprev; chunkspace]) None tritail + end + + | (uchseg, lbc, alw) :: tritail -> + let script = find ctx uchseg in + if is_ideographic_class lbc then + (* temporary; whether 'AI' is ideographic or not should depend on the context *) + (* -- if the spotted character is ideographic -- *) + begin + match scraccopt with + | None -> + let chunkideo = ideographic script lbc uchseg alw in + aux (Alist.extend resacc chunkideo) None tritail + + | Some((lbcfirst, scriptprev, lbcprev, uchacc)) -> + (* -- if there accumulate some characters before the spotted character -- *) + let chunkideo = ideographic script lbc uchseg alw in + let chunkprev = preword scriptprev lbcfirst lbcprev (List.rev uchacc) PreventBreak in + aux (Alist.append resacc [chunkprev; chunkideo]) None tritail + end + else + begin + match alw with + | AllowBreak -> + (* -- if the spotted non-ideographic character allows line break after it -- *) + begin + match scraccopt with + | None -> + let chunk = preword script lbc lbc [uchseg] AllowBreak in + aux (Alist.extend resacc chunk) None tritail + + | Some((lbcfirst, scriptprev, lbcprev, uchsegacc)) -> + if script_equal scriptprev script then + let chunk = preword script lbcfirst lbc (List.rev (uchseg :: uchsegacc)) AllowBreak in + aux (Alist.extend resacc chunk) None tritail + else + let chunkprev = preword scriptprev lbcfirst lbcprev (List.rev uchsegacc) PreventBreak in + let chunk = preword script lbc lbc [uchseg] AllowBreak in + aux (Alist.append resacc [chunkprev; chunk]) None tritail + end + + | PreventBreak -> + begin + match scraccopt with + | None -> + aux resacc (Some((lbc, script, lbc, [uchseg]))) tritail + + | Some((lbcfirst, scriptprev, lbcprev, uchsegacc)) -> + if script_equal scriptprev script then + aux resacc (Some((lbcfirst, script, lbc, uchseg :: uchsegacc))) tritail + else + let chunkprev = preword scriptprev lbcfirst lbcprev (List.rev uchsegacc) PreventBreak in + aux (Alist.extend resacc chunkprev) (Some((lbc, script, lbc, [uchseg]))) tritail + end + end + in + + aux Alist.empty None trilst + + +let get_font_with_ratio (ctx : context_main) (script_raw : script) = + let script = normalize_script ctx script_raw in + match ctx.font_scheme |> CharBasis.ScriptSchemeMap.find_opt script with + | None -> raise (FontIsNotSet{ raw = script_raw; normalized = script }) + | Some(fontsch) -> fontsch + + +let get_language_system (ctx : context_main) (script_raw : script) = + let script = normalize_script ctx script_raw in + match ctx.langsys_scheme |> CharBasis.ScriptSchemeMap.find_opt script with + | None -> CharBasis.NoLanguageSystem + | Some(langsys) -> langsys + + +let get_string_info (ctx : context_main) (script_raw : script) = + let (fontkey, ratio, rising_ratio) = get_font_with_ratio ctx script_raw in + { + font_key = fontkey; + text_font_size = ctx.font_size *% ratio; + text_color = ctx.text_color; + rising = ctx.manual_rising +% ctx.font_size *% rising_ratio; + } diff --git a/src/chardecoder/scriptHandler.mli b/src/chardecoder/scriptHandler.mli new file mode 100644 index 000000000..e1ac3cfcb --- /dev/null +++ b/src/chardecoder/scriptHandler.mli @@ -0,0 +1,12 @@ + +open CharBasis +open LineBreakBox +open HorzBox + +val divide_by_script : context_main -> line_break_element list -> line_break_chunk_main list + +val get_font_with_ratio : context_main -> script -> font_with_ratio + +val get_language_system : context_main -> script -> language_system + +val get_string_info : context_main -> script -> horz_string_info diff --git a/src/frontend/primitives.cppo.ml b/src/frontend/primitives.cppo.ml index d1def3229..5d4d92905 100644 --- a/src/frontend/primitives.cppo.ml +++ b/src/frontend/primitives.cppo.ml @@ -650,7 +650,9 @@ let get_pdf_mode_initial_context wid = dominant_wide_script = CharBasis.OtherScript; dominant_narrow_script = CharBasis.OtherScript; langsys_scheme = CharBasis.ScriptSchemeMap.empty; + script_map = ScriptDataMap.empty; script_space_map = default_script_space_map; + line_break_map = LineBreakDataMap.empty; space_natural = 0.33; space_shrink = 0.08; space_stretch = 0.16; diff --git a/tools/gencode/vminst.ml b/tools/gencode/vminst.ml index bc998a549..b6ee8eddf 100644 --- a/tools/gencode/vminst.ml +++ b/tools/gencode/vminst.ml @@ -1391,7 +1391,7 @@ Context(HorzBox.({ ctx with font_scheme = font_scheme_new; }), ctxsub) ] ~is_pdf_mode_primitive:true ~code:{| -let fontwr = HorzBox.get_font_with_ratio ctx script in +let fontwr = ScriptHandler.get_font_with_ratio ctx script in make_font_with_ratio_value fontwr |} ; inst "PrimitiveSetMathFont" @@ -1484,7 +1484,7 @@ Context(HorzBox.({ ctx with ] ~is_pdf_mode_primitive:true ~code:{| -let langsys = HorzBox.get_language_system ctx script in +let langsys = ScriptHandler.get_language_system ctx script in make_language_system_value langsys |} ; inst "PrimitiveSetTextColor" From da0b4cf5052108e4e623221c8ecb3af5a10fe078 Mon Sep 17 00:00:00 2001 From: Takashi Suwa Date: Tue, 26 Mar 2024 23:07:43 +0900 Subject: [PATCH 02/13] add `set-unicode-char-database` and `load-unicode-char-database` --- src/frontend/display.ml | 1 + src/frontend/evalUtil.ml | 5 +++++ src/frontend/fontInfo.ml | 5 ++++- src/frontend/fontInfo.mli | 2 +- src/frontend/main.ml | 16 +++++++-------- src/frontend/primitives.cppo.ml | 1 + src/frontend/types.cppo.ml | 4 ++++ tools/gencode/type.ml | 1 + tools/gencode/type.mli | 1 + tools/gencode/vminst.ml | 36 +++++++++++++++++++++++++++++++++ 10 files changed, 62 insertions(+), 10 deletions(-) diff --git a/src/frontend/display.ml b/src/frontend/display.ml index faa8aecb4..171d4a202 100644 --- a/src/frontend/display.ml +++ b/src/frontend/display.ml @@ -226,6 +226,7 @@ let show_base_type = function | TextInfoType -> "text-info" | InputPosType -> "input-position" | HyphenationType -> "hyphenation" + | UnidataType -> "unicode-char-database" type paren_level = diff --git a/src/frontend/evalUtil.ml b/src/frontend/evalUtil.ml index 1e94cc012..072045769 100644 --- a/src/frontend/evalUtil.ml +++ b/src/frontend/evalUtil.ml @@ -467,6 +467,11 @@ let get_hyphenation : syntactic_value -> LoadHyph.t = function | value -> report_bug_value "get_hyphenation" value +let get_unicode_char_database : syntactic_value -> ScriptDataMap.t * LineBreakDataMap.t = function + | BaseConstant(BCUnidata(unidata)) -> unidata + | value -> report_bug_value "get_unicode_char_database" value + + let get_outline (value : syntactic_value) = match value with | Tuple([ diff --git a/src/frontend/fontInfo.ml b/src/frontend/fontInfo.ml index b9a4bac00..47417173b 100644 --- a/src/frontend/fontInfo.ml +++ b/src/frontend/fontInfo.ml @@ -409,16 +409,19 @@ let get_font_dictionary (pdf : Pdf.t) : Pdf.pdfobject = Pdf.Dictionary(keyval) -let initialize ~base_dir:(absdir_base : abs_path) = +let initialize () = let res = let open ResultMonad in FontHashTable.initialize (); MathFontHashTable.initialize (); +(* TODO: remove the following: *) +(* let abspath_S = append_to_abs_directory absdir_base "unidata/Scripts.txt" in let abspath_EAW = append_to_abs_directory absdir_base "unidata/EastAsianWidth.txt" in ScriptDataMap.set_from_file abspath_S abspath_EAW; let abspath_LB = append_to_abs_directory absdir_base "unidata/LineBreak.txt" in LineBreakDataMap.set_from_file abspath_LB; +*) return () in match res with diff --git a/src/frontend/fontInfo.mli b/src/frontend/fontInfo.mli index 5f63fd9fb..90a09c5f3 100644 --- a/src/frontend/fontInfo.mli +++ b/src/frontend/fontInfo.mli @@ -13,7 +13,7 @@ type math_key = FontKey.t type tag = string -val initialize : base_dir:abs_path -> unit +val initialize : unit -> unit val add_single : abs_path -> key diff --git a/src/frontend/main.ml b/src/frontend/main.ml index f3299f7de..c11b41f54 100644 --- a/src/frontend/main.ml +++ b/src/frontend/main.ml @@ -13,12 +13,12 @@ let version = (* Initialization that should be performed before typechecking *) -let initialize ~(base_dir : abs_path) ~(is_bytecomp_mode : bool) (output_mode : output_mode) (runtime_config : runtime_config) : Typeenv.t * environment = +let initialize ~(is_bytecomp_mode : bool) (output_mode : output_mode) (runtime_config : runtime_config) : Typeenv.t * environment = FreeID.initialize (); BoundID.initialize (); EvalVarID.initialize (); StoreID.initialize (); - FontInfo.initialize ~base_dir; + FontInfo.initialize (); let (tyenv, env) = match output_mode with | TextMode(_) -> @@ -169,7 +169,7 @@ let build_package let abspath_envelope_config = make_absolute_if_relative ~origin:absdir_current fpath_in in let abspath_deps_config = make_absolute_if_relative ~origin:absdir_current fpath_deps in - let absdir_base = make_absolute_if_relative ~origin:absdir_current fpath_base in + let _absdir_base = make_absolute_if_relative ~origin:absdir_current fpath_base in (* TODO: remove this *) let output_mode = make_output_mode text_mode_formats_str_opt in let typecheck_config = @@ -186,7 +186,7 @@ let build_package (* Gets the initial type environment, which consists only of primitives: *) let (tyenv_prim, _env) = - initialize ~base_dir:absdir_base ~is_bytecomp_mode:false output_mode runtime_config + initialize ~is_bytecomp_mode:false output_mode runtime_config in (* Loads the deps config: *) @@ -245,7 +245,7 @@ let open ResultMonad in let abspath_out = make_absolute_if_relative ~origin:absdir_current fpath_out in let abspath_dump = make_absolute_if_relative ~origin:absdir_current fpath_dump in let abspath_deps_config = make_absolute_if_relative ~origin:absdir_current fpath_deps in - let absdir_base = make_absolute_if_relative ~origin:absdir_current fpath_base in + let _absdir_base = make_absolute_if_relative ~origin:absdir_current fpath_base in (* TODO: remove this *) let output_mode = make_output_mode text_mode_formats_str_opt in let typecheck_config = @@ -272,7 +272,7 @@ let open ResultMonad in (* Gets the initial type environment, which consists only of primitives: *) let (tyenv_prim, env) = - initialize ~base_dir:absdir_base ~is_bytecomp_mode output_mode runtime_config + initialize ~is_bytecomp_mode output_mode runtime_config in (* Loads the deps config: *) @@ -342,7 +342,7 @@ let test_package let abspath_in = make_absolute_if_relative ~origin:absdir_current fpath_in in let abspath_deps_config = make_absolute_if_relative ~origin:absdir_current fpath_deps in - let absdir_base = make_absolute_if_relative ~origin:absdir_current fpath_base in + let _absdir_base = make_absolute_if_relative ~origin:absdir_current fpath_base in (* TODO: remove this *) let output_mode = make_output_mode text_mode_formats_str_opt in let typecheck_config = @@ -359,7 +359,7 @@ let test_package (* Gets the initial type environment, which consists only of pritmives: *) let (tyenv_prim, env) = - initialize ~base_dir:absdir_base ~is_bytecomp_mode:false output_mode runtime_config + initialize ~is_bytecomp_mode:false output_mode runtime_config in (* Loads the deps config: *) diff --git a/src/frontend/primitives.cppo.ml b/src/frontend/primitives.cppo.ml index f7b2fed45..20910cc7b 100644 --- a/src/frontend/primitives.cppo.ml +++ b/src/frontend/primitives.cppo.ml @@ -48,6 +48,7 @@ let tRE = (~! "regexp" , BaseType(RegExpType)) let tTCTX = (~! "text-info" , BaseType(TextInfoType)) let tIPOS = (~! "input-position", BaseType(InputPosType)) let tHYPH = (~! "hyphenation" , BaseType(HyphenationType)) +let tUCD = (~! "unicode-char-database", BaseType(UnidataType)) let tL ty = (~! "list", ListType(ty)) let tR ty = (~! "ref", RefType(ty)) diff --git a/src/frontend/types.cppo.ml b/src/frontend/types.cppo.ml index d3fe5191f..d35ad0466 100644 --- a/src/frontend/types.cppo.ml +++ b/src/frontend/types.cppo.ml @@ -121,6 +121,7 @@ type base_type = | TextInfoType | InputPosType | HyphenationType + | UnidataType [@@deriving show] @@ -177,6 +178,7 @@ let base_type_map : base_type TypeNameMap.t = ("text-info" , TextInfoType); ("input-position", InputPosType); ("hyphenation" , HyphenationType); + ("unicode-char-database", UnidataType); ] @@ -710,6 +712,8 @@ type base_constant = | BCInputPos of input_position | BCHyphenation of LoadHyph.t [@printer (fun fmt _ -> Format.fprintf fmt "")] + | BCUnidata of (ScriptDataMap.t * LineBreakDataMap.t) + [@printer (fun fmt _ -> Format.fprintf fmt "")] [@@deriving show { with_path = false; }] type 'a letrec_binding_scheme = diff --git a/tools/gencode/type.ml b/tools/gencode/type.ml index f6fa1be62..c9d7bb07c 100644 --- a/tools/gencode/type.ml +++ b/tools/gencode/type.ml @@ -140,6 +140,7 @@ let tIGR = tycon0 "tIGR" "inline-graphics" let tIGRO = tycon0 "tIGRO" "inline-graphics-outer" let tDOCINFODIC = tycon0 "tDOCINFODIC" "document-information-dictionary" let tHYPH = tycon0 "tHYPH" "hyphenation" +let tUCD = tycon0 "tUCD" "unicode-char-database" let tL = tycon1 "tL" "list" let tR = tycon1 "tR" "ref" let tOPT = tycon1 "tOPT" "option" diff --git a/tools/gencode/type.mli b/tools/gencode/type.mli index 1b4e3e83b..2e0bb0045 100644 --- a/tools/gencode/type.mli +++ b/tools/gencode/type.mli @@ -64,6 +64,7 @@ val tIGRO : t val tDASH : t val tDOCINFODIC : t val tHYPH : t +val tUCD : t val tPROD : t list -> t diff --git a/tools/gencode/vminst.ml b/tools/gencode/vminst.ml index 8ff3120b9..519b01009 100644 --- a/tools/gencode/vminst.ml +++ b/tools/gencode/vminst.ml @@ -151,6 +151,42 @@ Context({ ctx with hyphen_dictionary = hyph; }, ctxsub) let abspath_hyph = MyUtil.make_abs_path abspathstr_hyph in let hyph = LoadHyph.main abspath_hyph in BaseConstant(BCHyphenation(hyph)) +|} + ; inst "PrimitiveSetUnicodeCharDatabase" + ~name:"set-unicode-char-database" + ~type_:Type.(tUCD @-> tCTX @-> tCTX) + ~fields:[ + ] + ~params:[ + param "(script_map, line_break_map)" ~type_:"unicode_char_database"; + param "(ctx, ctxsub)" ~type_:"context"; + ] + ~is_pdf_mode_primitive:true + ~code:{| +Context({ ctx with script_map; line_break_map; }, ctxsub) +|} + ; inst "PrimitiveLoadUnicodeCharDatabase" + ~name:"load-unicode-char-database" + ~type_:Type.(tS @-> tS @-> tS @-> tUCD) + ~fields:[ + ] + ~params:[ + param "abspathstr_script" ~type_:"string"; + param "abspathstr_east_asian_width" ~type_:"string"; + param "abspathstr_line_break" ~type_:"string"; + ] + ~is_pdf_mode_primitive:true + ~code:{| +let script_map = + ScriptDataMap.make_from_file + ~script:(MyUtil.make_abs_path abspathstr_script) + ~east_asian_width:(MyUtil.make_abs_path abspathstr_east_asian_width) +in +let line_break_map = + LineBreakDataMap.make_from_file + (MyUtil.make_abs_path abspathstr_line_break) +in +BaseConstant(BCUnidata((script_map, line_break_map))) |} ; inst "PrimitiveGetLeftMathClass" ~name:"get-left-math-class" From 26677d4237804862529a56a0803a9ed5484e99f2 Mon Sep 17 00:00:00 2001 From: Takashi Suwa Date: Tue, 26 Mar 2024 23:18:22 +0900 Subject: [PATCH 03/13] introduce package `unidata` --- .../packages/unidata/unidata.0.0.1/saphe.yaml | 11 + .../unidata/unidata.0.0.1/src/unidata.satyh | 9 + .../unidata/unidata.0.0.1}/unidata/.gitignore | 0 .../unidata.0.0.1}/unidata/EastAsianWidth.txt | 0 .../unidata.0.0.1}/unidata/LineBreak.txt | 0 .../unidata.0.0.1}/unidata/Scripts.txt | 0 lib-satysfi/unidata/PropList.txt | 1618 - lib-satysfi/unidata/PropertyAliases.txt | 197 - lib-satysfi/unidata/PropertyValueAliases.txt | 1496 - lib-satysfi/unidata/ScriptExtensions.txt | 501 - lib-satysfi/unidata/UnicodeData.txt | 31618 ---------------- 11 files changed, 20 insertions(+), 35430 deletions(-) create mode 100644 lib-satysfi/packages/unidata/unidata.0.0.1/saphe.yaml create mode 100644 lib-satysfi/packages/unidata/unidata.0.0.1/src/unidata.satyh rename lib-satysfi/{ => packages/unidata/unidata.0.0.1}/unidata/.gitignore (100%) rename lib-satysfi/{ => packages/unidata/unidata.0.0.1}/unidata/EastAsianWidth.txt (100%) rename lib-satysfi/{ => packages/unidata/unidata.0.0.1}/unidata/LineBreak.txt (100%) rename lib-satysfi/{ => packages/unidata/unidata.0.0.1}/unidata/Scripts.txt (100%) delete mode 100644 lib-satysfi/unidata/PropList.txt delete mode 100644 lib-satysfi/unidata/PropertyAliases.txt delete mode 100644 lib-satysfi/unidata/PropertyValueAliases.txt delete mode 100644 lib-satysfi/unidata/ScriptExtensions.txt delete mode 100644 lib-satysfi/unidata/UnicodeData.txt diff --git a/lib-satysfi/packages/unidata/unidata.0.0.1/saphe.yaml b/lib-satysfi/packages/unidata/unidata.0.0.1/saphe.yaml new file mode 100644 index 000000000..b4791bf96 --- /dev/null +++ b/lib-satysfi/packages/unidata/unidata.0.0.1/saphe.yaml @@ -0,0 +1,11 @@ +saphe: "^0.0.1" +satysfi: "^0.1.0" +name: "unidata" +authors: + - "Takashi Suwa <@gfngfn>" +contents: + library: + main_module: "Unidata" + source_directories: + - "./src" +dependencies: [] diff --git a/lib-satysfi/packages/unidata/unidata.0.0.1/src/unidata.satyh b/lib-satysfi/packages/unidata/unidata.0.0.1/src/unidata.satyh new file mode 100644 index 000000000..f2acf9af8 --- /dev/null +++ b/lib-satysfi/packages/unidata/unidata.0.0.1/src/unidata.satyh @@ -0,0 +1,9 @@ +module Unidata :> sig + val unidata : unicode-char-database +end = struct + val unidata = + load-unicode-char-database + (here ^ `/../unidata/Scripts.txt`) + (here ^ `/../unidata/EastAsianWidth.txt`) + (here ^ `/../unidata/LineBreak.txt`) +end diff --git a/lib-satysfi/unidata/.gitignore b/lib-satysfi/packages/unidata/unidata.0.0.1/unidata/.gitignore similarity index 100% rename from lib-satysfi/unidata/.gitignore rename to lib-satysfi/packages/unidata/unidata.0.0.1/unidata/.gitignore diff --git a/lib-satysfi/unidata/EastAsianWidth.txt b/lib-satysfi/packages/unidata/unidata.0.0.1/unidata/EastAsianWidth.txt similarity index 100% rename from lib-satysfi/unidata/EastAsianWidth.txt rename to lib-satysfi/packages/unidata/unidata.0.0.1/unidata/EastAsianWidth.txt diff --git a/lib-satysfi/unidata/LineBreak.txt b/lib-satysfi/packages/unidata/unidata.0.0.1/unidata/LineBreak.txt similarity index 100% rename from lib-satysfi/unidata/LineBreak.txt rename to lib-satysfi/packages/unidata/unidata.0.0.1/unidata/LineBreak.txt diff --git a/lib-satysfi/unidata/Scripts.txt b/lib-satysfi/packages/unidata/unidata.0.0.1/unidata/Scripts.txt similarity index 100% rename from lib-satysfi/unidata/Scripts.txt rename to lib-satysfi/packages/unidata/unidata.0.0.1/unidata/Scripts.txt diff --git a/lib-satysfi/unidata/PropList.txt b/lib-satysfi/unidata/PropList.txt deleted file mode 100644 index 9a2d0e4b1..000000000 --- a/lib-satysfi/unidata/PropList.txt +++ /dev/null @@ -1,1618 +0,0 @@ -# PropList-10.0.0.txt -# Date: 2017-03-10, 08:25:30 GMT -# © 2017 Unicode®, Inc. -# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. -# For terms of use, see http://www.unicode.org/terms_of_use.html -# -# Unicode Character Database -# For documentation, see http://www.unicode.org/reports/tr44/ - -# ================================================ - -0009..000D ; White_Space # Cc [5] .. -0020 ; White_Space # Zs SPACE -0085 ; White_Space # Cc -00A0 ; White_Space # Zs NO-BREAK SPACE -1680 ; White_Space # Zs OGHAM SPACE MARK -2000..200A ; White_Space # Zs [11] EN QUAD..HAIR SPACE -2028 ; White_Space # Zl LINE SEPARATOR -2029 ; White_Space # Zp PARAGRAPH SEPARATOR -202F ; White_Space # Zs NARROW NO-BREAK SPACE -205F ; White_Space # Zs MEDIUM MATHEMATICAL SPACE -3000 ; White_Space # Zs IDEOGRAPHIC SPACE - -# Total code points: 25 - -# ================================================ - -061C ; Bidi_Control # Cf ARABIC LETTER MARK -200E..200F ; Bidi_Control # Cf [2] LEFT-TO-RIGHT MARK..RIGHT-TO-LEFT MARK -202A..202E ; Bidi_Control # Cf [5] LEFT-TO-RIGHT EMBEDDING..RIGHT-TO-LEFT OVERRIDE -2066..2069 ; Bidi_Control # Cf [4] LEFT-TO-RIGHT ISOLATE..POP DIRECTIONAL ISOLATE - -# Total code points: 12 - -# ================================================ - -200C..200D ; Join_Control # Cf [2] ZERO WIDTH NON-JOINER..ZERO WIDTH JOINER - -# Total code points: 2 - -# ================================================ - -002D ; Dash # Pd HYPHEN-MINUS -058A ; Dash # Pd ARMENIAN HYPHEN -05BE ; Dash # Pd HEBREW PUNCTUATION MAQAF -1400 ; Dash # Pd CANADIAN SYLLABICS HYPHEN -1806 ; Dash # Pd MONGOLIAN TODO SOFT HYPHEN -2010..2015 ; Dash # Pd [6] HYPHEN..HORIZONTAL BAR -2053 ; Dash # Po SWUNG DASH -207B ; Dash # Sm SUPERSCRIPT MINUS -208B ; Dash # Sm SUBSCRIPT MINUS -2212 ; Dash # Sm MINUS SIGN -2E17 ; Dash # Pd DOUBLE OBLIQUE HYPHEN -2E1A ; Dash # Pd HYPHEN WITH DIAERESIS -2E3A..2E3B ; Dash # Pd [2] TWO-EM DASH..THREE-EM DASH -2E40 ; Dash # Pd DOUBLE HYPHEN -301C ; Dash # Pd WAVE DASH -3030 ; Dash # Pd WAVY DASH -30A0 ; Dash # Pd KATAKANA-HIRAGANA DOUBLE HYPHEN -FE31..FE32 ; Dash # Pd [2] PRESENTATION FORM FOR VERTICAL EM DASH..PRESENTATION FORM FOR VERTICAL EN DASH -FE58 ; Dash # Pd SMALL EM DASH -FE63 ; Dash # Pd SMALL HYPHEN-MINUS -FF0D ; Dash # Pd FULLWIDTH HYPHEN-MINUS - -# Total code points: 28 - -# ================================================ - -002D ; Hyphen # Pd HYPHEN-MINUS -00AD ; Hyphen # Cf SOFT HYPHEN -058A ; Hyphen # Pd ARMENIAN HYPHEN -1806 ; Hyphen # Pd MONGOLIAN TODO SOFT HYPHEN -2010..2011 ; Hyphen # Pd [2] HYPHEN..NON-BREAKING HYPHEN -2E17 ; Hyphen # Pd DOUBLE OBLIQUE HYPHEN -30FB ; Hyphen # Po KATAKANA MIDDLE DOT -FE63 ; Hyphen # Pd SMALL HYPHEN-MINUS -FF0D ; Hyphen # Pd FULLWIDTH HYPHEN-MINUS -FF65 ; Hyphen # Po HALFWIDTH KATAKANA MIDDLE DOT - -# Total code points: 11 - -# ================================================ - -0022 ; Quotation_Mark # Po QUOTATION MARK -0027 ; Quotation_Mark # Po APOSTROPHE -00AB ; Quotation_Mark # Pi LEFT-POINTING DOUBLE ANGLE QUOTATION MARK -00BB ; Quotation_Mark # Pf RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK -2018 ; Quotation_Mark # Pi LEFT SINGLE QUOTATION MARK -2019 ; Quotation_Mark # Pf RIGHT SINGLE QUOTATION MARK -201A ; Quotation_Mark # Ps SINGLE LOW-9 QUOTATION MARK -201B..201C ; Quotation_Mark # Pi [2] SINGLE HIGH-REVERSED-9 QUOTATION MARK..LEFT DOUBLE QUOTATION MARK -201D ; Quotation_Mark # Pf RIGHT DOUBLE QUOTATION MARK -201E ; Quotation_Mark # Ps DOUBLE LOW-9 QUOTATION MARK -201F ; Quotation_Mark # Pi DOUBLE HIGH-REVERSED-9 QUOTATION MARK -2039 ; Quotation_Mark # Pi SINGLE LEFT-POINTING ANGLE QUOTATION MARK -203A ; Quotation_Mark # Pf SINGLE RIGHT-POINTING ANGLE QUOTATION MARK -2E42 ; Quotation_Mark # Ps DOUBLE LOW-REVERSED-9 QUOTATION MARK -300C ; Quotation_Mark # Ps LEFT CORNER BRACKET -300D ; Quotation_Mark # Pe RIGHT CORNER BRACKET -300E ; Quotation_Mark # Ps LEFT WHITE CORNER BRACKET -300F ; Quotation_Mark # Pe RIGHT WHITE CORNER BRACKET -301D ; Quotation_Mark # Ps REVERSED DOUBLE PRIME QUOTATION MARK -301E..301F ; Quotation_Mark # Pe [2] DOUBLE PRIME QUOTATION MARK..LOW DOUBLE PRIME QUOTATION MARK -FE41 ; Quotation_Mark # Ps PRESENTATION FORM FOR VERTICAL LEFT CORNER BRACKET -FE42 ; Quotation_Mark # Pe PRESENTATION FORM FOR VERTICAL RIGHT CORNER BRACKET -FE43 ; Quotation_Mark # Ps PRESENTATION FORM FOR VERTICAL LEFT WHITE CORNER BRACKET -FE44 ; Quotation_Mark # Pe PRESENTATION FORM FOR VERTICAL RIGHT WHITE CORNER BRACKET -FF02 ; Quotation_Mark # Po FULLWIDTH QUOTATION MARK -FF07 ; Quotation_Mark # Po FULLWIDTH APOSTROPHE -FF62 ; Quotation_Mark # Ps HALFWIDTH LEFT CORNER BRACKET -FF63 ; Quotation_Mark # Pe HALFWIDTH RIGHT CORNER BRACKET - -# Total code points: 30 - -# ================================================ - -0021 ; Terminal_Punctuation # Po EXCLAMATION MARK -002C ; Terminal_Punctuation # Po COMMA -002E ; Terminal_Punctuation # Po FULL STOP -003A..003B ; Terminal_Punctuation # Po [2] COLON..SEMICOLON -003F ; Terminal_Punctuation # Po QUESTION MARK -037E ; Terminal_Punctuation # Po GREEK QUESTION MARK -0387 ; Terminal_Punctuation # Po GREEK ANO TELEIA -0589 ; Terminal_Punctuation # Po ARMENIAN FULL STOP -05C3 ; Terminal_Punctuation # Po HEBREW PUNCTUATION SOF PASUQ -060C ; Terminal_Punctuation # Po ARABIC COMMA -061B ; Terminal_Punctuation # Po ARABIC SEMICOLON -061F ; Terminal_Punctuation # Po ARABIC QUESTION MARK -06D4 ; Terminal_Punctuation # Po ARABIC FULL STOP -0700..070A ; Terminal_Punctuation # Po [11] SYRIAC END OF PARAGRAPH..SYRIAC CONTRACTION -070C ; Terminal_Punctuation # Po SYRIAC HARKLEAN METOBELUS -07F8..07F9 ; Terminal_Punctuation # Po [2] NKO COMMA..NKO EXCLAMATION MARK -0830..083E ; Terminal_Punctuation # Po [15] SAMARITAN PUNCTUATION NEQUDAA..SAMARITAN PUNCTUATION ANNAAU -085E ; Terminal_Punctuation # Po MANDAIC PUNCTUATION -0964..0965 ; Terminal_Punctuation # Po [2] DEVANAGARI DANDA..DEVANAGARI DOUBLE DANDA -0E5A..0E5B ; Terminal_Punctuation # Po [2] THAI CHARACTER ANGKHANKHU..THAI CHARACTER KHOMUT -0F08 ; Terminal_Punctuation # Po TIBETAN MARK SBRUL SHAD -0F0D..0F12 ; Terminal_Punctuation # Po [6] TIBETAN MARK SHAD..TIBETAN MARK RGYA GRAM SHAD -104A..104B ; Terminal_Punctuation # Po [2] MYANMAR SIGN LITTLE SECTION..MYANMAR SIGN SECTION -1361..1368 ; Terminal_Punctuation # Po [8] ETHIOPIC WORDSPACE..ETHIOPIC PARAGRAPH SEPARATOR -166D..166E ; Terminal_Punctuation # Po [2] CANADIAN SYLLABICS CHI SIGN..CANADIAN SYLLABICS FULL STOP -16EB..16ED ; Terminal_Punctuation # Po [3] RUNIC SINGLE PUNCTUATION..RUNIC CROSS PUNCTUATION -1735..1736 ; Terminal_Punctuation # Po [2] PHILIPPINE SINGLE PUNCTUATION..PHILIPPINE DOUBLE PUNCTUATION -17D4..17D6 ; Terminal_Punctuation # Po [3] KHMER SIGN KHAN..KHMER SIGN CAMNUC PII KUUH -17DA ; Terminal_Punctuation # Po KHMER SIGN KOOMUUT -1802..1805 ; Terminal_Punctuation # Po [4] MONGOLIAN COMMA..MONGOLIAN FOUR DOTS -1808..1809 ; Terminal_Punctuation # Po [2] MONGOLIAN MANCHU COMMA..MONGOLIAN MANCHU FULL STOP -1944..1945 ; Terminal_Punctuation # Po [2] LIMBU EXCLAMATION MARK..LIMBU QUESTION MARK -1AA8..1AAB ; Terminal_Punctuation # Po [4] TAI THAM SIGN KAAN..TAI THAM SIGN SATKAANKUU -1B5A..1B5B ; Terminal_Punctuation # Po [2] BALINESE PANTI..BALINESE PAMADA -1B5D..1B5F ; Terminal_Punctuation # Po [3] BALINESE CARIK PAMUNGKAH..BALINESE CARIK PAREREN -1C3B..1C3F ; Terminal_Punctuation # Po [5] LEPCHA PUNCTUATION TA-ROL..LEPCHA PUNCTUATION TSHOOK -1C7E..1C7F ; Terminal_Punctuation # Po [2] OL CHIKI PUNCTUATION MUCAAD..OL CHIKI PUNCTUATION DOUBLE MUCAAD -203C..203D ; Terminal_Punctuation # Po [2] DOUBLE EXCLAMATION MARK..INTERROBANG -2047..2049 ; Terminal_Punctuation # Po [3] DOUBLE QUESTION MARK..EXCLAMATION QUESTION MARK -2E2E ; Terminal_Punctuation # Po REVERSED QUESTION MARK -2E3C ; Terminal_Punctuation # Po STENOGRAPHIC FULL STOP -2E41 ; Terminal_Punctuation # Po REVERSED COMMA -3001..3002 ; Terminal_Punctuation # Po [2] IDEOGRAPHIC COMMA..IDEOGRAPHIC FULL STOP -A4FE..A4FF ; Terminal_Punctuation # Po [2] LISU PUNCTUATION COMMA..LISU PUNCTUATION FULL STOP -A60D..A60F ; Terminal_Punctuation # Po [3] VAI COMMA..VAI QUESTION MARK -A6F3..A6F7 ; Terminal_Punctuation # Po [5] BAMUM FULL STOP..BAMUM QUESTION MARK -A876..A877 ; Terminal_Punctuation # Po [2] PHAGS-PA MARK SHAD..PHAGS-PA MARK DOUBLE SHAD -A8CE..A8CF ; Terminal_Punctuation # Po [2] SAURASHTRA DANDA..SAURASHTRA DOUBLE DANDA -A92F ; Terminal_Punctuation # Po KAYAH LI SIGN SHYA -A9C7..A9C9 ; Terminal_Punctuation # Po [3] JAVANESE PADA PANGKAT..JAVANESE PADA LUNGSI -AA5D..AA5F ; Terminal_Punctuation # Po [3] CHAM PUNCTUATION DANDA..CHAM PUNCTUATION TRIPLE DANDA -AADF ; Terminal_Punctuation # Po TAI VIET SYMBOL KOI KOI -AAF0..AAF1 ; Terminal_Punctuation # Po [2] MEETEI MAYEK CHEIKHAN..MEETEI MAYEK AHANG KHUDAM -ABEB ; Terminal_Punctuation # Po MEETEI MAYEK CHEIKHEI -FE50..FE52 ; Terminal_Punctuation # Po [3] SMALL COMMA..SMALL FULL STOP -FE54..FE57 ; Terminal_Punctuation # Po [4] SMALL SEMICOLON..SMALL EXCLAMATION MARK -FF01 ; Terminal_Punctuation # Po FULLWIDTH EXCLAMATION MARK -FF0C ; Terminal_Punctuation # Po FULLWIDTH COMMA -FF0E ; Terminal_Punctuation # Po FULLWIDTH FULL STOP -FF1A..FF1B ; Terminal_Punctuation # Po [2] FULLWIDTH COLON..FULLWIDTH SEMICOLON -FF1F ; Terminal_Punctuation # Po FULLWIDTH QUESTION MARK -FF61 ; Terminal_Punctuation # Po HALFWIDTH IDEOGRAPHIC FULL STOP -FF64 ; Terminal_Punctuation # Po HALFWIDTH IDEOGRAPHIC COMMA -1039F ; Terminal_Punctuation # Po UGARITIC WORD DIVIDER -103D0 ; Terminal_Punctuation # Po OLD PERSIAN WORD DIVIDER -10857 ; Terminal_Punctuation # Po IMPERIAL ARAMAIC SECTION SIGN -1091F ; Terminal_Punctuation # Po PHOENICIAN WORD SEPARATOR -10A56..10A57 ; Terminal_Punctuation # Po [2] KHAROSHTHI PUNCTUATION DANDA..KHAROSHTHI PUNCTUATION DOUBLE DANDA -10AF0..10AF5 ; Terminal_Punctuation # Po [6] MANICHAEAN PUNCTUATION STAR..MANICHAEAN PUNCTUATION TWO DOTS -10B3A..10B3F ; Terminal_Punctuation # Po [6] TINY TWO DOTS OVER ONE DOT PUNCTUATION..LARGE ONE RING OVER TWO RINGS PUNCTUATION -10B99..10B9C ; Terminal_Punctuation # Po [4] PSALTER PAHLAVI SECTION MARK..PSALTER PAHLAVI FOUR DOTS WITH DOT -11047..1104D ; Terminal_Punctuation # Po [7] BRAHMI DANDA..BRAHMI PUNCTUATION LOTUS -110BE..110C1 ; Terminal_Punctuation # Po [4] KAITHI SECTION MARK..KAITHI DOUBLE DANDA -11141..11143 ; Terminal_Punctuation # Po [3] CHAKMA DANDA..CHAKMA QUESTION MARK -111C5..111C6 ; Terminal_Punctuation # Po [2] SHARADA DANDA..SHARADA DOUBLE DANDA -111CD ; Terminal_Punctuation # Po SHARADA SUTRA MARK -111DE..111DF ; Terminal_Punctuation # Po [2] SHARADA SECTION MARK-1..SHARADA SECTION MARK-2 -11238..1123C ; Terminal_Punctuation # Po [5] KHOJKI DANDA..KHOJKI DOUBLE SECTION MARK -112A9 ; Terminal_Punctuation # Po MULTANI SECTION MARK -1144B..1144D ; Terminal_Punctuation # Po [3] NEWA DANDA..NEWA COMMA -1145B ; Terminal_Punctuation # Po NEWA PLACEHOLDER MARK -115C2..115C5 ; Terminal_Punctuation # Po [4] SIDDHAM DANDA..SIDDHAM SEPARATOR BAR -115C9..115D7 ; Terminal_Punctuation # Po [15] SIDDHAM END OF TEXT MARK..SIDDHAM SECTION MARK WITH CIRCLES AND FOUR ENCLOSURES -11641..11642 ; Terminal_Punctuation # Po [2] MODI DANDA..MODI DOUBLE DANDA -1173C..1173E ; Terminal_Punctuation # Po [3] AHOM SIGN SMALL SECTION..AHOM SIGN RULAI -11A42..11A43 ; Terminal_Punctuation # Po [2] ZANABAZAR SQUARE MARK SHAD..ZANABAZAR SQUARE MARK DOUBLE SHAD -11A9B..11A9C ; Terminal_Punctuation # Po [2] SOYOMBO MARK SHAD..SOYOMBO MARK DOUBLE SHAD -11AA1..11AA2 ; Terminal_Punctuation # Po [2] SOYOMBO TERMINAL MARK-1..SOYOMBO TERMINAL MARK-2 -11C41..11C43 ; Terminal_Punctuation # Po [3] BHAIKSUKI DANDA..BHAIKSUKI WORD SEPARATOR -11C71 ; Terminal_Punctuation # Po MARCHEN MARK SHAD -12470..12474 ; Terminal_Punctuation # Po [5] CUNEIFORM PUNCTUATION SIGN OLD ASSYRIAN WORD DIVIDER..CUNEIFORM PUNCTUATION SIGN DIAGONAL QUADCOLON -16A6E..16A6F ; Terminal_Punctuation # Po [2] MRO DANDA..MRO DOUBLE DANDA -16AF5 ; Terminal_Punctuation # Po BASSA VAH FULL STOP -16B37..16B39 ; Terminal_Punctuation # Po [3] PAHAWH HMONG SIGN VOS THOM..PAHAWH HMONG SIGN CIM CHEEM -16B44 ; Terminal_Punctuation # Po PAHAWH HMONG SIGN XAUS -1BC9F ; Terminal_Punctuation # Po DUPLOYAN PUNCTUATION CHINOOK FULL STOP -1DA87..1DA8A ; Terminal_Punctuation # Po [4] SIGNWRITING COMMA..SIGNWRITING COLON - -# Total code points: 252 - -# ================================================ - -005E ; Other_Math # Sk CIRCUMFLEX ACCENT -03D0..03D2 ; Other_Math # L& [3] GREEK BETA SYMBOL..GREEK UPSILON WITH HOOK SYMBOL -03D5 ; Other_Math # L& GREEK PHI SYMBOL -03F0..03F1 ; Other_Math # L& [2] GREEK KAPPA SYMBOL..GREEK RHO SYMBOL -03F4..03F5 ; Other_Math # L& [2] GREEK CAPITAL THETA SYMBOL..GREEK LUNATE EPSILON SYMBOL -2016 ; Other_Math # Po DOUBLE VERTICAL LINE -2032..2034 ; Other_Math # Po [3] PRIME..TRIPLE PRIME -2040 ; Other_Math # Pc CHARACTER TIE -2061..2064 ; Other_Math # Cf [4] FUNCTION APPLICATION..INVISIBLE PLUS -207D ; Other_Math # Ps SUPERSCRIPT LEFT PARENTHESIS -207E ; Other_Math # Pe SUPERSCRIPT RIGHT PARENTHESIS -208D ; Other_Math # Ps SUBSCRIPT LEFT PARENTHESIS -208E ; Other_Math # Pe SUBSCRIPT RIGHT PARENTHESIS -20D0..20DC ; Other_Math # Mn [13] COMBINING LEFT HARPOON ABOVE..COMBINING FOUR DOTS ABOVE -20E1 ; Other_Math # Mn COMBINING LEFT RIGHT ARROW ABOVE -20E5..20E6 ; Other_Math # Mn [2] COMBINING REVERSE SOLIDUS OVERLAY..COMBINING DOUBLE VERTICAL STROKE OVERLAY -20EB..20EF ; Other_Math # Mn [5] COMBINING LONG DOUBLE SOLIDUS OVERLAY..COMBINING RIGHT ARROW BELOW -2102 ; Other_Math # L& DOUBLE-STRUCK CAPITAL C -2107 ; Other_Math # L& EULER CONSTANT -210A..2113 ; Other_Math # L& [10] SCRIPT SMALL G..SCRIPT SMALL L -2115 ; Other_Math # L& DOUBLE-STRUCK CAPITAL N -2119..211D ; Other_Math # L& [5] DOUBLE-STRUCK CAPITAL P..DOUBLE-STRUCK CAPITAL R -2124 ; Other_Math # L& DOUBLE-STRUCK CAPITAL Z -2128 ; Other_Math # L& BLACK-LETTER CAPITAL Z -2129 ; Other_Math # So TURNED GREEK SMALL LETTER IOTA -212C..212D ; Other_Math # L& [2] SCRIPT CAPITAL B..BLACK-LETTER CAPITAL C -212F..2131 ; Other_Math # L& [3] SCRIPT SMALL E..SCRIPT CAPITAL F -2133..2134 ; Other_Math # L& [2] SCRIPT CAPITAL M..SCRIPT SMALL O -2135..2138 ; Other_Math # Lo [4] ALEF SYMBOL..DALET SYMBOL -213C..213F ; Other_Math # L& [4] DOUBLE-STRUCK SMALL PI..DOUBLE-STRUCK CAPITAL PI -2145..2149 ; Other_Math # L& [5] DOUBLE-STRUCK ITALIC CAPITAL D..DOUBLE-STRUCK ITALIC SMALL J -2195..2199 ; Other_Math # So [5] UP DOWN ARROW..SOUTH WEST ARROW -219C..219F ; Other_Math # So [4] LEFTWARDS WAVE ARROW..UPWARDS TWO HEADED ARROW -21A1..21A2 ; Other_Math # So [2] DOWNWARDS TWO HEADED ARROW..LEFTWARDS ARROW WITH TAIL -21A4..21A5 ; Other_Math # So [2] LEFTWARDS ARROW FROM BAR..UPWARDS ARROW FROM BAR -21A7 ; Other_Math # So DOWNWARDS ARROW FROM BAR -21A9..21AD ; Other_Math # So [5] LEFTWARDS ARROW WITH HOOK..LEFT RIGHT WAVE ARROW -21B0..21B1 ; Other_Math # So [2] UPWARDS ARROW WITH TIP LEFTWARDS..UPWARDS ARROW WITH TIP RIGHTWARDS -21B6..21B7 ; Other_Math # So [2] ANTICLOCKWISE TOP SEMICIRCLE ARROW..CLOCKWISE TOP SEMICIRCLE ARROW -21BC..21CD ; Other_Math # So [18] LEFTWARDS HARPOON WITH BARB UPWARDS..LEFTWARDS DOUBLE ARROW WITH STROKE -21D0..21D1 ; Other_Math # So [2] LEFTWARDS DOUBLE ARROW..UPWARDS DOUBLE ARROW -21D3 ; Other_Math # So DOWNWARDS DOUBLE ARROW -21D5..21DB ; Other_Math # So [7] UP DOWN DOUBLE ARROW..RIGHTWARDS TRIPLE ARROW -21DD ; Other_Math # So RIGHTWARDS SQUIGGLE ARROW -21E4..21E5 ; Other_Math # So [2] LEFTWARDS ARROW TO BAR..RIGHTWARDS ARROW TO BAR -2308 ; Other_Math # Ps LEFT CEILING -2309 ; Other_Math # Pe RIGHT CEILING -230A ; Other_Math # Ps LEFT FLOOR -230B ; Other_Math # Pe RIGHT FLOOR -23B4..23B5 ; Other_Math # So [2] TOP SQUARE BRACKET..BOTTOM SQUARE BRACKET -23B7 ; Other_Math # So RADICAL SYMBOL BOTTOM -23D0 ; Other_Math # So VERTICAL LINE EXTENSION -23E2 ; Other_Math # So WHITE TRAPEZIUM -25A0..25A1 ; Other_Math # So [2] BLACK SQUARE..WHITE SQUARE -25AE..25B6 ; Other_Math # So [9] BLACK VERTICAL RECTANGLE..BLACK RIGHT-POINTING TRIANGLE -25BC..25C0 ; Other_Math # So [5] BLACK DOWN-POINTING TRIANGLE..BLACK LEFT-POINTING TRIANGLE -25C6..25C7 ; Other_Math # So [2] BLACK DIAMOND..WHITE DIAMOND -25CA..25CB ; Other_Math # So [2] LOZENGE..WHITE CIRCLE -25CF..25D3 ; Other_Math # So [5] BLACK CIRCLE..CIRCLE WITH UPPER HALF BLACK -25E2 ; Other_Math # So BLACK LOWER RIGHT TRIANGLE -25E4 ; Other_Math # So BLACK UPPER LEFT TRIANGLE -25E7..25EC ; Other_Math # So [6] SQUARE WITH LEFT HALF BLACK..WHITE UP-POINTING TRIANGLE WITH DOT -2605..2606 ; Other_Math # So [2] BLACK STAR..WHITE STAR -2640 ; Other_Math # So FEMALE SIGN -2642 ; Other_Math # So MALE SIGN -2660..2663 ; Other_Math # So [4] BLACK SPADE SUIT..BLACK CLUB SUIT -266D..266E ; Other_Math # So [2] MUSIC FLAT SIGN..MUSIC NATURAL SIGN -27C5 ; Other_Math # Ps LEFT S-SHAPED BAG DELIMITER -27C6 ; Other_Math # Pe RIGHT S-SHAPED BAG DELIMITER -27E6 ; Other_Math # Ps MATHEMATICAL LEFT WHITE SQUARE BRACKET -27E7 ; Other_Math # Pe MATHEMATICAL RIGHT WHITE SQUARE BRACKET -27E8 ; Other_Math # Ps MATHEMATICAL LEFT ANGLE BRACKET -27E9 ; Other_Math # Pe MATHEMATICAL RIGHT ANGLE BRACKET -27EA ; Other_Math # Ps MATHEMATICAL LEFT DOUBLE ANGLE BRACKET -27EB ; Other_Math # Pe MATHEMATICAL RIGHT DOUBLE ANGLE BRACKET -27EC ; Other_Math # Ps MATHEMATICAL LEFT WHITE TORTOISE SHELL BRACKET -27ED ; Other_Math # Pe MATHEMATICAL RIGHT WHITE TORTOISE SHELL BRACKET -27EE ; Other_Math # Ps MATHEMATICAL LEFT FLATTENED PARENTHESIS -27EF ; Other_Math # Pe MATHEMATICAL RIGHT FLATTENED PARENTHESIS -2983 ; Other_Math # Ps LEFT WHITE CURLY BRACKET -2984 ; Other_Math # Pe RIGHT WHITE CURLY BRACKET -2985 ; Other_Math # Ps LEFT WHITE PARENTHESIS -2986 ; Other_Math # Pe RIGHT WHITE PARENTHESIS -2987 ; Other_Math # Ps Z NOTATION LEFT IMAGE BRACKET -2988 ; Other_Math # Pe Z NOTATION RIGHT IMAGE BRACKET -2989 ; Other_Math # Ps Z NOTATION LEFT BINDING BRACKET -298A ; Other_Math # Pe Z NOTATION RIGHT BINDING BRACKET -298B ; Other_Math # Ps LEFT SQUARE BRACKET WITH UNDERBAR -298C ; Other_Math # Pe RIGHT SQUARE BRACKET WITH UNDERBAR -298D ; Other_Math # Ps LEFT SQUARE BRACKET WITH TICK IN TOP CORNER -298E ; Other_Math # Pe RIGHT SQUARE BRACKET WITH TICK IN BOTTOM CORNER -298F ; Other_Math # Ps LEFT SQUARE BRACKET WITH TICK IN BOTTOM CORNER -2990 ; Other_Math # Pe RIGHT SQUARE BRACKET WITH TICK IN TOP CORNER -2991 ; Other_Math # Ps LEFT ANGLE BRACKET WITH DOT -2992 ; Other_Math # Pe RIGHT ANGLE BRACKET WITH DOT -2993 ; Other_Math # Ps LEFT ARC LESS-THAN BRACKET -2994 ; Other_Math # Pe RIGHT ARC GREATER-THAN BRACKET -2995 ; Other_Math # Ps DOUBLE LEFT ARC GREATER-THAN BRACKET -2996 ; Other_Math # Pe DOUBLE RIGHT ARC LESS-THAN BRACKET -2997 ; Other_Math # Ps LEFT BLACK TORTOISE SHELL BRACKET -2998 ; Other_Math # Pe RIGHT BLACK TORTOISE SHELL BRACKET -29D8 ; Other_Math # Ps LEFT WIGGLY FENCE -29D9 ; Other_Math # Pe RIGHT WIGGLY FENCE -29DA ; Other_Math # Ps LEFT DOUBLE WIGGLY FENCE -29DB ; Other_Math # Pe RIGHT DOUBLE WIGGLY FENCE -29FC ; Other_Math # Ps LEFT-POINTING CURVED ANGLE BRACKET -29FD ; Other_Math # Pe RIGHT-POINTING CURVED ANGLE BRACKET -FE61 ; Other_Math # Po SMALL ASTERISK -FE63 ; Other_Math # Pd SMALL HYPHEN-MINUS -FE68 ; Other_Math # Po SMALL REVERSE SOLIDUS -FF3C ; Other_Math # Po FULLWIDTH REVERSE SOLIDUS -FF3E ; Other_Math # Sk FULLWIDTH CIRCUMFLEX ACCENT -1D400..1D454 ; Other_Math # L& [85] MATHEMATICAL BOLD CAPITAL A..MATHEMATICAL ITALIC SMALL G -1D456..1D49C ; Other_Math # L& [71] MATHEMATICAL ITALIC SMALL I..MATHEMATICAL SCRIPT CAPITAL A -1D49E..1D49F ; Other_Math # L& [2] MATHEMATICAL SCRIPT CAPITAL C..MATHEMATICAL SCRIPT CAPITAL D -1D4A2 ; Other_Math # L& MATHEMATICAL SCRIPT CAPITAL G -1D4A5..1D4A6 ; Other_Math # L& [2] MATHEMATICAL SCRIPT CAPITAL J..MATHEMATICAL SCRIPT CAPITAL K -1D4A9..1D4AC ; Other_Math # L& [4] MATHEMATICAL SCRIPT CAPITAL N..MATHEMATICAL SCRIPT CAPITAL Q -1D4AE..1D4B9 ; Other_Math # L& [12] MATHEMATICAL SCRIPT CAPITAL S..MATHEMATICAL SCRIPT SMALL D -1D4BB ; Other_Math # L& MATHEMATICAL SCRIPT SMALL F -1D4BD..1D4C3 ; Other_Math # L& [7] MATHEMATICAL SCRIPT SMALL H..MATHEMATICAL SCRIPT SMALL N -1D4C5..1D505 ; Other_Math # L& [65] MATHEMATICAL SCRIPT SMALL P..MATHEMATICAL FRAKTUR CAPITAL B -1D507..1D50A ; Other_Math # L& [4] MATHEMATICAL FRAKTUR CAPITAL D..MATHEMATICAL FRAKTUR CAPITAL G -1D50D..1D514 ; Other_Math # L& [8] MATHEMATICAL FRAKTUR CAPITAL J..MATHEMATICAL FRAKTUR CAPITAL Q -1D516..1D51C ; Other_Math # L& [7] MATHEMATICAL FRAKTUR CAPITAL S..MATHEMATICAL FRAKTUR CAPITAL Y -1D51E..1D539 ; Other_Math # L& [28] MATHEMATICAL FRAKTUR SMALL A..MATHEMATICAL DOUBLE-STRUCK CAPITAL B -1D53B..1D53E ; Other_Math # L& [4] MATHEMATICAL DOUBLE-STRUCK CAPITAL D..MATHEMATICAL DOUBLE-STRUCK CAPITAL G -1D540..1D544 ; Other_Math # L& [5] MATHEMATICAL DOUBLE-STRUCK CAPITAL I..MATHEMATICAL DOUBLE-STRUCK CAPITAL M -1D546 ; Other_Math # L& MATHEMATICAL DOUBLE-STRUCK CAPITAL O -1D54A..1D550 ; Other_Math # L& [7] MATHEMATICAL DOUBLE-STRUCK CAPITAL S..MATHEMATICAL DOUBLE-STRUCK CAPITAL Y -1D552..1D6A5 ; Other_Math # L& [340] MATHEMATICAL DOUBLE-STRUCK SMALL A..MATHEMATICAL ITALIC SMALL DOTLESS J -1D6A8..1D6C0 ; Other_Math # L& [25] MATHEMATICAL BOLD CAPITAL ALPHA..MATHEMATICAL BOLD CAPITAL OMEGA -1D6C2..1D6DA ; Other_Math # L& [25] MATHEMATICAL BOLD SMALL ALPHA..MATHEMATICAL BOLD SMALL OMEGA -1D6DC..1D6FA ; Other_Math # L& [31] MATHEMATICAL BOLD EPSILON SYMBOL..MATHEMATICAL ITALIC CAPITAL OMEGA -1D6FC..1D714 ; Other_Math # L& [25] MATHEMATICAL ITALIC SMALL ALPHA..MATHEMATICAL ITALIC SMALL OMEGA -1D716..1D734 ; Other_Math # L& [31] MATHEMATICAL ITALIC EPSILON SYMBOL..MATHEMATICAL BOLD ITALIC CAPITAL OMEGA -1D736..1D74E ; Other_Math # L& [25] MATHEMATICAL BOLD ITALIC SMALL ALPHA..MATHEMATICAL BOLD ITALIC SMALL OMEGA -1D750..1D76E ; Other_Math # L& [31] MATHEMATICAL BOLD ITALIC EPSILON SYMBOL..MATHEMATICAL SANS-SERIF BOLD CAPITAL OMEGA -1D770..1D788 ; Other_Math # L& [25] MATHEMATICAL SANS-SERIF BOLD SMALL ALPHA..MATHEMATICAL SANS-SERIF BOLD SMALL OMEGA -1D78A..1D7A8 ; Other_Math # L& [31] MATHEMATICAL SANS-SERIF BOLD EPSILON SYMBOL..MATHEMATICAL SANS-SERIF BOLD ITALIC CAPITAL OMEGA -1D7AA..1D7C2 ; Other_Math # L& [25] MATHEMATICAL SANS-SERIF BOLD ITALIC SMALL ALPHA..MATHEMATICAL SANS-SERIF BOLD ITALIC SMALL OMEGA -1D7C4..1D7CB ; Other_Math # L& [8] MATHEMATICAL SANS-SERIF BOLD ITALIC EPSILON SYMBOL..MATHEMATICAL BOLD SMALL DIGAMMA -1D7CE..1D7FF ; Other_Math # Nd [50] MATHEMATICAL BOLD DIGIT ZERO..MATHEMATICAL MONOSPACE DIGIT NINE -1EE00..1EE03 ; Other_Math # Lo [4] ARABIC MATHEMATICAL ALEF..ARABIC MATHEMATICAL DAL -1EE05..1EE1F ; Other_Math # Lo [27] ARABIC MATHEMATICAL WAW..ARABIC MATHEMATICAL DOTLESS QAF -1EE21..1EE22 ; Other_Math # Lo [2] ARABIC MATHEMATICAL INITIAL BEH..ARABIC MATHEMATICAL INITIAL JEEM -1EE24 ; Other_Math # Lo ARABIC MATHEMATICAL INITIAL HEH -1EE27 ; Other_Math # Lo ARABIC MATHEMATICAL INITIAL HAH -1EE29..1EE32 ; Other_Math # Lo [10] ARABIC MATHEMATICAL INITIAL YEH..ARABIC MATHEMATICAL INITIAL QAF -1EE34..1EE37 ; Other_Math # Lo [4] ARABIC MATHEMATICAL INITIAL SHEEN..ARABIC MATHEMATICAL INITIAL KHAH -1EE39 ; Other_Math # Lo ARABIC MATHEMATICAL INITIAL DAD -1EE3B ; Other_Math # Lo ARABIC MATHEMATICAL INITIAL GHAIN -1EE42 ; Other_Math # Lo ARABIC MATHEMATICAL TAILED JEEM -1EE47 ; Other_Math # Lo ARABIC MATHEMATICAL TAILED HAH -1EE49 ; Other_Math # Lo ARABIC MATHEMATICAL TAILED YEH -1EE4B ; Other_Math # Lo ARABIC MATHEMATICAL TAILED LAM -1EE4D..1EE4F ; Other_Math # Lo [3] ARABIC MATHEMATICAL TAILED NOON..ARABIC MATHEMATICAL TAILED AIN -1EE51..1EE52 ; Other_Math # Lo [2] ARABIC MATHEMATICAL TAILED SAD..ARABIC MATHEMATICAL TAILED QAF -1EE54 ; Other_Math # Lo ARABIC MATHEMATICAL TAILED SHEEN -1EE57 ; Other_Math # Lo ARABIC MATHEMATICAL TAILED KHAH -1EE59 ; Other_Math # Lo ARABIC MATHEMATICAL TAILED DAD -1EE5B ; Other_Math # Lo ARABIC MATHEMATICAL TAILED GHAIN -1EE5D ; Other_Math # Lo ARABIC MATHEMATICAL TAILED DOTLESS NOON -1EE5F ; Other_Math # Lo ARABIC MATHEMATICAL TAILED DOTLESS QAF -1EE61..1EE62 ; Other_Math # Lo [2] ARABIC MATHEMATICAL STRETCHED BEH..ARABIC MATHEMATICAL STRETCHED JEEM -1EE64 ; Other_Math # Lo ARABIC MATHEMATICAL STRETCHED HEH -1EE67..1EE6A ; Other_Math # Lo [4] ARABIC MATHEMATICAL STRETCHED HAH..ARABIC MATHEMATICAL STRETCHED KAF -1EE6C..1EE72 ; Other_Math # Lo [7] ARABIC MATHEMATICAL STRETCHED MEEM..ARABIC MATHEMATICAL STRETCHED QAF -1EE74..1EE77 ; Other_Math # Lo [4] ARABIC MATHEMATICAL STRETCHED SHEEN..ARABIC MATHEMATICAL STRETCHED KHAH -1EE79..1EE7C ; Other_Math # Lo [4] ARABIC MATHEMATICAL STRETCHED DAD..ARABIC MATHEMATICAL STRETCHED DOTLESS BEH -1EE7E ; Other_Math # Lo ARABIC MATHEMATICAL STRETCHED DOTLESS FEH -1EE80..1EE89 ; Other_Math # Lo [10] ARABIC MATHEMATICAL LOOPED ALEF..ARABIC MATHEMATICAL LOOPED YEH -1EE8B..1EE9B ; Other_Math # Lo [17] ARABIC MATHEMATICAL LOOPED LAM..ARABIC MATHEMATICAL LOOPED GHAIN -1EEA1..1EEA3 ; Other_Math # Lo [3] ARABIC MATHEMATICAL DOUBLE-STRUCK BEH..ARABIC MATHEMATICAL DOUBLE-STRUCK DAL -1EEA5..1EEA9 ; Other_Math # Lo [5] ARABIC MATHEMATICAL DOUBLE-STRUCK WAW..ARABIC MATHEMATICAL DOUBLE-STRUCK YEH -1EEAB..1EEBB ; Other_Math # Lo [17] ARABIC MATHEMATICAL DOUBLE-STRUCK LAM..ARABIC MATHEMATICAL DOUBLE-STRUCK GHAIN - -# Total code points: 1362 - -# ================================================ - -0030..0039 ; Hex_Digit # Nd [10] DIGIT ZERO..DIGIT NINE -0041..0046 ; Hex_Digit # L& [6] LATIN CAPITAL LETTER A..LATIN CAPITAL LETTER F -0061..0066 ; Hex_Digit # L& [6] LATIN SMALL LETTER A..LATIN SMALL LETTER F -FF10..FF19 ; Hex_Digit # Nd [10] FULLWIDTH DIGIT ZERO..FULLWIDTH DIGIT NINE -FF21..FF26 ; Hex_Digit # L& [6] FULLWIDTH LATIN CAPITAL LETTER A..FULLWIDTH LATIN CAPITAL LETTER F -FF41..FF46 ; Hex_Digit # L& [6] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH LATIN SMALL LETTER F - -# Total code points: 44 - -# ================================================ - -0030..0039 ; ASCII_Hex_Digit # Nd [10] DIGIT ZERO..DIGIT NINE -0041..0046 ; ASCII_Hex_Digit # L& [6] LATIN CAPITAL LETTER A..LATIN CAPITAL LETTER F -0061..0066 ; ASCII_Hex_Digit # L& [6] LATIN SMALL LETTER A..LATIN SMALL LETTER F - -# Total code points: 22 - -# ================================================ - -0345 ; Other_Alphabetic # Mn COMBINING GREEK YPOGEGRAMMENI -05B0..05BD ; Other_Alphabetic # Mn [14] HEBREW POINT SHEVA..HEBREW POINT METEG -05BF ; Other_Alphabetic # Mn HEBREW POINT RAFE -05C1..05C2 ; Other_Alphabetic # Mn [2] HEBREW POINT SHIN DOT..HEBREW POINT SIN DOT -05C4..05C5 ; Other_Alphabetic # Mn [2] HEBREW MARK UPPER DOT..HEBREW MARK LOWER DOT -05C7 ; Other_Alphabetic # Mn HEBREW POINT QAMATS QATAN -0610..061A ; Other_Alphabetic # Mn [11] ARABIC SIGN SALLALLAHOU ALAYHE WASSALLAM..ARABIC SMALL KASRA -064B..0657 ; Other_Alphabetic # Mn [13] ARABIC FATHATAN..ARABIC INVERTED DAMMA -0659..065F ; Other_Alphabetic # Mn [7] ARABIC ZWARAKAY..ARABIC WAVY HAMZA BELOW -0670 ; Other_Alphabetic # Mn ARABIC LETTER SUPERSCRIPT ALEF -06D6..06DC ; Other_Alphabetic # Mn [7] ARABIC SMALL HIGH LIGATURE SAD WITH LAM WITH ALEF MAKSURA..ARABIC SMALL HIGH SEEN -06E1..06E4 ; Other_Alphabetic # Mn [4] ARABIC SMALL HIGH DOTLESS HEAD OF KHAH..ARABIC SMALL HIGH MADDA -06E7..06E8 ; Other_Alphabetic # Mn [2] ARABIC SMALL HIGH YEH..ARABIC SMALL HIGH NOON -06ED ; Other_Alphabetic # Mn ARABIC SMALL LOW MEEM -0711 ; Other_Alphabetic # Mn SYRIAC LETTER SUPERSCRIPT ALAPH -0730..073F ; Other_Alphabetic # Mn [16] SYRIAC PTHAHA ABOVE..SYRIAC RWAHA -07A6..07B0 ; Other_Alphabetic # Mn [11] THAANA ABAFILI..THAANA SUKUN -0816..0817 ; Other_Alphabetic # Mn [2] SAMARITAN MARK IN..SAMARITAN MARK IN-ALAF -081B..0823 ; Other_Alphabetic # Mn [9] SAMARITAN MARK EPENTHETIC YUT..SAMARITAN VOWEL SIGN A -0825..0827 ; Other_Alphabetic # Mn [3] SAMARITAN VOWEL SIGN SHORT A..SAMARITAN VOWEL SIGN U -0829..082C ; Other_Alphabetic # Mn [4] SAMARITAN VOWEL SIGN LONG I..SAMARITAN VOWEL SIGN SUKUN -08D4..08DF ; Other_Alphabetic # Mn [12] ARABIC SMALL HIGH WORD AR-RUB..ARABIC SMALL HIGH WORD WAQFA -08E3..08E9 ; Other_Alphabetic # Mn [7] ARABIC TURNED DAMMA BELOW..ARABIC CURLY KASRATAN -08F0..0902 ; Other_Alphabetic # Mn [19] ARABIC OPEN FATHATAN..DEVANAGARI SIGN ANUSVARA -0903 ; Other_Alphabetic # Mc DEVANAGARI SIGN VISARGA -093A ; Other_Alphabetic # Mn DEVANAGARI VOWEL SIGN OE -093B ; Other_Alphabetic # Mc DEVANAGARI VOWEL SIGN OOE -093E..0940 ; Other_Alphabetic # Mc [3] DEVANAGARI VOWEL SIGN AA..DEVANAGARI VOWEL SIGN II -0941..0948 ; Other_Alphabetic # Mn [8] DEVANAGARI VOWEL SIGN U..DEVANAGARI VOWEL SIGN AI -0949..094C ; Other_Alphabetic # Mc [4] DEVANAGARI VOWEL SIGN CANDRA O..DEVANAGARI VOWEL SIGN AU -094E..094F ; Other_Alphabetic # Mc [2] DEVANAGARI VOWEL SIGN PRISHTHAMATRA E..DEVANAGARI VOWEL SIGN AW -0955..0957 ; Other_Alphabetic # Mn [3] DEVANAGARI VOWEL SIGN CANDRA LONG E..DEVANAGARI VOWEL SIGN UUE -0962..0963 ; Other_Alphabetic # Mn [2] DEVANAGARI VOWEL SIGN VOCALIC L..DEVANAGARI VOWEL SIGN VOCALIC LL -0981 ; Other_Alphabetic # Mn BENGALI SIGN CANDRABINDU -0982..0983 ; Other_Alphabetic # Mc [2] BENGALI SIGN ANUSVARA..BENGALI SIGN VISARGA -09BE..09C0 ; Other_Alphabetic # Mc [3] BENGALI VOWEL SIGN AA..BENGALI VOWEL SIGN II -09C1..09C4 ; Other_Alphabetic # Mn [4] BENGALI VOWEL SIGN U..BENGALI VOWEL SIGN VOCALIC RR -09C7..09C8 ; Other_Alphabetic # Mc [2] BENGALI VOWEL SIGN E..BENGALI VOWEL SIGN AI -09CB..09CC ; Other_Alphabetic # Mc [2] BENGALI VOWEL SIGN O..BENGALI VOWEL SIGN AU -09D7 ; Other_Alphabetic # Mc BENGALI AU LENGTH MARK -09E2..09E3 ; Other_Alphabetic # Mn [2] BENGALI VOWEL SIGN VOCALIC L..BENGALI VOWEL SIGN VOCALIC LL -0A01..0A02 ; Other_Alphabetic # Mn [2] GURMUKHI SIGN ADAK BINDI..GURMUKHI SIGN BINDI -0A03 ; Other_Alphabetic # Mc GURMUKHI SIGN VISARGA -0A3E..0A40 ; Other_Alphabetic # Mc [3] GURMUKHI VOWEL SIGN AA..GURMUKHI VOWEL SIGN II -0A41..0A42 ; Other_Alphabetic # Mn [2] GURMUKHI VOWEL SIGN U..GURMUKHI VOWEL SIGN UU -0A47..0A48 ; Other_Alphabetic # Mn [2] GURMUKHI VOWEL SIGN EE..GURMUKHI VOWEL SIGN AI -0A4B..0A4C ; Other_Alphabetic # Mn [2] GURMUKHI VOWEL SIGN OO..GURMUKHI VOWEL SIGN AU -0A51 ; Other_Alphabetic # Mn GURMUKHI SIGN UDAAT -0A70..0A71 ; Other_Alphabetic # Mn [2] GURMUKHI TIPPI..GURMUKHI ADDAK -0A75 ; Other_Alphabetic # Mn GURMUKHI SIGN YAKASH -0A81..0A82 ; Other_Alphabetic # Mn [2] GUJARATI SIGN CANDRABINDU..GUJARATI SIGN ANUSVARA -0A83 ; Other_Alphabetic # Mc GUJARATI SIGN VISARGA -0ABE..0AC0 ; Other_Alphabetic # Mc [3] GUJARATI VOWEL SIGN AA..GUJARATI VOWEL SIGN II -0AC1..0AC5 ; Other_Alphabetic # Mn [5] GUJARATI VOWEL SIGN U..GUJARATI VOWEL SIGN CANDRA E -0AC7..0AC8 ; Other_Alphabetic # Mn [2] GUJARATI VOWEL SIGN E..GUJARATI VOWEL SIGN AI -0AC9 ; Other_Alphabetic # Mc GUJARATI VOWEL SIGN CANDRA O -0ACB..0ACC ; Other_Alphabetic # Mc [2] GUJARATI VOWEL SIGN O..GUJARATI VOWEL SIGN AU -0AE2..0AE3 ; Other_Alphabetic # Mn [2] GUJARATI VOWEL SIGN VOCALIC L..GUJARATI VOWEL SIGN VOCALIC LL -0AFA..0AFC ; Other_Alphabetic # Mn [3] GUJARATI SIGN SUKUN..GUJARATI SIGN MADDAH -0B01 ; Other_Alphabetic # Mn ORIYA SIGN CANDRABINDU -0B02..0B03 ; Other_Alphabetic # Mc [2] ORIYA SIGN ANUSVARA..ORIYA SIGN VISARGA -0B3E ; Other_Alphabetic # Mc ORIYA VOWEL SIGN AA -0B3F ; Other_Alphabetic # Mn ORIYA VOWEL SIGN I -0B40 ; Other_Alphabetic # Mc ORIYA VOWEL SIGN II -0B41..0B44 ; Other_Alphabetic # Mn [4] ORIYA VOWEL SIGN U..ORIYA VOWEL SIGN VOCALIC RR -0B47..0B48 ; Other_Alphabetic # Mc [2] ORIYA VOWEL SIGN E..ORIYA VOWEL SIGN AI -0B4B..0B4C ; Other_Alphabetic # Mc [2] ORIYA VOWEL SIGN O..ORIYA VOWEL SIGN AU -0B56 ; Other_Alphabetic # Mn ORIYA AI LENGTH MARK -0B57 ; Other_Alphabetic # Mc ORIYA AU LENGTH MARK -0B62..0B63 ; Other_Alphabetic # Mn [2] ORIYA VOWEL SIGN VOCALIC L..ORIYA VOWEL SIGN VOCALIC LL -0B82 ; Other_Alphabetic # Mn TAMIL SIGN ANUSVARA -0BBE..0BBF ; Other_Alphabetic # Mc [2] TAMIL VOWEL SIGN AA..TAMIL VOWEL SIGN I -0BC0 ; Other_Alphabetic # Mn TAMIL VOWEL SIGN II -0BC1..0BC2 ; Other_Alphabetic # Mc [2] TAMIL VOWEL SIGN U..TAMIL VOWEL SIGN UU -0BC6..0BC8 ; Other_Alphabetic # Mc [3] TAMIL VOWEL SIGN E..TAMIL VOWEL SIGN AI -0BCA..0BCC ; Other_Alphabetic # Mc [3] TAMIL VOWEL SIGN O..TAMIL VOWEL SIGN AU -0BD7 ; Other_Alphabetic # Mc TAMIL AU LENGTH MARK -0C00 ; Other_Alphabetic # Mn TELUGU SIGN COMBINING CANDRABINDU ABOVE -0C01..0C03 ; Other_Alphabetic # Mc [3] TELUGU SIGN CANDRABINDU..TELUGU SIGN VISARGA -0C3E..0C40 ; Other_Alphabetic # Mn [3] TELUGU VOWEL SIGN AA..TELUGU VOWEL SIGN II -0C41..0C44 ; Other_Alphabetic # Mc [4] TELUGU VOWEL SIGN U..TELUGU VOWEL SIGN VOCALIC RR -0C46..0C48 ; Other_Alphabetic # Mn [3] TELUGU VOWEL SIGN E..TELUGU VOWEL SIGN AI -0C4A..0C4C ; Other_Alphabetic # Mn [3] TELUGU VOWEL SIGN O..TELUGU VOWEL SIGN AU -0C55..0C56 ; Other_Alphabetic # Mn [2] TELUGU LENGTH MARK..TELUGU AI LENGTH MARK -0C62..0C63 ; Other_Alphabetic # Mn [2] TELUGU VOWEL SIGN VOCALIC L..TELUGU VOWEL SIGN VOCALIC LL -0C81 ; Other_Alphabetic # Mn KANNADA SIGN CANDRABINDU -0C82..0C83 ; Other_Alphabetic # Mc [2] KANNADA SIGN ANUSVARA..KANNADA SIGN VISARGA -0CBE ; Other_Alphabetic # Mc KANNADA VOWEL SIGN AA -0CBF ; Other_Alphabetic # Mn KANNADA VOWEL SIGN I -0CC0..0CC4 ; Other_Alphabetic # Mc [5] KANNADA VOWEL SIGN II..KANNADA VOWEL SIGN VOCALIC RR -0CC6 ; Other_Alphabetic # Mn KANNADA VOWEL SIGN E -0CC7..0CC8 ; Other_Alphabetic # Mc [2] KANNADA VOWEL SIGN EE..KANNADA VOWEL SIGN AI -0CCA..0CCB ; Other_Alphabetic # Mc [2] KANNADA VOWEL SIGN O..KANNADA VOWEL SIGN OO -0CCC ; Other_Alphabetic # Mn KANNADA VOWEL SIGN AU -0CD5..0CD6 ; Other_Alphabetic # Mc [2] KANNADA LENGTH MARK..KANNADA AI LENGTH MARK -0CE2..0CE3 ; Other_Alphabetic # Mn [2] KANNADA VOWEL SIGN VOCALIC L..KANNADA VOWEL SIGN VOCALIC LL -0D00..0D01 ; Other_Alphabetic # Mn [2] MALAYALAM SIGN COMBINING ANUSVARA ABOVE..MALAYALAM SIGN CANDRABINDU -0D02..0D03 ; Other_Alphabetic # Mc [2] MALAYALAM SIGN ANUSVARA..MALAYALAM SIGN VISARGA -0D3E..0D40 ; Other_Alphabetic # Mc [3] MALAYALAM VOWEL SIGN AA..MALAYALAM VOWEL SIGN II -0D41..0D44 ; Other_Alphabetic # Mn [4] MALAYALAM VOWEL SIGN U..MALAYALAM VOWEL SIGN VOCALIC RR -0D46..0D48 ; Other_Alphabetic # Mc [3] MALAYALAM VOWEL SIGN E..MALAYALAM VOWEL SIGN AI -0D4A..0D4C ; Other_Alphabetic # Mc [3] MALAYALAM VOWEL SIGN O..MALAYALAM VOWEL SIGN AU -0D57 ; Other_Alphabetic # Mc MALAYALAM AU LENGTH MARK -0D62..0D63 ; Other_Alphabetic # Mn [2] MALAYALAM VOWEL SIGN VOCALIC L..MALAYALAM VOWEL SIGN VOCALIC LL -0D82..0D83 ; Other_Alphabetic # Mc [2] SINHALA SIGN ANUSVARAYA..SINHALA SIGN VISARGAYA -0DCF..0DD1 ; Other_Alphabetic # Mc [3] SINHALA VOWEL SIGN AELA-PILLA..SINHALA VOWEL SIGN DIGA AEDA-PILLA -0DD2..0DD4 ; Other_Alphabetic # Mn [3] SINHALA VOWEL SIGN KETTI IS-PILLA..SINHALA VOWEL SIGN KETTI PAA-PILLA -0DD6 ; Other_Alphabetic # Mn SINHALA VOWEL SIGN DIGA PAA-PILLA -0DD8..0DDF ; Other_Alphabetic # Mc [8] SINHALA VOWEL SIGN GAETTA-PILLA..SINHALA VOWEL SIGN GAYANUKITTA -0DF2..0DF3 ; Other_Alphabetic # Mc [2] SINHALA VOWEL SIGN DIGA GAETTA-PILLA..SINHALA VOWEL SIGN DIGA GAYANUKITTA -0E31 ; Other_Alphabetic # Mn THAI CHARACTER MAI HAN-AKAT -0E34..0E3A ; Other_Alphabetic # Mn [7] THAI CHARACTER SARA I..THAI CHARACTER PHINTHU -0E4D ; Other_Alphabetic # Mn THAI CHARACTER NIKHAHIT -0EB1 ; Other_Alphabetic # Mn LAO VOWEL SIGN MAI KAN -0EB4..0EB9 ; Other_Alphabetic # Mn [6] LAO VOWEL SIGN I..LAO VOWEL SIGN UU -0EBB..0EBC ; Other_Alphabetic # Mn [2] LAO VOWEL SIGN MAI KON..LAO SEMIVOWEL SIGN LO -0ECD ; Other_Alphabetic # Mn LAO NIGGAHITA -0F71..0F7E ; Other_Alphabetic # Mn [14] TIBETAN VOWEL SIGN AA..TIBETAN SIGN RJES SU NGA RO -0F7F ; Other_Alphabetic # Mc TIBETAN SIGN RNAM BCAD -0F80..0F81 ; Other_Alphabetic # Mn [2] TIBETAN VOWEL SIGN REVERSED I..TIBETAN VOWEL SIGN REVERSED II -0F8D..0F97 ; Other_Alphabetic # Mn [11] TIBETAN SUBJOINED SIGN LCE TSA CAN..TIBETAN SUBJOINED LETTER JA -0F99..0FBC ; Other_Alphabetic # Mn [36] TIBETAN SUBJOINED LETTER NYA..TIBETAN SUBJOINED LETTER FIXED-FORM RA -102B..102C ; Other_Alphabetic # Mc [2] MYANMAR VOWEL SIGN TALL AA..MYANMAR VOWEL SIGN AA -102D..1030 ; Other_Alphabetic # Mn [4] MYANMAR VOWEL SIGN I..MYANMAR VOWEL SIGN UU -1031 ; Other_Alphabetic # Mc MYANMAR VOWEL SIGN E -1032..1036 ; Other_Alphabetic # Mn [5] MYANMAR VOWEL SIGN AI..MYANMAR SIGN ANUSVARA -1038 ; Other_Alphabetic # Mc MYANMAR SIGN VISARGA -103B..103C ; Other_Alphabetic # Mc [2] MYANMAR CONSONANT SIGN MEDIAL YA..MYANMAR CONSONANT SIGN MEDIAL RA -103D..103E ; Other_Alphabetic # Mn [2] MYANMAR CONSONANT SIGN MEDIAL WA..MYANMAR CONSONANT SIGN MEDIAL HA -1056..1057 ; Other_Alphabetic # Mc [2] MYANMAR VOWEL SIGN VOCALIC R..MYANMAR VOWEL SIGN VOCALIC RR -1058..1059 ; Other_Alphabetic # Mn [2] MYANMAR VOWEL SIGN VOCALIC L..MYANMAR VOWEL SIGN VOCALIC LL -105E..1060 ; Other_Alphabetic # Mn [3] MYANMAR CONSONANT SIGN MON MEDIAL NA..MYANMAR CONSONANT SIGN MON MEDIAL LA -1062 ; Other_Alphabetic # Mc MYANMAR VOWEL SIGN SGAW KAREN EU -1067..1068 ; Other_Alphabetic # Mc [2] MYANMAR VOWEL SIGN WESTERN PWO KAREN EU..MYANMAR VOWEL SIGN WESTERN PWO KAREN UE -1071..1074 ; Other_Alphabetic # Mn [4] MYANMAR VOWEL SIGN GEBA KAREN I..MYANMAR VOWEL SIGN KAYAH EE -1082 ; Other_Alphabetic # Mn MYANMAR CONSONANT SIGN SHAN MEDIAL WA -1083..1084 ; Other_Alphabetic # Mc [2] MYANMAR VOWEL SIGN SHAN AA..MYANMAR VOWEL SIGN SHAN E -1085..1086 ; Other_Alphabetic # Mn [2] MYANMAR VOWEL SIGN SHAN E ABOVE..MYANMAR VOWEL SIGN SHAN FINAL Y -109C ; Other_Alphabetic # Mc MYANMAR VOWEL SIGN AITON A -109D ; Other_Alphabetic # Mn MYANMAR VOWEL SIGN AITON AI -135F ; Other_Alphabetic # Mn ETHIOPIC COMBINING GEMINATION MARK -1712..1713 ; Other_Alphabetic # Mn [2] TAGALOG VOWEL SIGN I..TAGALOG VOWEL SIGN U -1732..1733 ; Other_Alphabetic # Mn [2] HANUNOO VOWEL SIGN I..HANUNOO VOWEL SIGN U -1752..1753 ; Other_Alphabetic # Mn [2] BUHID VOWEL SIGN I..BUHID VOWEL SIGN U -1772..1773 ; Other_Alphabetic # Mn [2] TAGBANWA VOWEL SIGN I..TAGBANWA VOWEL SIGN U -17B6 ; Other_Alphabetic # Mc KHMER VOWEL SIGN AA -17B7..17BD ; Other_Alphabetic # Mn [7] KHMER VOWEL SIGN I..KHMER VOWEL SIGN UA -17BE..17C5 ; Other_Alphabetic # Mc [8] KHMER VOWEL SIGN OE..KHMER VOWEL SIGN AU -17C6 ; Other_Alphabetic # Mn KHMER SIGN NIKAHIT -17C7..17C8 ; Other_Alphabetic # Mc [2] KHMER SIGN REAHMUK..KHMER SIGN YUUKALEAPINTU -1885..1886 ; Other_Alphabetic # Mn [2] MONGOLIAN LETTER ALI GALI BALUDA..MONGOLIAN LETTER ALI GALI THREE BALUDA -18A9 ; Other_Alphabetic # Mn MONGOLIAN LETTER ALI GALI DAGALGA -1920..1922 ; Other_Alphabetic # Mn [3] LIMBU VOWEL SIGN A..LIMBU VOWEL SIGN U -1923..1926 ; Other_Alphabetic # Mc [4] LIMBU VOWEL SIGN EE..LIMBU VOWEL SIGN AU -1927..1928 ; Other_Alphabetic # Mn [2] LIMBU VOWEL SIGN E..LIMBU VOWEL SIGN O -1929..192B ; Other_Alphabetic # Mc [3] LIMBU SUBJOINED LETTER YA..LIMBU SUBJOINED LETTER WA -1930..1931 ; Other_Alphabetic # Mc [2] LIMBU SMALL LETTER KA..LIMBU SMALL LETTER NGA -1932 ; Other_Alphabetic # Mn LIMBU SMALL LETTER ANUSVARA -1933..1938 ; Other_Alphabetic # Mc [6] LIMBU SMALL LETTER TA..LIMBU SMALL LETTER LA -1A17..1A18 ; Other_Alphabetic # Mn [2] BUGINESE VOWEL SIGN I..BUGINESE VOWEL SIGN U -1A19..1A1A ; Other_Alphabetic # Mc [2] BUGINESE VOWEL SIGN E..BUGINESE VOWEL SIGN O -1A1B ; Other_Alphabetic # Mn BUGINESE VOWEL SIGN AE -1A55 ; Other_Alphabetic # Mc TAI THAM CONSONANT SIGN MEDIAL RA -1A56 ; Other_Alphabetic # Mn TAI THAM CONSONANT SIGN MEDIAL LA -1A57 ; Other_Alphabetic # Mc TAI THAM CONSONANT SIGN LA TANG LAI -1A58..1A5E ; Other_Alphabetic # Mn [7] TAI THAM SIGN MAI KANG LAI..TAI THAM CONSONANT SIGN SA -1A61 ; Other_Alphabetic # Mc TAI THAM VOWEL SIGN A -1A62 ; Other_Alphabetic # Mn TAI THAM VOWEL SIGN MAI SAT -1A63..1A64 ; Other_Alphabetic # Mc [2] TAI THAM VOWEL SIGN AA..TAI THAM VOWEL SIGN TALL AA -1A65..1A6C ; Other_Alphabetic # Mn [8] TAI THAM VOWEL SIGN I..TAI THAM VOWEL SIGN OA BELOW -1A6D..1A72 ; Other_Alphabetic # Mc [6] TAI THAM VOWEL SIGN OY..TAI THAM VOWEL SIGN THAM AI -1A73..1A74 ; Other_Alphabetic # Mn [2] TAI THAM VOWEL SIGN OA ABOVE..TAI THAM SIGN MAI KANG -1B00..1B03 ; Other_Alphabetic # Mn [4] BALINESE SIGN ULU RICEM..BALINESE SIGN SURANG -1B04 ; Other_Alphabetic # Mc BALINESE SIGN BISAH -1B35 ; Other_Alphabetic # Mc BALINESE VOWEL SIGN TEDUNG -1B36..1B3A ; Other_Alphabetic # Mn [5] BALINESE VOWEL SIGN ULU..BALINESE VOWEL SIGN RA REPA -1B3B ; Other_Alphabetic # Mc BALINESE VOWEL SIGN RA REPA TEDUNG -1B3C ; Other_Alphabetic # Mn BALINESE VOWEL SIGN LA LENGA -1B3D..1B41 ; Other_Alphabetic # Mc [5] BALINESE VOWEL SIGN LA LENGA TEDUNG..BALINESE VOWEL SIGN TALING REPA TEDUNG -1B42 ; Other_Alphabetic # Mn BALINESE VOWEL SIGN PEPET -1B43 ; Other_Alphabetic # Mc BALINESE VOWEL SIGN PEPET TEDUNG -1B80..1B81 ; Other_Alphabetic # Mn [2] SUNDANESE SIGN PANYECEK..SUNDANESE SIGN PANGLAYAR -1B82 ; Other_Alphabetic # Mc SUNDANESE SIGN PANGWISAD -1BA1 ; Other_Alphabetic # Mc SUNDANESE CONSONANT SIGN PAMINGKAL -1BA2..1BA5 ; Other_Alphabetic # Mn [4] SUNDANESE CONSONANT SIGN PANYAKRA..SUNDANESE VOWEL SIGN PANYUKU -1BA6..1BA7 ; Other_Alphabetic # Mc [2] SUNDANESE VOWEL SIGN PANAELAENG..SUNDANESE VOWEL SIGN PANOLONG -1BA8..1BA9 ; Other_Alphabetic # Mn [2] SUNDANESE VOWEL SIGN PAMEPET..SUNDANESE VOWEL SIGN PANEULEUNG -1BAC..1BAD ; Other_Alphabetic # Mn [2] SUNDANESE CONSONANT SIGN PASANGAN MA..SUNDANESE CONSONANT SIGN PASANGAN WA -1BE7 ; Other_Alphabetic # Mc BATAK VOWEL SIGN E -1BE8..1BE9 ; Other_Alphabetic # Mn [2] BATAK VOWEL SIGN PAKPAK E..BATAK VOWEL SIGN EE -1BEA..1BEC ; Other_Alphabetic # Mc [3] BATAK VOWEL SIGN I..BATAK VOWEL SIGN O -1BED ; Other_Alphabetic # Mn BATAK VOWEL SIGN KARO O -1BEE ; Other_Alphabetic # Mc BATAK VOWEL SIGN U -1BEF..1BF1 ; Other_Alphabetic # Mn [3] BATAK VOWEL SIGN U FOR SIMALUNGUN SA..BATAK CONSONANT SIGN H -1C24..1C2B ; Other_Alphabetic # Mc [8] LEPCHA SUBJOINED LETTER YA..LEPCHA VOWEL SIGN UU -1C2C..1C33 ; Other_Alphabetic # Mn [8] LEPCHA VOWEL SIGN E..LEPCHA CONSONANT SIGN T -1C34..1C35 ; Other_Alphabetic # Mc [2] LEPCHA CONSONANT SIGN NYIN-DO..LEPCHA CONSONANT SIGN KANG -1CF2..1CF3 ; Other_Alphabetic # Mc [2] VEDIC SIGN ARDHAVISARGA..VEDIC SIGN ROTATED ARDHAVISARGA -1DE7..1DF4 ; Other_Alphabetic # Mn [14] COMBINING LATIN SMALL LETTER ALPHA..COMBINING LATIN SMALL LETTER U WITH DIAERESIS -24B6..24E9 ; Other_Alphabetic # So [52] CIRCLED LATIN CAPITAL LETTER A..CIRCLED LATIN SMALL LETTER Z -2DE0..2DFF ; Other_Alphabetic # Mn [32] COMBINING CYRILLIC LETTER BE..COMBINING CYRILLIC LETTER IOTIFIED BIG YUS -A674..A67B ; Other_Alphabetic # Mn [8] COMBINING CYRILLIC LETTER UKRAINIAN IE..COMBINING CYRILLIC LETTER OMEGA -A69E..A69F ; Other_Alphabetic # Mn [2] COMBINING CYRILLIC LETTER EF..COMBINING CYRILLIC LETTER IOTIFIED E -A823..A824 ; Other_Alphabetic # Mc [2] SYLOTI NAGRI VOWEL SIGN A..SYLOTI NAGRI VOWEL SIGN I -A825..A826 ; Other_Alphabetic # Mn [2] SYLOTI NAGRI VOWEL SIGN U..SYLOTI NAGRI VOWEL SIGN E -A827 ; Other_Alphabetic # Mc SYLOTI NAGRI VOWEL SIGN OO -A880..A881 ; Other_Alphabetic # Mc [2] SAURASHTRA SIGN ANUSVARA..SAURASHTRA SIGN VISARGA -A8B4..A8C3 ; Other_Alphabetic # Mc [16] SAURASHTRA CONSONANT SIGN HAARU..SAURASHTRA VOWEL SIGN AU -A8C5 ; Other_Alphabetic # Mn SAURASHTRA SIGN CANDRABINDU -A926..A92A ; Other_Alphabetic # Mn [5] KAYAH LI VOWEL UE..KAYAH LI VOWEL O -A947..A951 ; Other_Alphabetic # Mn [11] REJANG VOWEL SIGN I..REJANG CONSONANT SIGN R -A952 ; Other_Alphabetic # Mc REJANG CONSONANT SIGN H -A980..A982 ; Other_Alphabetic # Mn [3] JAVANESE SIGN PANYANGGA..JAVANESE SIGN LAYAR -A983 ; Other_Alphabetic # Mc JAVANESE SIGN WIGNYAN -A9B4..A9B5 ; Other_Alphabetic # Mc [2] JAVANESE VOWEL SIGN TARUNG..JAVANESE VOWEL SIGN TOLONG -A9B6..A9B9 ; Other_Alphabetic # Mn [4] JAVANESE VOWEL SIGN WULU..JAVANESE VOWEL SIGN SUKU MENDUT -A9BA..A9BB ; Other_Alphabetic # Mc [2] JAVANESE VOWEL SIGN TALING..JAVANESE VOWEL SIGN DIRGA MURE -A9BC ; Other_Alphabetic # Mn JAVANESE VOWEL SIGN PEPET -A9BD..A9BF ; Other_Alphabetic # Mc [3] JAVANESE CONSONANT SIGN KERET..JAVANESE CONSONANT SIGN CAKRA -AA29..AA2E ; Other_Alphabetic # Mn [6] CHAM VOWEL SIGN AA..CHAM VOWEL SIGN OE -AA2F..AA30 ; Other_Alphabetic # Mc [2] CHAM VOWEL SIGN O..CHAM VOWEL SIGN AI -AA31..AA32 ; Other_Alphabetic # Mn [2] CHAM VOWEL SIGN AU..CHAM VOWEL SIGN UE -AA33..AA34 ; Other_Alphabetic # Mc [2] CHAM CONSONANT SIGN YA..CHAM CONSONANT SIGN RA -AA35..AA36 ; Other_Alphabetic # Mn [2] CHAM CONSONANT SIGN LA..CHAM CONSONANT SIGN WA -AA43 ; Other_Alphabetic # Mn CHAM CONSONANT SIGN FINAL NG -AA4C ; Other_Alphabetic # Mn CHAM CONSONANT SIGN FINAL M -AA4D ; Other_Alphabetic # Mc CHAM CONSONANT SIGN FINAL H -AAB0 ; Other_Alphabetic # Mn TAI VIET MAI KANG -AAB2..AAB4 ; Other_Alphabetic # Mn [3] TAI VIET VOWEL I..TAI VIET VOWEL U -AAB7..AAB8 ; Other_Alphabetic # Mn [2] TAI VIET MAI KHIT..TAI VIET VOWEL IA -AABE ; Other_Alphabetic # Mn TAI VIET VOWEL AM -AAEB ; Other_Alphabetic # Mc MEETEI MAYEK VOWEL SIGN II -AAEC..AAED ; Other_Alphabetic # Mn [2] MEETEI MAYEK VOWEL SIGN UU..MEETEI MAYEK VOWEL SIGN AAI -AAEE..AAEF ; Other_Alphabetic # Mc [2] MEETEI MAYEK VOWEL SIGN AU..MEETEI MAYEK VOWEL SIGN AAU -AAF5 ; Other_Alphabetic # Mc MEETEI MAYEK VOWEL SIGN VISARGA -ABE3..ABE4 ; Other_Alphabetic # Mc [2] MEETEI MAYEK VOWEL SIGN ONAP..MEETEI MAYEK VOWEL SIGN INAP -ABE5 ; Other_Alphabetic # Mn MEETEI MAYEK VOWEL SIGN ANAP -ABE6..ABE7 ; Other_Alphabetic # Mc [2] MEETEI MAYEK VOWEL SIGN YENAP..MEETEI MAYEK VOWEL SIGN SOUNAP -ABE8 ; Other_Alphabetic # Mn MEETEI MAYEK VOWEL SIGN UNAP -ABE9..ABEA ; Other_Alphabetic # Mc [2] MEETEI MAYEK VOWEL SIGN CHEINAP..MEETEI MAYEK VOWEL SIGN NUNG -FB1E ; Other_Alphabetic # Mn HEBREW POINT JUDEO-SPANISH VARIKA -10376..1037A ; Other_Alphabetic # Mn [5] COMBINING OLD PERMIC LETTER AN..COMBINING OLD PERMIC LETTER SII -10A01..10A03 ; Other_Alphabetic # Mn [3] KHAROSHTHI VOWEL SIGN I..KHAROSHTHI VOWEL SIGN VOCALIC R -10A05..10A06 ; Other_Alphabetic # Mn [2] KHAROSHTHI VOWEL SIGN E..KHAROSHTHI VOWEL SIGN O -10A0C..10A0F ; Other_Alphabetic # Mn [4] KHAROSHTHI VOWEL LENGTH MARK..KHAROSHTHI SIGN VISARGA -11000 ; Other_Alphabetic # Mc BRAHMI SIGN CANDRABINDU -11001 ; Other_Alphabetic # Mn BRAHMI SIGN ANUSVARA -11002 ; Other_Alphabetic # Mc BRAHMI SIGN VISARGA -11038..11045 ; Other_Alphabetic # Mn [14] BRAHMI VOWEL SIGN AA..BRAHMI VOWEL SIGN AU -11082 ; Other_Alphabetic # Mc KAITHI SIGN VISARGA -110B0..110B2 ; Other_Alphabetic # Mc [3] KAITHI VOWEL SIGN AA..KAITHI VOWEL SIGN II -110B3..110B6 ; Other_Alphabetic # Mn [4] KAITHI VOWEL SIGN U..KAITHI VOWEL SIGN AI -110B7..110B8 ; Other_Alphabetic # Mc [2] KAITHI VOWEL SIGN O..KAITHI VOWEL SIGN AU -11100..11102 ; Other_Alphabetic # Mn [3] CHAKMA SIGN CANDRABINDU..CHAKMA SIGN VISARGA -11127..1112B ; Other_Alphabetic # Mn [5] CHAKMA VOWEL SIGN A..CHAKMA VOWEL SIGN UU -1112C ; Other_Alphabetic # Mc CHAKMA VOWEL SIGN E -1112D..11132 ; Other_Alphabetic # Mn [6] CHAKMA VOWEL SIGN AI..CHAKMA AU MARK -11180..11181 ; Other_Alphabetic # Mn [2] SHARADA SIGN CANDRABINDU..SHARADA SIGN ANUSVARA -11182 ; Other_Alphabetic # Mc SHARADA SIGN VISARGA -111B3..111B5 ; Other_Alphabetic # Mc [3] SHARADA VOWEL SIGN AA..SHARADA VOWEL SIGN II -111B6..111BE ; Other_Alphabetic # Mn [9] SHARADA VOWEL SIGN U..SHARADA VOWEL SIGN O -111BF ; Other_Alphabetic # Mc SHARADA VOWEL SIGN AU -1122C..1122E ; Other_Alphabetic # Mc [3] KHOJKI VOWEL SIGN AA..KHOJKI VOWEL SIGN II -1122F..11231 ; Other_Alphabetic # Mn [3] KHOJKI VOWEL SIGN U..KHOJKI VOWEL SIGN AI -11232..11233 ; Other_Alphabetic # Mc [2] KHOJKI VOWEL SIGN O..KHOJKI VOWEL SIGN AU -11234 ; Other_Alphabetic # Mn KHOJKI SIGN ANUSVARA -11237 ; Other_Alphabetic # Mn KHOJKI SIGN SHADDA -1123E ; Other_Alphabetic # Mn KHOJKI SIGN SUKUN -112DF ; Other_Alphabetic # Mn KHUDAWADI SIGN ANUSVARA -112E0..112E2 ; Other_Alphabetic # Mc [3] KHUDAWADI VOWEL SIGN AA..KHUDAWADI VOWEL SIGN II -112E3..112E8 ; Other_Alphabetic # Mn [6] KHUDAWADI VOWEL SIGN U..KHUDAWADI VOWEL SIGN AU -11300..11301 ; Other_Alphabetic # Mn [2] GRANTHA SIGN COMBINING ANUSVARA ABOVE..GRANTHA SIGN CANDRABINDU -11302..11303 ; Other_Alphabetic # Mc [2] GRANTHA SIGN ANUSVARA..GRANTHA SIGN VISARGA -1133E..1133F ; Other_Alphabetic # Mc [2] GRANTHA VOWEL SIGN AA..GRANTHA VOWEL SIGN I -11340 ; Other_Alphabetic # Mn GRANTHA VOWEL SIGN II -11341..11344 ; Other_Alphabetic # Mc [4] GRANTHA VOWEL SIGN U..GRANTHA VOWEL SIGN VOCALIC RR -11347..11348 ; Other_Alphabetic # Mc [2] GRANTHA VOWEL SIGN EE..GRANTHA VOWEL SIGN AI -1134B..1134C ; Other_Alphabetic # Mc [2] GRANTHA VOWEL SIGN OO..GRANTHA VOWEL SIGN AU -11357 ; Other_Alphabetic # Mc GRANTHA AU LENGTH MARK -11362..11363 ; Other_Alphabetic # Mc [2] GRANTHA VOWEL SIGN VOCALIC L..GRANTHA VOWEL SIGN VOCALIC LL -11435..11437 ; Other_Alphabetic # Mc [3] NEWA VOWEL SIGN AA..NEWA VOWEL SIGN II -11438..1143F ; Other_Alphabetic # Mn [8] NEWA VOWEL SIGN U..NEWA VOWEL SIGN AI -11440..11441 ; Other_Alphabetic # Mc [2] NEWA VOWEL SIGN O..NEWA VOWEL SIGN AU -11443..11444 ; Other_Alphabetic # Mn [2] NEWA SIGN CANDRABINDU..NEWA SIGN ANUSVARA -11445 ; Other_Alphabetic # Mc NEWA SIGN VISARGA -114B0..114B2 ; Other_Alphabetic # Mc [3] TIRHUTA VOWEL SIGN AA..TIRHUTA VOWEL SIGN II -114B3..114B8 ; Other_Alphabetic # Mn [6] TIRHUTA VOWEL SIGN U..TIRHUTA VOWEL SIGN VOCALIC LL -114B9 ; Other_Alphabetic # Mc TIRHUTA VOWEL SIGN E -114BA ; Other_Alphabetic # Mn TIRHUTA VOWEL SIGN SHORT E -114BB..114BE ; Other_Alphabetic # Mc [4] TIRHUTA VOWEL SIGN AI..TIRHUTA VOWEL SIGN AU -114BF..114C0 ; Other_Alphabetic # Mn [2] TIRHUTA SIGN CANDRABINDU..TIRHUTA SIGN ANUSVARA -114C1 ; Other_Alphabetic # Mc TIRHUTA SIGN VISARGA -115AF..115B1 ; Other_Alphabetic # Mc [3] SIDDHAM VOWEL SIGN AA..SIDDHAM VOWEL SIGN II -115B2..115B5 ; Other_Alphabetic # Mn [4] SIDDHAM VOWEL SIGN U..SIDDHAM VOWEL SIGN VOCALIC RR -115B8..115BB ; Other_Alphabetic # Mc [4] SIDDHAM VOWEL SIGN E..SIDDHAM VOWEL SIGN AU -115BC..115BD ; Other_Alphabetic # Mn [2] SIDDHAM SIGN CANDRABINDU..SIDDHAM SIGN ANUSVARA -115BE ; Other_Alphabetic # Mc SIDDHAM SIGN VISARGA -115DC..115DD ; Other_Alphabetic # Mn [2] SIDDHAM VOWEL SIGN ALTERNATE U..SIDDHAM VOWEL SIGN ALTERNATE UU -11630..11632 ; Other_Alphabetic # Mc [3] MODI VOWEL SIGN AA..MODI VOWEL SIGN II -11633..1163A ; Other_Alphabetic # Mn [8] MODI VOWEL SIGN U..MODI VOWEL SIGN AI -1163B..1163C ; Other_Alphabetic # Mc [2] MODI VOWEL SIGN O..MODI VOWEL SIGN AU -1163D ; Other_Alphabetic # Mn MODI SIGN ANUSVARA -1163E ; Other_Alphabetic # Mc MODI SIGN VISARGA -11640 ; Other_Alphabetic # Mn MODI SIGN ARDHACANDRA -116AB ; Other_Alphabetic # Mn TAKRI SIGN ANUSVARA -116AC ; Other_Alphabetic # Mc TAKRI SIGN VISARGA -116AD ; Other_Alphabetic # Mn TAKRI VOWEL SIGN AA -116AE..116AF ; Other_Alphabetic # Mc [2] TAKRI VOWEL SIGN I..TAKRI VOWEL SIGN II -116B0..116B5 ; Other_Alphabetic # Mn [6] TAKRI VOWEL SIGN U..TAKRI VOWEL SIGN AU -1171D..1171F ; Other_Alphabetic # Mn [3] AHOM CONSONANT SIGN MEDIAL LA..AHOM CONSONANT SIGN MEDIAL LIGATING RA -11720..11721 ; Other_Alphabetic # Mc [2] AHOM VOWEL SIGN A..AHOM VOWEL SIGN AA -11722..11725 ; Other_Alphabetic # Mn [4] AHOM VOWEL SIGN I..AHOM VOWEL SIGN UU -11726 ; Other_Alphabetic # Mc AHOM VOWEL SIGN E -11727..1172A ; Other_Alphabetic # Mn [4] AHOM VOWEL SIGN AW..AHOM VOWEL SIGN AM -11A01..11A06 ; Other_Alphabetic # Mn [6] ZANABAZAR SQUARE VOWEL SIGN I..ZANABAZAR SQUARE VOWEL SIGN O -11A07..11A08 ; Other_Alphabetic # Mc [2] ZANABAZAR SQUARE VOWEL SIGN AI..ZANABAZAR SQUARE VOWEL SIGN AU -11A09..11A0A ; Other_Alphabetic # Mn [2] ZANABAZAR SQUARE VOWEL SIGN REVERSED I..ZANABAZAR SQUARE VOWEL LENGTH MARK -11A35..11A38 ; Other_Alphabetic # Mn [4] ZANABAZAR SQUARE SIGN CANDRABINDU..ZANABAZAR SQUARE SIGN ANUSVARA -11A39 ; Other_Alphabetic # Mc ZANABAZAR SQUARE SIGN VISARGA -11A3B..11A3E ; Other_Alphabetic # Mn [4] ZANABAZAR SQUARE CLUSTER-FINAL LETTER YA..ZANABAZAR SQUARE CLUSTER-FINAL LETTER VA -11A51..11A56 ; Other_Alphabetic # Mn [6] SOYOMBO VOWEL SIGN I..SOYOMBO VOWEL SIGN OE -11A57..11A58 ; Other_Alphabetic # Mc [2] SOYOMBO VOWEL SIGN AI..SOYOMBO VOWEL SIGN AU -11A59..11A5B ; Other_Alphabetic # Mn [3] SOYOMBO VOWEL SIGN VOCALIC R..SOYOMBO VOWEL LENGTH MARK -11A8A..11A96 ; Other_Alphabetic # Mn [13] SOYOMBO FINAL CONSONANT SIGN G..SOYOMBO SIGN ANUSVARA -11A97 ; Other_Alphabetic # Mc SOYOMBO SIGN VISARGA -11C2F ; Other_Alphabetic # Mc BHAIKSUKI VOWEL SIGN AA -11C30..11C36 ; Other_Alphabetic # Mn [7] BHAIKSUKI VOWEL SIGN I..BHAIKSUKI VOWEL SIGN VOCALIC L -11C38..11C3D ; Other_Alphabetic # Mn [6] BHAIKSUKI VOWEL SIGN E..BHAIKSUKI SIGN ANUSVARA -11C3E ; Other_Alphabetic # Mc BHAIKSUKI SIGN VISARGA -11C92..11CA7 ; Other_Alphabetic # Mn [22] MARCHEN SUBJOINED LETTER KA..MARCHEN SUBJOINED LETTER ZA -11CA9 ; Other_Alphabetic # Mc MARCHEN SUBJOINED LETTER YA -11CAA..11CB0 ; Other_Alphabetic # Mn [7] MARCHEN SUBJOINED LETTER RA..MARCHEN VOWEL SIGN AA -11CB1 ; Other_Alphabetic # Mc MARCHEN VOWEL SIGN I -11CB2..11CB3 ; Other_Alphabetic # Mn [2] MARCHEN VOWEL SIGN U..MARCHEN VOWEL SIGN E -11CB4 ; Other_Alphabetic # Mc MARCHEN VOWEL SIGN O -11CB5..11CB6 ; Other_Alphabetic # Mn [2] MARCHEN SIGN ANUSVARA..MARCHEN SIGN CANDRABINDU -11D31..11D36 ; Other_Alphabetic # Mn [6] MASARAM GONDI VOWEL SIGN AA..MASARAM GONDI VOWEL SIGN VOCALIC R -11D3A ; Other_Alphabetic # Mn MASARAM GONDI VOWEL SIGN E -11D3C..11D3D ; Other_Alphabetic # Mn [2] MASARAM GONDI VOWEL SIGN AI..MASARAM GONDI VOWEL SIGN O -11D3F..11D41 ; Other_Alphabetic # Mn [3] MASARAM GONDI VOWEL SIGN AU..MASARAM GONDI SIGN VISARGA -11D43 ; Other_Alphabetic # Mn MASARAM GONDI SIGN CANDRA -11D47 ; Other_Alphabetic # Mn MASARAM GONDI RA-KARA -16B30..16B36 ; Other_Alphabetic # Mn [7] PAHAWH HMONG MARK CIM TUB..PAHAWH HMONG MARK CIM TAUM -16F51..16F7E ; Other_Alphabetic # Mc [46] MIAO SIGN ASPIRATION..MIAO VOWEL SIGN NG -1BC9E ; Other_Alphabetic # Mn DUPLOYAN DOUBLE MARK -1E000..1E006 ; Other_Alphabetic # Mn [7] COMBINING GLAGOLITIC LETTER AZU..COMBINING GLAGOLITIC LETTER ZHIVETE -1E008..1E018 ; Other_Alphabetic # Mn [17] COMBINING GLAGOLITIC LETTER ZEMLJA..COMBINING GLAGOLITIC LETTER HERU -1E01B..1E021 ; Other_Alphabetic # Mn [7] COMBINING GLAGOLITIC LETTER SHTA..COMBINING GLAGOLITIC LETTER YATI -1E023..1E024 ; Other_Alphabetic # Mn [2] COMBINING GLAGOLITIC LETTER YU..COMBINING GLAGOLITIC LETTER SMALL YUS -1E026..1E02A ; Other_Alphabetic # Mn [5] COMBINING GLAGOLITIC LETTER YO..COMBINING GLAGOLITIC LETTER FITA -1E947 ; Other_Alphabetic # Mn ADLAM HAMZA -1F130..1F149 ; Other_Alphabetic # So [26] SQUARED LATIN CAPITAL LETTER A..SQUARED LATIN CAPITAL LETTER Z -1F150..1F169 ; Other_Alphabetic # So [26] NEGATIVE CIRCLED LATIN CAPITAL LETTER A..NEGATIVE CIRCLED LATIN CAPITAL LETTER Z -1F170..1F189 ; Other_Alphabetic # So [26] NEGATIVE SQUARED LATIN CAPITAL LETTER A..NEGATIVE SQUARED LATIN CAPITAL LETTER Z - -# Total code points: 1300 - -# ================================================ - -3006 ; Ideographic # Lo IDEOGRAPHIC CLOSING MARK -3007 ; Ideographic # Nl IDEOGRAPHIC NUMBER ZERO -3021..3029 ; Ideographic # Nl [9] HANGZHOU NUMERAL ONE..HANGZHOU NUMERAL NINE -3038..303A ; Ideographic # Nl [3] HANGZHOU NUMERAL TEN..HANGZHOU NUMERAL THIRTY -3400..4DB5 ; Ideographic # Lo [6582] CJK UNIFIED IDEOGRAPH-3400..CJK UNIFIED IDEOGRAPH-4DB5 -4E00..9FEA ; Ideographic # Lo [20971] CJK UNIFIED IDEOGRAPH-4E00..CJK UNIFIED IDEOGRAPH-9FEA -F900..FA6D ; Ideographic # Lo [366] CJK COMPATIBILITY IDEOGRAPH-F900..CJK COMPATIBILITY IDEOGRAPH-FA6D -FA70..FAD9 ; Ideographic # Lo [106] CJK COMPATIBILITY IDEOGRAPH-FA70..CJK COMPATIBILITY IDEOGRAPH-FAD9 -17000..187EC ; Ideographic # Lo [6125] TANGUT IDEOGRAPH-17000..TANGUT IDEOGRAPH-187EC -18800..18AF2 ; Ideographic # Lo [755] TANGUT COMPONENT-001..TANGUT COMPONENT-755 -1B170..1B2FB ; Ideographic # Lo [396] NUSHU CHARACTER-1B170..NUSHU CHARACTER-1B2FB -20000..2A6D6 ; Ideographic # Lo [42711] CJK UNIFIED IDEOGRAPH-20000..CJK UNIFIED IDEOGRAPH-2A6D6 -2A700..2B734 ; Ideographic # Lo [4149] CJK UNIFIED IDEOGRAPH-2A700..CJK UNIFIED IDEOGRAPH-2B734 -2B740..2B81D ; Ideographic # Lo [222] CJK UNIFIED IDEOGRAPH-2B740..CJK UNIFIED IDEOGRAPH-2B81D -2B820..2CEA1 ; Ideographic # Lo [5762] CJK UNIFIED IDEOGRAPH-2B820..CJK UNIFIED IDEOGRAPH-2CEA1 -2CEB0..2EBE0 ; Ideographic # Lo [7473] CJK UNIFIED IDEOGRAPH-2CEB0..CJK UNIFIED IDEOGRAPH-2EBE0 -2F800..2FA1D ; Ideographic # Lo [542] CJK COMPATIBILITY IDEOGRAPH-2F800..CJK COMPATIBILITY IDEOGRAPH-2FA1D - -# Total code points: 96174 - -# ================================================ - -005E ; Diacritic # Sk CIRCUMFLEX ACCENT -0060 ; Diacritic # Sk GRAVE ACCENT -00A8 ; Diacritic # Sk DIAERESIS -00AF ; Diacritic # Sk MACRON -00B4 ; Diacritic # Sk ACUTE ACCENT -00B7 ; Diacritic # Po MIDDLE DOT -00B8 ; Diacritic # Sk CEDILLA -02B0..02C1 ; Diacritic # Lm [18] MODIFIER LETTER SMALL H..MODIFIER LETTER REVERSED GLOTTAL STOP -02C2..02C5 ; Diacritic # Sk [4] MODIFIER LETTER LEFT ARROWHEAD..MODIFIER LETTER DOWN ARROWHEAD -02C6..02D1 ; Diacritic # Lm [12] MODIFIER LETTER CIRCUMFLEX ACCENT..MODIFIER LETTER HALF TRIANGULAR COLON -02D2..02DF ; Diacritic # Sk [14] MODIFIER LETTER CENTRED RIGHT HALF RING..MODIFIER LETTER CROSS ACCENT -02E0..02E4 ; Diacritic # Lm [5] MODIFIER LETTER SMALL GAMMA..MODIFIER LETTER SMALL REVERSED GLOTTAL STOP -02E5..02EB ; Diacritic # Sk [7] MODIFIER LETTER EXTRA-HIGH TONE BAR..MODIFIER LETTER YANG DEPARTING TONE MARK -02EC ; Diacritic # Lm MODIFIER LETTER VOICING -02ED ; Diacritic # Sk MODIFIER LETTER UNASPIRATED -02EE ; Diacritic # Lm MODIFIER LETTER DOUBLE APOSTROPHE -02EF..02FF ; Diacritic # Sk [17] MODIFIER LETTER LOW DOWN ARROWHEAD..MODIFIER LETTER LOW LEFT ARROW -0300..034E ; Diacritic # Mn [79] COMBINING GRAVE ACCENT..COMBINING UPWARDS ARROW BELOW -0350..0357 ; Diacritic # Mn [8] COMBINING RIGHT ARROWHEAD ABOVE..COMBINING RIGHT HALF RING ABOVE -035D..0362 ; Diacritic # Mn [6] COMBINING DOUBLE BREVE..COMBINING DOUBLE RIGHTWARDS ARROW BELOW -0374 ; Diacritic # Lm GREEK NUMERAL SIGN -0375 ; Diacritic # Sk GREEK LOWER NUMERAL SIGN -037A ; Diacritic # Lm GREEK YPOGEGRAMMENI -0384..0385 ; Diacritic # Sk [2] GREEK TONOS..GREEK DIALYTIKA TONOS -0483..0487 ; Diacritic # Mn [5] COMBINING CYRILLIC TITLO..COMBINING CYRILLIC POKRYTIE -0559 ; Diacritic # Lm ARMENIAN MODIFIER LETTER LEFT HALF RING -0591..05A1 ; Diacritic # Mn [17] HEBREW ACCENT ETNAHTA..HEBREW ACCENT PAZER -05A3..05BD ; Diacritic # Mn [27] HEBREW ACCENT MUNAH..HEBREW POINT METEG -05BF ; Diacritic # Mn HEBREW POINT RAFE -05C1..05C2 ; Diacritic # Mn [2] HEBREW POINT SHIN DOT..HEBREW POINT SIN DOT -05C4 ; Diacritic # Mn HEBREW MARK UPPER DOT -064B..0652 ; Diacritic # Mn [8] ARABIC FATHATAN..ARABIC SUKUN -0657..0658 ; Diacritic # Mn [2] ARABIC INVERTED DAMMA..ARABIC MARK NOON GHUNNA -06DF..06E0 ; Diacritic # Mn [2] ARABIC SMALL HIGH ROUNDED ZERO..ARABIC SMALL HIGH UPRIGHT RECTANGULAR ZERO -06E5..06E6 ; Diacritic # Lm [2] ARABIC SMALL WAW..ARABIC SMALL YEH -06EA..06EC ; Diacritic # Mn [3] ARABIC EMPTY CENTRE LOW STOP..ARABIC ROUNDED HIGH STOP WITH FILLED CENTRE -0730..074A ; Diacritic # Mn [27] SYRIAC PTHAHA ABOVE..SYRIAC BARREKH -07A6..07B0 ; Diacritic # Mn [11] THAANA ABAFILI..THAANA SUKUN -07EB..07F3 ; Diacritic # Mn [9] NKO COMBINING SHORT HIGH TONE..NKO COMBINING DOUBLE DOT ABOVE -07F4..07F5 ; Diacritic # Lm [2] NKO HIGH TONE APOSTROPHE..NKO LOW TONE APOSTROPHE -0818..0819 ; Diacritic # Mn [2] SAMARITAN MARK OCCLUSION..SAMARITAN MARK DAGESH -08E3..08FE ; Diacritic # Mn [28] ARABIC TURNED DAMMA BELOW..ARABIC DAMMA WITH DOT -093C ; Diacritic # Mn DEVANAGARI SIGN NUKTA -094D ; Diacritic # Mn DEVANAGARI SIGN VIRAMA -0951..0954 ; Diacritic # Mn [4] DEVANAGARI STRESS SIGN UDATTA..DEVANAGARI ACUTE ACCENT -0971 ; Diacritic # Lm DEVANAGARI SIGN HIGH SPACING DOT -09BC ; Diacritic # Mn BENGALI SIGN NUKTA -09CD ; Diacritic # Mn BENGALI SIGN VIRAMA -0A3C ; Diacritic # Mn GURMUKHI SIGN NUKTA -0A4D ; Diacritic # Mn GURMUKHI SIGN VIRAMA -0ABC ; Diacritic # Mn GUJARATI SIGN NUKTA -0ACD ; Diacritic # Mn GUJARATI SIGN VIRAMA -0AFD..0AFF ; Diacritic # Mn [3] GUJARATI SIGN THREE-DOT NUKTA ABOVE..GUJARATI SIGN TWO-CIRCLE NUKTA ABOVE -0B3C ; Diacritic # Mn ORIYA SIGN NUKTA -0B4D ; Diacritic # Mn ORIYA SIGN VIRAMA -0BCD ; Diacritic # Mn TAMIL SIGN VIRAMA -0C4D ; Diacritic # Mn TELUGU SIGN VIRAMA -0CBC ; Diacritic # Mn KANNADA SIGN NUKTA -0CCD ; Diacritic # Mn KANNADA SIGN VIRAMA -0D3B..0D3C ; Diacritic # Mn [2] MALAYALAM SIGN VERTICAL BAR VIRAMA..MALAYALAM SIGN CIRCULAR VIRAMA -0D4D ; Diacritic # Mn MALAYALAM SIGN VIRAMA -0DCA ; Diacritic # Mn SINHALA SIGN AL-LAKUNA -0E47..0E4C ; Diacritic # Mn [6] THAI CHARACTER MAITAIKHU..THAI CHARACTER THANTHAKHAT -0E4E ; Diacritic # Mn THAI CHARACTER YAMAKKAN -0EC8..0ECC ; Diacritic # Mn [5] LAO TONE MAI EK..LAO CANCELLATION MARK -0F18..0F19 ; Diacritic # Mn [2] TIBETAN ASTROLOGICAL SIGN -KHYUD PA..TIBETAN ASTROLOGICAL SIGN SDONG TSHUGS -0F35 ; Diacritic # Mn TIBETAN MARK NGAS BZUNG NYI ZLA -0F37 ; Diacritic # Mn TIBETAN MARK NGAS BZUNG SGOR RTAGS -0F39 ; Diacritic # Mn TIBETAN MARK TSA -PHRU -0F3E..0F3F ; Diacritic # Mc [2] TIBETAN SIGN YAR TSHES..TIBETAN SIGN MAR TSHES -0F82..0F84 ; Diacritic # Mn [3] TIBETAN SIGN NYI ZLA NAA DA..TIBETAN MARK HALANTA -0F86..0F87 ; Diacritic # Mn [2] TIBETAN SIGN LCI RTAGS..TIBETAN SIGN YANG RTAGS -0FC6 ; Diacritic # Mn TIBETAN SYMBOL PADMA GDAN -1037 ; Diacritic # Mn MYANMAR SIGN DOT BELOW -1039..103A ; Diacritic # Mn [2] MYANMAR SIGN VIRAMA..MYANMAR SIGN ASAT -1087..108C ; Diacritic # Mc [6] MYANMAR SIGN SHAN TONE-2..MYANMAR SIGN SHAN COUNCIL TONE-3 -108D ; Diacritic # Mn MYANMAR SIGN SHAN COUNCIL EMPHATIC TONE -108F ; Diacritic # Mc MYANMAR SIGN RUMAI PALAUNG TONE-5 -109A..109B ; Diacritic # Mc [2] MYANMAR SIGN KHAMTI TONE-1..MYANMAR SIGN KHAMTI TONE-3 -17C9..17D3 ; Diacritic # Mn [11] KHMER SIGN MUUSIKATOAN..KHMER SIGN BATHAMASAT -17DD ; Diacritic # Mn KHMER SIGN ATTHACAN -1939..193B ; Diacritic # Mn [3] LIMBU SIGN MUKPHRENG..LIMBU SIGN SA-I -1A75..1A7C ; Diacritic # Mn [8] TAI THAM SIGN TONE-1..TAI THAM SIGN KHUEN-LUE KARAN -1A7F ; Diacritic # Mn TAI THAM COMBINING CRYPTOGRAMMIC DOT -1AB0..1ABD ; Diacritic # Mn [14] COMBINING DOUBLED CIRCUMFLEX ACCENT..COMBINING PARENTHESES BELOW -1B34 ; Diacritic # Mn BALINESE SIGN REREKAN -1B44 ; Diacritic # Mc BALINESE ADEG ADEG -1B6B..1B73 ; Diacritic # Mn [9] BALINESE MUSICAL SYMBOL COMBINING TEGEH..BALINESE MUSICAL SYMBOL COMBINING GONG -1BAA ; Diacritic # Mc SUNDANESE SIGN PAMAAEH -1BAB ; Diacritic # Mn SUNDANESE SIGN VIRAMA -1C36..1C37 ; Diacritic # Mn [2] LEPCHA SIGN RAN..LEPCHA SIGN NUKTA -1C78..1C7D ; Diacritic # Lm [6] OL CHIKI MU TTUDDAG..OL CHIKI AHAD -1CD0..1CD2 ; Diacritic # Mn [3] VEDIC TONE KARSHANA..VEDIC TONE PRENKHA -1CD3 ; Diacritic # Po VEDIC SIGN NIHSHVASA -1CD4..1CE0 ; Diacritic # Mn [13] VEDIC SIGN YAJURVEDIC MIDLINE SVARITA..VEDIC TONE RIGVEDIC KASHMIRI INDEPENDENT SVARITA -1CE1 ; Diacritic # Mc VEDIC TONE ATHARVAVEDIC INDEPENDENT SVARITA -1CE2..1CE8 ; Diacritic # Mn [7] VEDIC SIGN VISARGA SVARITA..VEDIC SIGN VISARGA ANUDATTA WITH TAIL -1CED ; Diacritic # Mn VEDIC SIGN TIRYAK -1CF4 ; Diacritic # Mn VEDIC TONE CANDRA ABOVE -1CF7 ; Diacritic # Mc VEDIC SIGN ATIKRAMA -1CF8..1CF9 ; Diacritic # Mn [2] VEDIC TONE RING ABOVE..VEDIC TONE DOUBLE RING ABOVE -1D2C..1D6A ; Diacritic # Lm [63] MODIFIER LETTER CAPITAL A..GREEK SUBSCRIPT SMALL LETTER CHI -1DC4..1DCF ; Diacritic # Mn [12] COMBINING MACRON-ACUTE..COMBINING ZIGZAG BELOW -1DF5..1DF9 ; Diacritic # Mn [5] COMBINING UP TACK ABOVE..COMBINING WIDE INVERTED BRIDGE BELOW -1DFD..1DFF ; Diacritic # Mn [3] COMBINING ALMOST EQUAL TO BELOW..COMBINING RIGHT ARROWHEAD AND DOWN ARROWHEAD BELOW -1FBD ; Diacritic # Sk GREEK KORONIS -1FBF..1FC1 ; Diacritic # Sk [3] GREEK PSILI..GREEK DIALYTIKA AND PERISPOMENI -1FCD..1FCF ; Diacritic # Sk [3] GREEK PSILI AND VARIA..GREEK PSILI AND PERISPOMENI -1FDD..1FDF ; Diacritic # Sk [3] GREEK DASIA AND VARIA..GREEK DASIA AND PERISPOMENI -1FED..1FEF ; Diacritic # Sk [3] GREEK DIALYTIKA AND VARIA..GREEK VARIA -1FFD..1FFE ; Diacritic # Sk [2] GREEK OXIA..GREEK DASIA -2CEF..2CF1 ; Diacritic # Mn [3] COPTIC COMBINING NI ABOVE..COPTIC COMBINING SPIRITUS LENIS -2E2F ; Diacritic # Lm VERTICAL TILDE -302A..302D ; Diacritic # Mn [4] IDEOGRAPHIC LEVEL TONE MARK..IDEOGRAPHIC ENTERING TONE MARK -302E..302F ; Diacritic # Mc [2] HANGUL SINGLE DOT TONE MARK..HANGUL DOUBLE DOT TONE MARK -3099..309A ; Diacritic # Mn [2] COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK..COMBINING KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK -309B..309C ; Diacritic # Sk [2] KATAKANA-HIRAGANA VOICED SOUND MARK..KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK -30FC ; Diacritic # Lm KATAKANA-HIRAGANA PROLONGED SOUND MARK -A66F ; Diacritic # Mn COMBINING CYRILLIC VZMET -A67C..A67D ; Diacritic # Mn [2] COMBINING CYRILLIC KAVYKA..COMBINING CYRILLIC PAYEROK -A67F ; Diacritic # Lm CYRILLIC PAYEROK -A69C..A69D ; Diacritic # Lm [2] MODIFIER LETTER CYRILLIC HARD SIGN..MODIFIER LETTER CYRILLIC SOFT SIGN -A6F0..A6F1 ; Diacritic # Mn [2] BAMUM COMBINING MARK KOQNDON..BAMUM COMBINING MARK TUKWENTIS -A717..A71F ; Diacritic # Lm [9] MODIFIER LETTER DOT VERTICAL BAR..MODIFIER LETTER LOW INVERTED EXCLAMATION MARK -A720..A721 ; Diacritic # Sk [2] MODIFIER LETTER STRESS AND HIGH TONE..MODIFIER LETTER STRESS AND LOW TONE -A788 ; Diacritic # Lm MODIFIER LETTER LOW CIRCUMFLEX ACCENT -A7F8..A7F9 ; Diacritic # Lm [2] MODIFIER LETTER CAPITAL H WITH STROKE..MODIFIER LETTER SMALL LIGATURE OE -A8C4 ; Diacritic # Mn SAURASHTRA SIGN VIRAMA -A8E0..A8F1 ; Diacritic # Mn [18] COMBINING DEVANAGARI DIGIT ZERO..COMBINING DEVANAGARI SIGN AVAGRAHA -A92B..A92D ; Diacritic # Mn [3] KAYAH LI TONE PLOPHU..KAYAH LI TONE CALYA PLOPHU -A92E ; Diacritic # Po KAYAH LI SIGN CWI -A953 ; Diacritic # Mc REJANG VIRAMA -A9B3 ; Diacritic # Mn JAVANESE SIGN CECAK TELU -A9C0 ; Diacritic # Mc JAVANESE PANGKON -A9E5 ; Diacritic # Mn MYANMAR SIGN SHAN SAW -AA7B ; Diacritic # Mc MYANMAR SIGN PAO KAREN TONE -AA7C ; Diacritic # Mn MYANMAR SIGN TAI LAING TONE-2 -AA7D ; Diacritic # Mc MYANMAR SIGN TAI LAING TONE-5 -AABF ; Diacritic # Mn TAI VIET TONE MAI EK -AAC0 ; Diacritic # Lo TAI VIET TONE MAI NUENG -AAC1 ; Diacritic # Mn TAI VIET TONE MAI THO -AAC2 ; Diacritic # Lo TAI VIET TONE MAI SONG -AAF6 ; Diacritic # Mn MEETEI MAYEK VIRAMA -AB5B ; Diacritic # Sk MODIFIER BREVE WITH INVERTED BREVE -AB5C..AB5F ; Diacritic # Lm [4] MODIFIER LETTER SMALL HENG..MODIFIER LETTER SMALL U WITH LEFT HOOK -ABEC ; Diacritic # Mc MEETEI MAYEK LUM IYEK -ABED ; Diacritic # Mn MEETEI MAYEK APUN IYEK -FB1E ; Diacritic # Mn HEBREW POINT JUDEO-SPANISH VARIKA -FE20..FE2F ; Diacritic # Mn [16] COMBINING LIGATURE LEFT HALF..COMBINING CYRILLIC TITLO RIGHT HALF -FF3E ; Diacritic # Sk FULLWIDTH CIRCUMFLEX ACCENT -FF40 ; Diacritic # Sk FULLWIDTH GRAVE ACCENT -FF70 ; Diacritic # Lm HALFWIDTH KATAKANA-HIRAGANA PROLONGED SOUND MARK -FF9E..FF9F ; Diacritic # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDTH KATAKANA SEMI-VOICED SOUND MARK -FFE3 ; Diacritic # Sk FULLWIDTH MACRON -102E0 ; Diacritic # Mn COPTIC EPACT THOUSANDS MARK -10AE5..10AE6 ; Diacritic # Mn [2] MANICHAEAN ABBREVIATION MARK ABOVE..MANICHAEAN ABBREVIATION MARK BELOW -110B9..110BA ; Diacritic # Mn [2] KAITHI SIGN VIRAMA..KAITHI SIGN NUKTA -11133..11134 ; Diacritic # Mn [2] CHAKMA VIRAMA..CHAKMA MAAYYAA -11173 ; Diacritic # Mn MAHAJANI SIGN NUKTA -111C0 ; Diacritic # Mc SHARADA SIGN VIRAMA -111CA..111CC ; Diacritic # Mn [3] SHARADA SIGN NUKTA..SHARADA EXTRA SHORT VOWEL MARK -11235 ; Diacritic # Mc KHOJKI SIGN VIRAMA -11236 ; Diacritic # Mn KHOJKI SIGN NUKTA -112E9..112EA ; Diacritic # Mn [2] KHUDAWADI SIGN NUKTA..KHUDAWADI SIGN VIRAMA -1133C ; Diacritic # Mn GRANTHA SIGN NUKTA -1134D ; Diacritic # Mc GRANTHA SIGN VIRAMA -11366..1136C ; Diacritic # Mn [7] COMBINING GRANTHA DIGIT ZERO..COMBINING GRANTHA DIGIT SIX -11370..11374 ; Diacritic # Mn [5] COMBINING GRANTHA LETTER A..COMBINING GRANTHA LETTER PA -11442 ; Diacritic # Mn NEWA SIGN VIRAMA -11446 ; Diacritic # Mn NEWA SIGN NUKTA -114C2..114C3 ; Diacritic # Mn [2] TIRHUTA SIGN VIRAMA..TIRHUTA SIGN NUKTA -115BF..115C0 ; Diacritic # Mn [2] SIDDHAM SIGN VIRAMA..SIDDHAM SIGN NUKTA -1163F ; Diacritic # Mn MODI SIGN VIRAMA -116B6 ; Diacritic # Mc TAKRI SIGN VIRAMA -116B7 ; Diacritic # Mn TAKRI SIGN NUKTA -1172B ; Diacritic # Mn AHOM SIGN KILLER -11A34 ; Diacritic # Mn ZANABAZAR SQUARE SIGN VIRAMA -11A47 ; Diacritic # Mn ZANABAZAR SQUARE SUBJOINER -11A99 ; Diacritic # Mn SOYOMBO SUBJOINER -11C3F ; Diacritic # Mn BHAIKSUKI SIGN VIRAMA -11D42 ; Diacritic # Mn MASARAM GONDI SIGN NUKTA -11D44..11D45 ; Diacritic # Mn [2] MASARAM GONDI SIGN HALANTA..MASARAM GONDI VIRAMA -16AF0..16AF4 ; Diacritic # Mn [5] BASSA VAH COMBINING HIGH TONE..BASSA VAH COMBINING HIGH-LOW TONE -16F8F..16F92 ; Diacritic # Mn [4] MIAO TONE RIGHT..MIAO TONE BELOW -16F93..16F9F ; Diacritic # Lm [13] MIAO LETTER TONE-2..MIAO LETTER REFORMED TONE-8 -1D167..1D169 ; Diacritic # Mn [3] MUSICAL SYMBOL COMBINING TREMOLO-1..MUSICAL SYMBOL COMBINING TREMOLO-3 -1D16D..1D172 ; Diacritic # Mc [6] MUSICAL SYMBOL COMBINING AUGMENTATION DOT..MUSICAL SYMBOL COMBINING FLAG-5 -1D17B..1D182 ; Diacritic # Mn [8] MUSICAL SYMBOL COMBINING ACCENT..MUSICAL SYMBOL COMBINING LOURE -1D185..1D18B ; Diacritic # Mn [7] MUSICAL SYMBOL COMBINING DOIT..MUSICAL SYMBOL COMBINING TRIPLE TONGUE -1D1AA..1D1AD ; Diacritic # Mn [4] MUSICAL SYMBOL COMBINING DOWN BOW..MUSICAL SYMBOL COMBINING SNAP PIZZICATO -1E8D0..1E8D6 ; Diacritic # Mn [7] MENDE KIKAKUI COMBINING NUMBER TEENS..MENDE KIKAKUI COMBINING NUMBER MILLIONS -1E944..1E946 ; Diacritic # Mn [3] ADLAM ALIF LENGTHENER..ADLAM GEMINATION MARK -1E948..1E94A ; Diacritic # Mn [3] ADLAM CONSONANT MODIFIER..ADLAM NUKTA - -# Total code points: 798 - -# ================================================ - -00B7 ; Extender # Po MIDDLE DOT -02D0..02D1 ; Extender # Lm [2] MODIFIER LETTER TRIANGULAR COLON..MODIFIER LETTER HALF TRIANGULAR COLON -0640 ; Extender # Lm ARABIC TATWEEL -07FA ; Extender # Lm NKO LAJANYALAN -0E46 ; Extender # Lm THAI CHARACTER MAIYAMOK -0EC6 ; Extender # Lm LAO KO LA -180A ; Extender # Po MONGOLIAN NIRUGU -1843 ; Extender # Lm MONGOLIAN LETTER TODO LONG VOWEL SIGN -1AA7 ; Extender # Lm TAI THAM SIGN MAI YAMOK -1C36 ; Extender # Mn LEPCHA SIGN RAN -1C7B ; Extender # Lm OL CHIKI RELAA -3005 ; Extender # Lm IDEOGRAPHIC ITERATION MARK -3031..3035 ; Extender # Lm [5] VERTICAL KANA REPEAT MARK..VERTICAL KANA REPEAT MARK LOWER HALF -309D..309E ; Extender # Lm [2] HIRAGANA ITERATION MARK..HIRAGANA VOICED ITERATION MARK -30FC..30FE ; Extender # Lm [3] KATAKANA-HIRAGANA PROLONGED SOUND MARK..KATAKANA VOICED ITERATION MARK -A015 ; Extender # Lm YI SYLLABLE WU -A60C ; Extender # Lm VAI SYLLABLE LENGTHENER -A9CF ; Extender # Lm JAVANESE PANGRANGKEP -A9E6 ; Extender # Lm MYANMAR MODIFIER LETTER SHAN REDUPLICATION -AA70 ; Extender # Lm MYANMAR MODIFIER LETTER KHAMTI REDUPLICATION -AADD ; Extender # Lm TAI VIET SYMBOL SAM -AAF3..AAF4 ; Extender # Lm [2] MEETEI MAYEK SYLLABLE REPETITION MARK..MEETEI MAYEK WORD REPETITION MARK -FF70 ; Extender # Lm HALFWIDTH KATAKANA-HIRAGANA PROLONGED SOUND MARK -1135D ; Extender # Lo GRANTHA SIGN PLUTA -115C6..115C8 ; Extender # Po [3] SIDDHAM REPETITION MARK-1..SIDDHAM REPETITION MARK-3 -11A98 ; Extender # Mn SOYOMBO GEMINATION MARK -16B42..16B43 ; Extender # Lm [2] PAHAWH HMONG SIGN VOS NRUA..PAHAWH HMONG SIGN IB YAM -16FE0..16FE1 ; Extender # Lm [2] TANGUT ITERATION MARK..NUSHU ITERATION MARK -1E944..1E946 ; Extender # Mn [3] ADLAM ALIF LENGTHENER..ADLAM GEMINATION MARK - -# Total code points: 44 - -# ================================================ - -00AA ; Other_Lowercase # Lo FEMININE ORDINAL INDICATOR -00BA ; Other_Lowercase # Lo MASCULINE ORDINAL INDICATOR -02B0..02B8 ; Other_Lowercase # Lm [9] MODIFIER LETTER SMALL H..MODIFIER LETTER SMALL Y -02C0..02C1 ; Other_Lowercase # Lm [2] MODIFIER LETTER GLOTTAL STOP..MODIFIER LETTER REVERSED GLOTTAL STOP -02E0..02E4 ; Other_Lowercase # Lm [5] MODIFIER LETTER SMALL GAMMA..MODIFIER LETTER SMALL REVERSED GLOTTAL STOP -0345 ; Other_Lowercase # Mn COMBINING GREEK YPOGEGRAMMENI -037A ; Other_Lowercase # Lm GREEK YPOGEGRAMMENI -1D2C..1D6A ; Other_Lowercase # Lm [63] MODIFIER LETTER CAPITAL A..GREEK SUBSCRIPT SMALL LETTER CHI -1D78 ; Other_Lowercase # Lm MODIFIER LETTER CYRILLIC EN -1D9B..1DBF ; Other_Lowercase # Lm [37] MODIFIER LETTER SMALL TURNED ALPHA..MODIFIER LETTER SMALL THETA -2071 ; Other_Lowercase # Lm SUPERSCRIPT LATIN SMALL LETTER I -207F ; Other_Lowercase # Lm SUPERSCRIPT LATIN SMALL LETTER N -2090..209C ; Other_Lowercase # Lm [13] LATIN SUBSCRIPT SMALL LETTER A..LATIN SUBSCRIPT SMALL LETTER T -2170..217F ; Other_Lowercase # Nl [16] SMALL ROMAN NUMERAL ONE..SMALL ROMAN NUMERAL ONE THOUSAND -24D0..24E9 ; Other_Lowercase # So [26] CIRCLED LATIN SMALL LETTER A..CIRCLED LATIN SMALL LETTER Z -2C7C..2C7D ; Other_Lowercase # Lm [2] LATIN SUBSCRIPT SMALL LETTER J..MODIFIER LETTER CAPITAL V -A69C..A69D ; Other_Lowercase # Lm [2] MODIFIER LETTER CYRILLIC HARD SIGN..MODIFIER LETTER CYRILLIC SOFT SIGN -A770 ; Other_Lowercase # Lm MODIFIER LETTER US -A7F8..A7F9 ; Other_Lowercase # Lm [2] MODIFIER LETTER CAPITAL H WITH STROKE..MODIFIER LETTER SMALL LIGATURE OE -AB5C..AB5F ; Other_Lowercase # Lm [4] MODIFIER LETTER SMALL HENG..MODIFIER LETTER SMALL U WITH LEFT HOOK - -# Total code points: 189 - -# ================================================ - -2160..216F ; Other_Uppercase # Nl [16] ROMAN NUMERAL ONE..ROMAN NUMERAL ONE THOUSAND -24B6..24CF ; Other_Uppercase # So [26] CIRCLED LATIN CAPITAL LETTER A..CIRCLED LATIN CAPITAL LETTER Z -1F130..1F149 ; Other_Uppercase # So [26] SQUARED LATIN CAPITAL LETTER A..SQUARED LATIN CAPITAL LETTER Z -1F150..1F169 ; Other_Uppercase # So [26] NEGATIVE CIRCLED LATIN CAPITAL LETTER A..NEGATIVE CIRCLED LATIN CAPITAL LETTER Z -1F170..1F189 ; Other_Uppercase # So [26] NEGATIVE SQUARED LATIN CAPITAL LETTER A..NEGATIVE SQUARED LATIN CAPITAL LETTER Z - -# Total code points: 120 - -# ================================================ - -FDD0..FDEF ; Noncharacter_Code_Point # Cn [32] .. -FFFE..FFFF ; Noncharacter_Code_Point # Cn [2] .. -1FFFE..1FFFF ; Noncharacter_Code_Point # Cn [2] .. -2FFFE..2FFFF ; Noncharacter_Code_Point # Cn [2] .. -3FFFE..3FFFF ; Noncharacter_Code_Point # Cn [2] .. -4FFFE..4FFFF ; Noncharacter_Code_Point # Cn [2] .. -5FFFE..5FFFF ; Noncharacter_Code_Point # Cn [2] .. -6FFFE..6FFFF ; Noncharacter_Code_Point # Cn [2] .. -7FFFE..7FFFF ; Noncharacter_Code_Point # Cn [2] .. -8FFFE..8FFFF ; Noncharacter_Code_Point # Cn [2] .. -9FFFE..9FFFF ; Noncharacter_Code_Point # Cn [2] .. -AFFFE..AFFFF ; Noncharacter_Code_Point # Cn [2] .. -BFFFE..BFFFF ; Noncharacter_Code_Point # Cn [2] .. -CFFFE..CFFFF ; Noncharacter_Code_Point # Cn [2] .. -DFFFE..DFFFF ; Noncharacter_Code_Point # Cn [2] .. -EFFFE..EFFFF ; Noncharacter_Code_Point # Cn [2] .. -FFFFE..FFFFF ; Noncharacter_Code_Point # Cn [2] .. -10FFFE..10FFFF; Noncharacter_Code_Point # Cn [2] .. - -# Total code points: 66 - -# ================================================ - -09BE ; Other_Grapheme_Extend # Mc BENGALI VOWEL SIGN AA -09D7 ; Other_Grapheme_Extend # Mc BENGALI AU LENGTH MARK -0B3E ; Other_Grapheme_Extend # Mc ORIYA VOWEL SIGN AA -0B57 ; Other_Grapheme_Extend # Mc ORIYA AU LENGTH MARK -0BBE ; Other_Grapheme_Extend # Mc TAMIL VOWEL SIGN AA -0BD7 ; Other_Grapheme_Extend # Mc TAMIL AU LENGTH MARK -0CC2 ; Other_Grapheme_Extend # Mc KANNADA VOWEL SIGN UU -0CD5..0CD6 ; Other_Grapheme_Extend # Mc [2] KANNADA LENGTH MARK..KANNADA AI LENGTH MARK -0D3E ; Other_Grapheme_Extend # Mc MALAYALAM VOWEL SIGN AA -0D57 ; Other_Grapheme_Extend # Mc MALAYALAM AU LENGTH MARK -0DCF ; Other_Grapheme_Extend # Mc SINHALA VOWEL SIGN AELA-PILLA -0DDF ; Other_Grapheme_Extend # Mc SINHALA VOWEL SIGN GAYANUKITTA -200C ; Other_Grapheme_Extend # Cf ZERO WIDTH NON-JOINER -302E..302F ; Other_Grapheme_Extend # Mc [2] HANGUL SINGLE DOT TONE MARK..HANGUL DOUBLE DOT TONE MARK -FF9E..FF9F ; Other_Grapheme_Extend # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDTH KATAKANA SEMI-VOICED SOUND MARK -1133E ; Other_Grapheme_Extend # Mc GRANTHA VOWEL SIGN AA -11357 ; Other_Grapheme_Extend # Mc GRANTHA AU LENGTH MARK -114B0 ; Other_Grapheme_Extend # Mc TIRHUTA VOWEL SIGN AA -114BD ; Other_Grapheme_Extend # Mc TIRHUTA VOWEL SIGN SHORT O -115AF ; Other_Grapheme_Extend # Mc SIDDHAM VOWEL SIGN AA -1D165 ; Other_Grapheme_Extend # Mc MUSICAL SYMBOL COMBINING STEM -1D16E..1D172 ; Other_Grapheme_Extend # Mc [5] MUSICAL SYMBOL COMBINING FLAG-1..MUSICAL SYMBOL COMBINING FLAG-5 -E0020..E007F ; Other_Grapheme_Extend # Cf [96] TAG SPACE..CANCEL TAG - -# Total code points: 125 - -# ================================================ - -2FF0..2FF1 ; IDS_Binary_Operator # So [2] IDEOGRAPHIC DESCRIPTION CHARACTER LEFT TO RIGHT..IDEOGRAPHIC DESCRIPTION CHARACTER ABOVE TO BELOW -2FF4..2FFB ; IDS_Binary_Operator # So [8] IDEOGRAPHIC DESCRIPTION CHARACTER FULL SURROUND..IDEOGRAPHIC DESCRIPTION CHARACTER OVERLAID - -# Total code points: 10 - -# ================================================ - -2FF2..2FF3 ; IDS_Trinary_Operator # So [2] IDEOGRAPHIC DESCRIPTION CHARACTER LEFT TO MIDDLE AND RIGHT..IDEOGRAPHIC DESCRIPTION CHARACTER ABOVE TO MIDDLE AND BELOW - -# Total code points: 2 - -# ================================================ - -2E80..2E99 ; Radical # So [26] CJK RADICAL REPEAT..CJK RADICAL RAP -2E9B..2EF3 ; Radical # So [89] CJK RADICAL CHOKE..CJK RADICAL C-SIMPLIFIED TURTLE -2F00..2FD5 ; Radical # So [214] KANGXI RADICAL ONE..KANGXI RADICAL FLUTE - -# Total code points: 329 - -# ================================================ - -3400..4DB5 ; Unified_Ideograph # Lo [6582] CJK UNIFIED IDEOGRAPH-3400..CJK UNIFIED IDEOGRAPH-4DB5 -4E00..9FEA ; Unified_Ideograph # Lo [20971] CJK UNIFIED IDEOGRAPH-4E00..CJK UNIFIED IDEOGRAPH-9FEA -FA0E..FA0F ; Unified_Ideograph # Lo [2] CJK COMPATIBILITY IDEOGRAPH-FA0E..CJK COMPATIBILITY IDEOGRAPH-FA0F -FA11 ; Unified_Ideograph # Lo CJK COMPATIBILITY IDEOGRAPH-FA11 -FA13..FA14 ; Unified_Ideograph # Lo [2] CJK COMPATIBILITY IDEOGRAPH-FA13..CJK COMPATIBILITY IDEOGRAPH-FA14 -FA1F ; Unified_Ideograph # Lo CJK COMPATIBILITY IDEOGRAPH-FA1F -FA21 ; Unified_Ideograph # Lo CJK COMPATIBILITY IDEOGRAPH-FA21 -FA23..FA24 ; Unified_Ideograph # Lo [2] CJK COMPATIBILITY IDEOGRAPH-FA23..CJK COMPATIBILITY IDEOGRAPH-FA24 -FA27..FA29 ; Unified_Ideograph # Lo [3] CJK COMPATIBILITY IDEOGRAPH-FA27..CJK COMPATIBILITY IDEOGRAPH-FA29 -20000..2A6D6 ; Unified_Ideograph # Lo [42711] CJK UNIFIED IDEOGRAPH-20000..CJK UNIFIED IDEOGRAPH-2A6D6 -2A700..2B734 ; Unified_Ideograph # Lo [4149] CJK UNIFIED IDEOGRAPH-2A700..CJK UNIFIED IDEOGRAPH-2B734 -2B740..2B81D ; Unified_Ideograph # Lo [222] CJK UNIFIED IDEOGRAPH-2B740..CJK UNIFIED IDEOGRAPH-2B81D -2B820..2CEA1 ; Unified_Ideograph # Lo [5762] CJK UNIFIED IDEOGRAPH-2B820..CJK UNIFIED IDEOGRAPH-2CEA1 -2CEB0..2EBE0 ; Unified_Ideograph # Lo [7473] CJK UNIFIED IDEOGRAPH-2CEB0..CJK UNIFIED IDEOGRAPH-2EBE0 - -# Total code points: 87882 - -# ================================================ - -034F ; Other_Default_Ignorable_Code_Point # Mn COMBINING GRAPHEME JOINER -115F..1160 ; Other_Default_Ignorable_Code_Point # Lo [2] HANGUL CHOSEONG FILLER..HANGUL JUNGSEONG FILLER -17B4..17B5 ; Other_Default_Ignorable_Code_Point # Mn [2] KHMER VOWEL INHERENT AQ..KHMER VOWEL INHERENT AA -2065 ; Other_Default_Ignorable_Code_Point # Cn -3164 ; Other_Default_Ignorable_Code_Point # Lo HANGUL FILLER -FFA0 ; Other_Default_Ignorable_Code_Point # Lo HALFWIDTH HANGUL FILLER -FFF0..FFF8 ; Other_Default_Ignorable_Code_Point # Cn [9] .. -E0000 ; Other_Default_Ignorable_Code_Point # Cn -E0002..E001F ; Other_Default_Ignorable_Code_Point # Cn [30] .. -E0080..E00FF ; Other_Default_Ignorable_Code_Point # Cn [128] .. -E01F0..E0FFF ; Other_Default_Ignorable_Code_Point # Cn [3600] .. - -# Total code points: 3776 - -# ================================================ - -0149 ; Deprecated # L& LATIN SMALL LETTER N PRECEDED BY APOSTROPHE -0673 ; Deprecated # Lo ARABIC LETTER ALEF WITH WAVY HAMZA BELOW -0F77 ; Deprecated # Mn TIBETAN VOWEL SIGN VOCALIC RR -0F79 ; Deprecated # Mn TIBETAN VOWEL SIGN VOCALIC LL -17A3..17A4 ; Deprecated # Lo [2] KHMER INDEPENDENT VOWEL QAQ..KHMER INDEPENDENT VOWEL QAA -206A..206F ; Deprecated # Cf [6] INHIBIT SYMMETRIC SWAPPING..NOMINAL DIGIT SHAPES -2329 ; Deprecated # Ps LEFT-POINTING ANGLE BRACKET -232A ; Deprecated # Pe RIGHT-POINTING ANGLE BRACKET -E0001 ; Deprecated # Cf LANGUAGE TAG - -# Total code points: 15 - -# ================================================ - -0069..006A ; Soft_Dotted # L& [2] LATIN SMALL LETTER I..LATIN SMALL LETTER J -012F ; Soft_Dotted # L& LATIN SMALL LETTER I WITH OGONEK -0249 ; Soft_Dotted # L& LATIN SMALL LETTER J WITH STROKE -0268 ; Soft_Dotted # L& LATIN SMALL LETTER I WITH STROKE -029D ; Soft_Dotted # L& LATIN SMALL LETTER J WITH CROSSED-TAIL -02B2 ; Soft_Dotted # Lm MODIFIER LETTER SMALL J -03F3 ; Soft_Dotted # L& GREEK LETTER YOT -0456 ; Soft_Dotted # L& CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I -0458 ; Soft_Dotted # L& CYRILLIC SMALL LETTER JE -1D62 ; Soft_Dotted # Lm LATIN SUBSCRIPT SMALL LETTER I -1D96 ; Soft_Dotted # L& LATIN SMALL LETTER I WITH RETROFLEX HOOK -1DA4 ; Soft_Dotted # Lm MODIFIER LETTER SMALL I WITH STROKE -1DA8 ; Soft_Dotted # Lm MODIFIER LETTER SMALL J WITH CROSSED-TAIL -1E2D ; Soft_Dotted # L& LATIN SMALL LETTER I WITH TILDE BELOW -1ECB ; Soft_Dotted # L& LATIN SMALL LETTER I WITH DOT BELOW -2071 ; Soft_Dotted # Lm SUPERSCRIPT LATIN SMALL LETTER I -2148..2149 ; Soft_Dotted # L& [2] DOUBLE-STRUCK ITALIC SMALL I..DOUBLE-STRUCK ITALIC SMALL J -2C7C ; Soft_Dotted # Lm LATIN SUBSCRIPT SMALL LETTER J -1D422..1D423 ; Soft_Dotted # L& [2] MATHEMATICAL BOLD SMALL I..MATHEMATICAL BOLD SMALL J -1D456..1D457 ; Soft_Dotted # L& [2] MATHEMATICAL ITALIC SMALL I..MATHEMATICAL ITALIC SMALL J -1D48A..1D48B ; Soft_Dotted # L& [2] MATHEMATICAL BOLD ITALIC SMALL I..MATHEMATICAL BOLD ITALIC SMALL J -1D4BE..1D4BF ; Soft_Dotted # L& [2] MATHEMATICAL SCRIPT SMALL I..MATHEMATICAL SCRIPT SMALL J -1D4F2..1D4F3 ; Soft_Dotted # L& [2] MATHEMATICAL BOLD SCRIPT SMALL I..MATHEMATICAL BOLD SCRIPT SMALL J -1D526..1D527 ; Soft_Dotted # L& [2] MATHEMATICAL FRAKTUR SMALL I..MATHEMATICAL FRAKTUR SMALL J -1D55A..1D55B ; Soft_Dotted # L& [2] MATHEMATICAL DOUBLE-STRUCK SMALL I..MATHEMATICAL DOUBLE-STRUCK SMALL J -1D58E..1D58F ; Soft_Dotted # L& [2] MATHEMATICAL BOLD FRAKTUR SMALL I..MATHEMATICAL BOLD FRAKTUR SMALL J -1D5C2..1D5C3 ; Soft_Dotted # L& [2] MATHEMATICAL SANS-SERIF SMALL I..MATHEMATICAL SANS-SERIF SMALL J -1D5F6..1D5F7 ; Soft_Dotted # L& [2] MATHEMATICAL SANS-SERIF BOLD SMALL I..MATHEMATICAL SANS-SERIF BOLD SMALL J -1D62A..1D62B ; Soft_Dotted # L& [2] MATHEMATICAL SANS-SERIF ITALIC SMALL I..MATHEMATICAL SANS-SERIF ITALIC SMALL J -1D65E..1D65F ; Soft_Dotted # L& [2] MATHEMATICAL SANS-SERIF BOLD ITALIC SMALL I..MATHEMATICAL SANS-SERIF BOLD ITALIC SMALL J -1D692..1D693 ; Soft_Dotted # L& [2] MATHEMATICAL MONOSPACE SMALL I..MATHEMATICAL MONOSPACE SMALL J - -# Total code points: 46 - -# ================================================ - -0E40..0E44 ; Logical_Order_Exception # Lo [5] THAI CHARACTER SARA E..THAI CHARACTER SARA AI MAIMALAI -0EC0..0EC4 ; Logical_Order_Exception # Lo [5] LAO VOWEL SIGN E..LAO VOWEL SIGN AI -19B5..19B7 ; Logical_Order_Exception # Lo [3] NEW TAI LUE VOWEL SIGN E..NEW TAI LUE VOWEL SIGN O -19BA ; Logical_Order_Exception # Lo NEW TAI LUE VOWEL SIGN AY -AAB5..AAB6 ; Logical_Order_Exception # Lo [2] TAI VIET VOWEL E..TAI VIET VOWEL O -AAB9 ; Logical_Order_Exception # Lo TAI VIET VOWEL UEA -AABB..AABC ; Logical_Order_Exception # Lo [2] TAI VIET VOWEL AUE..TAI VIET VOWEL AY - -# Total code points: 19 - -# ================================================ - -1885..1886 ; Other_ID_Start # Mn [2] MONGOLIAN LETTER ALI GALI BALUDA..MONGOLIAN LETTER ALI GALI THREE BALUDA -2118 ; Other_ID_Start # Sm SCRIPT CAPITAL P -212E ; Other_ID_Start # So ESTIMATED SYMBOL -309B..309C ; Other_ID_Start # Sk [2] KATAKANA-HIRAGANA VOICED SOUND MARK..KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK - -# Total code points: 6 - -# ================================================ - -00B7 ; Other_ID_Continue # Po MIDDLE DOT -0387 ; Other_ID_Continue # Po GREEK ANO TELEIA -1369..1371 ; Other_ID_Continue # No [9] ETHIOPIC DIGIT ONE..ETHIOPIC DIGIT NINE -19DA ; Other_ID_Continue # No NEW TAI LUE THAM DIGIT ONE - -# Total code points: 12 - -# ================================================ - -0021 ; Sentence_Terminal # Po EXCLAMATION MARK -002E ; Sentence_Terminal # Po FULL STOP -003F ; Sentence_Terminal # Po QUESTION MARK -0589 ; Sentence_Terminal # Po ARMENIAN FULL STOP -061F ; Sentence_Terminal # Po ARABIC QUESTION MARK -06D4 ; Sentence_Terminal # Po ARABIC FULL STOP -0700..0702 ; Sentence_Terminal # Po [3] SYRIAC END OF PARAGRAPH..SYRIAC SUBLINEAR FULL STOP -07F9 ; Sentence_Terminal # Po NKO EXCLAMATION MARK -0964..0965 ; Sentence_Terminal # Po [2] DEVANAGARI DANDA..DEVANAGARI DOUBLE DANDA -104A..104B ; Sentence_Terminal # Po [2] MYANMAR SIGN LITTLE SECTION..MYANMAR SIGN SECTION -1362 ; Sentence_Terminal # Po ETHIOPIC FULL STOP -1367..1368 ; Sentence_Terminal # Po [2] ETHIOPIC QUESTION MARK..ETHIOPIC PARAGRAPH SEPARATOR -166E ; Sentence_Terminal # Po CANADIAN SYLLABICS FULL STOP -1735..1736 ; Sentence_Terminal # Po [2] PHILIPPINE SINGLE PUNCTUATION..PHILIPPINE DOUBLE PUNCTUATION -1803 ; Sentence_Terminal # Po MONGOLIAN FULL STOP -1809 ; Sentence_Terminal # Po MONGOLIAN MANCHU FULL STOP -1944..1945 ; Sentence_Terminal # Po [2] LIMBU EXCLAMATION MARK..LIMBU QUESTION MARK -1AA8..1AAB ; Sentence_Terminal # Po [4] TAI THAM SIGN KAAN..TAI THAM SIGN SATKAANKUU -1B5A..1B5B ; Sentence_Terminal # Po [2] BALINESE PANTI..BALINESE PAMADA -1B5E..1B5F ; Sentence_Terminal # Po [2] BALINESE CARIK SIKI..BALINESE CARIK PAREREN -1C3B..1C3C ; Sentence_Terminal # Po [2] LEPCHA PUNCTUATION TA-ROL..LEPCHA PUNCTUATION NYET THYOOM TA-ROL -1C7E..1C7F ; Sentence_Terminal # Po [2] OL CHIKI PUNCTUATION MUCAAD..OL CHIKI PUNCTUATION DOUBLE MUCAAD -203C..203D ; Sentence_Terminal # Po [2] DOUBLE EXCLAMATION MARK..INTERROBANG -2047..2049 ; Sentence_Terminal # Po [3] DOUBLE QUESTION MARK..EXCLAMATION QUESTION MARK -2E2E ; Sentence_Terminal # Po REVERSED QUESTION MARK -2E3C ; Sentence_Terminal # Po STENOGRAPHIC FULL STOP -3002 ; Sentence_Terminal # Po IDEOGRAPHIC FULL STOP -A4FF ; Sentence_Terminal # Po LISU PUNCTUATION FULL STOP -A60E..A60F ; Sentence_Terminal # Po [2] VAI FULL STOP..VAI QUESTION MARK -A6F3 ; Sentence_Terminal # Po BAMUM FULL STOP -A6F7 ; Sentence_Terminal # Po BAMUM QUESTION MARK -A876..A877 ; Sentence_Terminal # Po [2] PHAGS-PA MARK SHAD..PHAGS-PA MARK DOUBLE SHAD -A8CE..A8CF ; Sentence_Terminal # Po [2] SAURASHTRA DANDA..SAURASHTRA DOUBLE DANDA -A92F ; Sentence_Terminal # Po KAYAH LI SIGN SHYA -A9C8..A9C9 ; Sentence_Terminal # Po [2] JAVANESE PADA LINGSA..JAVANESE PADA LUNGSI -AA5D..AA5F ; Sentence_Terminal # Po [3] CHAM PUNCTUATION DANDA..CHAM PUNCTUATION TRIPLE DANDA -AAF0..AAF1 ; Sentence_Terminal # Po [2] MEETEI MAYEK CHEIKHAN..MEETEI MAYEK AHANG KHUDAM -ABEB ; Sentence_Terminal # Po MEETEI MAYEK CHEIKHEI -FE52 ; Sentence_Terminal # Po SMALL FULL STOP -FE56..FE57 ; Sentence_Terminal # Po [2] SMALL QUESTION MARK..SMALL EXCLAMATION MARK -FF01 ; Sentence_Terminal # Po FULLWIDTH EXCLAMATION MARK -FF0E ; Sentence_Terminal # Po FULLWIDTH FULL STOP -FF1F ; Sentence_Terminal # Po FULLWIDTH QUESTION MARK -FF61 ; Sentence_Terminal # Po HALFWIDTH IDEOGRAPHIC FULL STOP -10A56..10A57 ; Sentence_Terminal # Po [2] KHAROSHTHI PUNCTUATION DANDA..KHAROSHTHI PUNCTUATION DOUBLE DANDA -11047..11048 ; Sentence_Terminal # Po [2] BRAHMI DANDA..BRAHMI DOUBLE DANDA -110BE..110C1 ; Sentence_Terminal # Po [4] KAITHI SECTION MARK..KAITHI DOUBLE DANDA -11141..11143 ; Sentence_Terminal # Po [3] CHAKMA DANDA..CHAKMA QUESTION MARK -111C5..111C6 ; Sentence_Terminal # Po [2] SHARADA DANDA..SHARADA DOUBLE DANDA -111CD ; Sentence_Terminal # Po SHARADA SUTRA MARK -111DE..111DF ; Sentence_Terminal # Po [2] SHARADA SECTION MARK-1..SHARADA SECTION MARK-2 -11238..11239 ; Sentence_Terminal # Po [2] KHOJKI DANDA..KHOJKI DOUBLE DANDA -1123B..1123C ; Sentence_Terminal # Po [2] KHOJKI SECTION MARK..KHOJKI DOUBLE SECTION MARK -112A9 ; Sentence_Terminal # Po MULTANI SECTION MARK -1144B..1144C ; Sentence_Terminal # Po [2] NEWA DANDA..NEWA DOUBLE DANDA -115C2..115C3 ; Sentence_Terminal # Po [2] SIDDHAM DANDA..SIDDHAM DOUBLE DANDA -115C9..115D7 ; Sentence_Terminal # Po [15] SIDDHAM END OF TEXT MARK..SIDDHAM SECTION MARK WITH CIRCLES AND FOUR ENCLOSURES -11641..11642 ; Sentence_Terminal # Po [2] MODI DANDA..MODI DOUBLE DANDA -1173C..1173E ; Sentence_Terminal # Po [3] AHOM SIGN SMALL SECTION..AHOM SIGN RULAI -11A42..11A43 ; Sentence_Terminal # Po [2] ZANABAZAR SQUARE MARK SHAD..ZANABAZAR SQUARE MARK DOUBLE SHAD -11A9B..11A9C ; Sentence_Terminal # Po [2] SOYOMBO MARK SHAD..SOYOMBO MARK DOUBLE SHAD -11C41..11C42 ; Sentence_Terminal # Po [2] BHAIKSUKI DANDA..BHAIKSUKI DOUBLE DANDA -16A6E..16A6F ; Sentence_Terminal # Po [2] MRO DANDA..MRO DOUBLE DANDA -16AF5 ; Sentence_Terminal # Po BASSA VAH FULL STOP -16B37..16B38 ; Sentence_Terminal # Po [2] PAHAWH HMONG SIGN VOS THOM..PAHAWH HMONG SIGN VOS TSHAB CEEB -16B44 ; Sentence_Terminal # Po PAHAWH HMONG SIGN XAUS -1BC9F ; Sentence_Terminal # Po DUPLOYAN PUNCTUATION CHINOOK FULL STOP -1DA88 ; Sentence_Terminal # Po SIGNWRITING FULL STOP - -# Total code points: 128 - -# ================================================ - -180B..180D ; Variation_Selector # Mn [3] MONGOLIAN FREE VARIATION SELECTOR ONE..MONGOLIAN FREE VARIATION SELECTOR THREE -FE00..FE0F ; Variation_Selector # Mn [16] VARIATION SELECTOR-1..VARIATION SELECTOR-16 -E0100..E01EF ; Variation_Selector # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 - -# Total code points: 259 - -# ================================================ - -0009..000D ; Pattern_White_Space # Cc [5] .. -0020 ; Pattern_White_Space # Zs SPACE -0085 ; Pattern_White_Space # Cc -200E..200F ; Pattern_White_Space # Cf [2] LEFT-TO-RIGHT MARK..RIGHT-TO-LEFT MARK -2028 ; Pattern_White_Space # Zl LINE SEPARATOR -2029 ; Pattern_White_Space # Zp PARAGRAPH SEPARATOR - -# Total code points: 11 - -# ================================================ - -0021..0023 ; Pattern_Syntax # Po [3] EXCLAMATION MARK..NUMBER SIGN -0024 ; Pattern_Syntax # Sc DOLLAR SIGN -0025..0027 ; Pattern_Syntax # Po [3] PERCENT SIGN..APOSTROPHE -0028 ; Pattern_Syntax # Ps LEFT PARENTHESIS -0029 ; Pattern_Syntax # Pe RIGHT PARENTHESIS -002A ; Pattern_Syntax # Po ASTERISK -002B ; Pattern_Syntax # Sm PLUS SIGN -002C ; Pattern_Syntax # Po COMMA -002D ; Pattern_Syntax # Pd HYPHEN-MINUS -002E..002F ; Pattern_Syntax # Po [2] FULL STOP..SOLIDUS -003A..003B ; Pattern_Syntax # Po [2] COLON..SEMICOLON -003C..003E ; Pattern_Syntax # Sm [3] LESS-THAN SIGN..GREATER-THAN SIGN -003F..0040 ; Pattern_Syntax # Po [2] QUESTION MARK..COMMERCIAL AT -005B ; Pattern_Syntax # Ps LEFT SQUARE BRACKET -005C ; Pattern_Syntax # Po REVERSE SOLIDUS -005D ; Pattern_Syntax # Pe RIGHT SQUARE BRACKET -005E ; Pattern_Syntax # Sk CIRCUMFLEX ACCENT -0060 ; Pattern_Syntax # Sk GRAVE ACCENT -007B ; Pattern_Syntax # Ps LEFT CURLY BRACKET -007C ; Pattern_Syntax # Sm VERTICAL LINE -007D ; Pattern_Syntax # Pe RIGHT CURLY BRACKET -007E ; Pattern_Syntax # Sm TILDE -00A1 ; Pattern_Syntax # Po INVERTED EXCLAMATION MARK -00A2..00A5 ; Pattern_Syntax # Sc [4] CENT SIGN..YEN SIGN -00A6 ; Pattern_Syntax # So BROKEN BAR -00A7 ; Pattern_Syntax # Po SECTION SIGN -00A9 ; Pattern_Syntax # So COPYRIGHT SIGN -00AB ; Pattern_Syntax # Pi LEFT-POINTING DOUBLE ANGLE QUOTATION MARK -00AC ; Pattern_Syntax # Sm NOT SIGN -00AE ; Pattern_Syntax # So REGISTERED SIGN -00B0 ; Pattern_Syntax # So DEGREE SIGN -00B1 ; Pattern_Syntax # Sm PLUS-MINUS SIGN -00B6 ; Pattern_Syntax # Po PILCROW SIGN -00BB ; Pattern_Syntax # Pf RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK -00BF ; Pattern_Syntax # Po INVERTED QUESTION MARK -00D7 ; Pattern_Syntax # Sm MULTIPLICATION SIGN -00F7 ; Pattern_Syntax # Sm DIVISION SIGN -2010..2015 ; Pattern_Syntax # Pd [6] HYPHEN..HORIZONTAL BAR -2016..2017 ; Pattern_Syntax # Po [2] DOUBLE VERTICAL LINE..DOUBLE LOW LINE -2018 ; Pattern_Syntax # Pi LEFT SINGLE QUOTATION MARK -2019 ; Pattern_Syntax # Pf RIGHT SINGLE QUOTATION MARK -201A ; Pattern_Syntax # Ps SINGLE LOW-9 QUOTATION MARK -201B..201C ; Pattern_Syntax # Pi [2] SINGLE HIGH-REVERSED-9 QUOTATION MARK..LEFT DOUBLE QUOTATION MARK -201D ; Pattern_Syntax # Pf RIGHT DOUBLE QUOTATION MARK -201E ; Pattern_Syntax # Ps DOUBLE LOW-9 QUOTATION MARK -201F ; Pattern_Syntax # Pi DOUBLE HIGH-REVERSED-9 QUOTATION MARK -2020..2027 ; Pattern_Syntax # Po [8] DAGGER..HYPHENATION POINT -2030..2038 ; Pattern_Syntax # Po [9] PER MILLE SIGN..CARET -2039 ; Pattern_Syntax # Pi SINGLE LEFT-POINTING ANGLE QUOTATION MARK -203A ; Pattern_Syntax # Pf SINGLE RIGHT-POINTING ANGLE QUOTATION MARK -203B..203E ; Pattern_Syntax # Po [4] REFERENCE MARK..OVERLINE -2041..2043 ; Pattern_Syntax # Po [3] CARET INSERTION POINT..HYPHEN BULLET -2044 ; Pattern_Syntax # Sm FRACTION SLASH -2045 ; Pattern_Syntax # Ps LEFT SQUARE BRACKET WITH QUILL -2046 ; Pattern_Syntax # Pe RIGHT SQUARE BRACKET WITH QUILL -2047..2051 ; Pattern_Syntax # Po [11] DOUBLE QUESTION MARK..TWO ASTERISKS ALIGNED VERTICALLY -2052 ; Pattern_Syntax # Sm COMMERCIAL MINUS SIGN -2053 ; Pattern_Syntax # Po SWUNG DASH -2055..205E ; Pattern_Syntax # Po [10] FLOWER PUNCTUATION MARK..VERTICAL FOUR DOTS -2190..2194 ; Pattern_Syntax # Sm [5] LEFTWARDS ARROW..LEFT RIGHT ARROW -2195..2199 ; Pattern_Syntax # So [5] UP DOWN ARROW..SOUTH WEST ARROW -219A..219B ; Pattern_Syntax # Sm [2] LEFTWARDS ARROW WITH STROKE..RIGHTWARDS ARROW WITH STROKE -219C..219F ; Pattern_Syntax # So [4] LEFTWARDS WAVE ARROW..UPWARDS TWO HEADED ARROW -21A0 ; Pattern_Syntax # Sm RIGHTWARDS TWO HEADED ARROW -21A1..21A2 ; Pattern_Syntax # So [2] DOWNWARDS TWO HEADED ARROW..LEFTWARDS ARROW WITH TAIL -21A3 ; Pattern_Syntax # Sm RIGHTWARDS ARROW WITH TAIL -21A4..21A5 ; Pattern_Syntax # So [2] LEFTWARDS ARROW FROM BAR..UPWARDS ARROW FROM BAR -21A6 ; Pattern_Syntax # Sm RIGHTWARDS ARROW FROM BAR -21A7..21AD ; Pattern_Syntax # So [7] DOWNWARDS ARROW FROM BAR..LEFT RIGHT WAVE ARROW -21AE ; Pattern_Syntax # Sm LEFT RIGHT ARROW WITH STROKE -21AF..21CD ; Pattern_Syntax # So [31] DOWNWARDS ZIGZAG ARROW..LEFTWARDS DOUBLE ARROW WITH STROKE -21CE..21CF ; Pattern_Syntax # Sm [2] LEFT RIGHT DOUBLE ARROW WITH STROKE..RIGHTWARDS DOUBLE ARROW WITH STROKE -21D0..21D1 ; Pattern_Syntax # So [2] LEFTWARDS DOUBLE ARROW..UPWARDS DOUBLE ARROW -21D2 ; Pattern_Syntax # Sm RIGHTWARDS DOUBLE ARROW -21D3 ; Pattern_Syntax # So DOWNWARDS DOUBLE ARROW -21D4 ; Pattern_Syntax # Sm LEFT RIGHT DOUBLE ARROW -21D5..21F3 ; Pattern_Syntax # So [31] UP DOWN DOUBLE ARROW..UP DOWN WHITE ARROW -21F4..22FF ; Pattern_Syntax # Sm [268] RIGHT ARROW WITH SMALL CIRCLE..Z NOTATION BAG MEMBERSHIP -2300..2307 ; Pattern_Syntax # So [8] DIAMETER SIGN..WAVY LINE -2308 ; Pattern_Syntax # Ps LEFT CEILING -2309 ; Pattern_Syntax # Pe RIGHT CEILING -230A ; Pattern_Syntax # Ps LEFT FLOOR -230B ; Pattern_Syntax # Pe RIGHT FLOOR -230C..231F ; Pattern_Syntax # So [20] BOTTOM RIGHT CROP..BOTTOM RIGHT CORNER -2320..2321 ; Pattern_Syntax # Sm [2] TOP HALF INTEGRAL..BOTTOM HALF INTEGRAL -2322..2328 ; Pattern_Syntax # So [7] FROWN..KEYBOARD -2329 ; Pattern_Syntax # Ps LEFT-POINTING ANGLE BRACKET -232A ; Pattern_Syntax # Pe RIGHT-POINTING ANGLE BRACKET -232B..237B ; Pattern_Syntax # So [81] ERASE TO THE LEFT..NOT CHECK MARK -237C ; Pattern_Syntax # Sm RIGHT ANGLE WITH DOWNWARDS ZIGZAG ARROW -237D..239A ; Pattern_Syntax # So [30] SHOULDERED OPEN BOX..CLEAR SCREEN SYMBOL -239B..23B3 ; Pattern_Syntax # Sm [25] LEFT PARENTHESIS UPPER HOOK..SUMMATION BOTTOM -23B4..23DB ; Pattern_Syntax # So [40] TOP SQUARE BRACKET..FUSE -23DC..23E1 ; Pattern_Syntax # Sm [6] TOP PARENTHESIS..BOTTOM TORTOISE SHELL BRACKET -23E2..2426 ; Pattern_Syntax # So [69] WHITE TRAPEZIUM..SYMBOL FOR SUBSTITUTE FORM TWO -2427..243F ; Pattern_Syntax # Cn [25] .. -2440..244A ; Pattern_Syntax # So [11] OCR HOOK..OCR DOUBLE BACKSLASH -244B..245F ; Pattern_Syntax # Cn [21] .. -2500..25B6 ; Pattern_Syntax # So [183] BOX DRAWINGS LIGHT HORIZONTAL..BLACK RIGHT-POINTING TRIANGLE -25B7 ; Pattern_Syntax # Sm WHITE RIGHT-POINTING TRIANGLE -25B8..25C0 ; Pattern_Syntax # So [9] BLACK RIGHT-POINTING SMALL TRIANGLE..BLACK LEFT-POINTING TRIANGLE -25C1 ; Pattern_Syntax # Sm WHITE LEFT-POINTING TRIANGLE -25C2..25F7 ; Pattern_Syntax # So [54] BLACK LEFT-POINTING SMALL TRIANGLE..WHITE CIRCLE WITH UPPER RIGHT QUADRANT -25F8..25FF ; Pattern_Syntax # Sm [8] UPPER LEFT TRIANGLE..LOWER RIGHT TRIANGLE -2600..266E ; Pattern_Syntax # So [111] BLACK SUN WITH RAYS..MUSIC NATURAL SIGN -266F ; Pattern_Syntax # Sm MUSIC SHARP SIGN -2670..2767 ; Pattern_Syntax # So [248] WEST SYRIAC CROSS..ROTATED FLORAL HEART BULLET -2768 ; Pattern_Syntax # Ps MEDIUM LEFT PARENTHESIS ORNAMENT -2769 ; Pattern_Syntax # Pe MEDIUM RIGHT PARENTHESIS ORNAMENT -276A ; Pattern_Syntax # Ps MEDIUM FLATTENED LEFT PARENTHESIS ORNAMENT -276B ; Pattern_Syntax # Pe MEDIUM FLATTENED RIGHT PARENTHESIS ORNAMENT -276C ; Pattern_Syntax # Ps MEDIUM LEFT-POINTING ANGLE BRACKET ORNAMENT -276D ; Pattern_Syntax # Pe MEDIUM RIGHT-POINTING ANGLE BRACKET ORNAMENT -276E ; Pattern_Syntax # Ps HEAVY LEFT-POINTING ANGLE QUOTATION MARK ORNAMENT -276F ; Pattern_Syntax # Pe HEAVY RIGHT-POINTING ANGLE QUOTATION MARK ORNAMENT -2770 ; Pattern_Syntax # Ps HEAVY LEFT-POINTING ANGLE BRACKET ORNAMENT -2771 ; Pattern_Syntax # Pe HEAVY RIGHT-POINTING ANGLE BRACKET ORNAMENT -2772 ; Pattern_Syntax # Ps LIGHT LEFT TORTOISE SHELL BRACKET ORNAMENT -2773 ; Pattern_Syntax # Pe LIGHT RIGHT TORTOISE SHELL BRACKET ORNAMENT -2774 ; Pattern_Syntax # Ps MEDIUM LEFT CURLY BRACKET ORNAMENT -2775 ; Pattern_Syntax # Pe MEDIUM RIGHT CURLY BRACKET ORNAMENT -2794..27BF ; Pattern_Syntax # So [44] HEAVY WIDE-HEADED RIGHTWARDS ARROW..DOUBLE CURLY LOOP -27C0..27C4 ; Pattern_Syntax # Sm [5] THREE DIMENSIONAL ANGLE..OPEN SUPERSET -27C5 ; Pattern_Syntax # Ps LEFT S-SHAPED BAG DELIMITER -27C6 ; Pattern_Syntax # Pe RIGHT S-SHAPED BAG DELIMITER -27C7..27E5 ; Pattern_Syntax # Sm [31] OR WITH DOT INSIDE..WHITE SQUARE WITH RIGHTWARDS TICK -27E6 ; Pattern_Syntax # Ps MATHEMATICAL LEFT WHITE SQUARE BRACKET -27E7 ; Pattern_Syntax # Pe MATHEMATICAL RIGHT WHITE SQUARE BRACKET -27E8 ; Pattern_Syntax # Ps MATHEMATICAL LEFT ANGLE BRACKET -27E9 ; Pattern_Syntax # Pe MATHEMATICAL RIGHT ANGLE BRACKET -27EA ; Pattern_Syntax # Ps MATHEMATICAL LEFT DOUBLE ANGLE BRACKET -27EB ; Pattern_Syntax # Pe MATHEMATICAL RIGHT DOUBLE ANGLE BRACKET -27EC ; Pattern_Syntax # Ps MATHEMATICAL LEFT WHITE TORTOISE SHELL BRACKET -27ED ; Pattern_Syntax # Pe MATHEMATICAL RIGHT WHITE TORTOISE SHELL BRACKET -27EE ; Pattern_Syntax # Ps MATHEMATICAL LEFT FLATTENED PARENTHESIS -27EF ; Pattern_Syntax # Pe MATHEMATICAL RIGHT FLATTENED PARENTHESIS -27F0..27FF ; Pattern_Syntax # Sm [16] UPWARDS QUADRUPLE ARROW..LONG RIGHTWARDS SQUIGGLE ARROW -2800..28FF ; Pattern_Syntax # So [256] BRAILLE PATTERN BLANK..BRAILLE PATTERN DOTS-12345678 -2900..2982 ; Pattern_Syntax # Sm [131] RIGHTWARDS TWO-HEADED ARROW WITH VERTICAL STROKE..Z NOTATION TYPE COLON -2983 ; Pattern_Syntax # Ps LEFT WHITE CURLY BRACKET -2984 ; Pattern_Syntax # Pe RIGHT WHITE CURLY BRACKET -2985 ; Pattern_Syntax # Ps LEFT WHITE PARENTHESIS -2986 ; Pattern_Syntax # Pe RIGHT WHITE PARENTHESIS -2987 ; Pattern_Syntax # Ps Z NOTATION LEFT IMAGE BRACKET -2988 ; Pattern_Syntax # Pe Z NOTATION RIGHT IMAGE BRACKET -2989 ; Pattern_Syntax # Ps Z NOTATION LEFT BINDING BRACKET -298A ; Pattern_Syntax # Pe Z NOTATION RIGHT BINDING BRACKET -298B ; Pattern_Syntax # Ps LEFT SQUARE BRACKET WITH UNDERBAR -298C ; Pattern_Syntax # Pe RIGHT SQUARE BRACKET WITH UNDERBAR -298D ; Pattern_Syntax # Ps LEFT SQUARE BRACKET WITH TICK IN TOP CORNER -298E ; Pattern_Syntax # Pe RIGHT SQUARE BRACKET WITH TICK IN BOTTOM CORNER -298F ; Pattern_Syntax # Ps LEFT SQUARE BRACKET WITH TICK IN BOTTOM CORNER -2990 ; Pattern_Syntax # Pe RIGHT SQUARE BRACKET WITH TICK IN TOP CORNER -2991 ; Pattern_Syntax # Ps LEFT ANGLE BRACKET WITH DOT -2992 ; Pattern_Syntax # Pe RIGHT ANGLE BRACKET WITH DOT -2993 ; Pattern_Syntax # Ps LEFT ARC LESS-THAN BRACKET -2994 ; Pattern_Syntax # Pe RIGHT ARC GREATER-THAN BRACKET -2995 ; Pattern_Syntax # Ps DOUBLE LEFT ARC GREATER-THAN BRACKET -2996 ; Pattern_Syntax # Pe DOUBLE RIGHT ARC LESS-THAN BRACKET -2997 ; Pattern_Syntax # Ps LEFT BLACK TORTOISE SHELL BRACKET -2998 ; Pattern_Syntax # Pe RIGHT BLACK TORTOISE SHELL BRACKET -2999..29D7 ; Pattern_Syntax # Sm [63] DOTTED FENCE..BLACK HOURGLASS -29D8 ; Pattern_Syntax # Ps LEFT WIGGLY FENCE -29D9 ; Pattern_Syntax # Pe RIGHT WIGGLY FENCE -29DA ; Pattern_Syntax # Ps LEFT DOUBLE WIGGLY FENCE -29DB ; Pattern_Syntax # Pe RIGHT DOUBLE WIGGLY FENCE -29DC..29FB ; Pattern_Syntax # Sm [32] INCOMPLETE INFINITY..TRIPLE PLUS -29FC ; Pattern_Syntax # Ps LEFT-POINTING CURVED ANGLE BRACKET -29FD ; Pattern_Syntax # Pe RIGHT-POINTING CURVED ANGLE BRACKET -29FE..2AFF ; Pattern_Syntax # Sm [258] TINY..N-ARY WHITE VERTICAL BAR -2B00..2B2F ; Pattern_Syntax # So [48] NORTH EAST WHITE ARROW..WHITE VERTICAL ELLIPSE -2B30..2B44 ; Pattern_Syntax # Sm [21] LEFT ARROW WITH SMALL CIRCLE..RIGHTWARDS ARROW THROUGH SUPERSET -2B45..2B46 ; Pattern_Syntax # So [2] LEFTWARDS QUADRUPLE ARROW..RIGHTWARDS QUADRUPLE ARROW -2B47..2B4C ; Pattern_Syntax # Sm [6] REVERSE TILDE OPERATOR ABOVE RIGHTWARDS ARROW..RIGHTWARDS ARROW ABOVE REVERSE TILDE OPERATOR -2B4D..2B73 ; Pattern_Syntax # So [39] DOWNWARDS TRIANGLE-HEADED ZIGZAG ARROW..DOWNWARDS TRIANGLE-HEADED ARROW TO BAR -2B74..2B75 ; Pattern_Syntax # Cn [2] .. -2B76..2B95 ; Pattern_Syntax # So [32] NORTH WEST TRIANGLE-HEADED ARROW TO BAR..RIGHTWARDS BLACK ARROW -2B96..2B97 ; Pattern_Syntax # Cn [2] .. -2B98..2BB9 ; Pattern_Syntax # So [34] THREE-D TOP-LIGHTED LEFTWARDS EQUILATERAL ARROWHEAD..UP ARROWHEAD IN A RECTANGLE BOX -2BBA..2BBC ; Pattern_Syntax # Cn [3] .. -2BBD..2BC8 ; Pattern_Syntax # So [12] BALLOT BOX WITH LIGHT X..BLACK MEDIUM RIGHT-POINTING TRIANGLE CENTRED -2BC9 ; Pattern_Syntax # Cn -2BCA..2BD2 ; Pattern_Syntax # So [9] TOP HALF BLACK CIRCLE..GROUP MARK -2BD3..2BEB ; Pattern_Syntax # Cn [25] .. -2BEC..2BEF ; Pattern_Syntax # So [4] LEFTWARDS TWO-HEADED ARROW WITH TRIANGLE ARROWHEADS..DOWNWARDS TWO-HEADED ARROW WITH TRIANGLE ARROWHEADS -2BF0..2BFF ; Pattern_Syntax # Cn [16] .. -2E00..2E01 ; Pattern_Syntax # Po [2] RIGHT ANGLE SUBSTITUTION MARKER..RIGHT ANGLE DOTTED SUBSTITUTION MARKER -2E02 ; Pattern_Syntax # Pi LEFT SUBSTITUTION BRACKET -2E03 ; Pattern_Syntax # Pf RIGHT SUBSTITUTION BRACKET -2E04 ; Pattern_Syntax # Pi LEFT DOTTED SUBSTITUTION BRACKET -2E05 ; Pattern_Syntax # Pf RIGHT DOTTED SUBSTITUTION BRACKET -2E06..2E08 ; Pattern_Syntax # Po [3] RAISED INTERPOLATION MARKER..DOTTED TRANSPOSITION MARKER -2E09 ; Pattern_Syntax # Pi LEFT TRANSPOSITION BRACKET -2E0A ; Pattern_Syntax # Pf RIGHT TRANSPOSITION BRACKET -2E0B ; Pattern_Syntax # Po RAISED SQUARE -2E0C ; Pattern_Syntax # Pi LEFT RAISED OMISSION BRACKET -2E0D ; Pattern_Syntax # Pf RIGHT RAISED OMISSION BRACKET -2E0E..2E16 ; Pattern_Syntax # Po [9] EDITORIAL CORONIS..DOTTED RIGHT-POINTING ANGLE -2E17 ; Pattern_Syntax # Pd DOUBLE OBLIQUE HYPHEN -2E18..2E19 ; Pattern_Syntax # Po [2] INVERTED INTERROBANG..PALM BRANCH -2E1A ; Pattern_Syntax # Pd HYPHEN WITH DIAERESIS -2E1B ; Pattern_Syntax # Po TILDE WITH RING ABOVE -2E1C ; Pattern_Syntax # Pi LEFT LOW PARAPHRASE BRACKET -2E1D ; Pattern_Syntax # Pf RIGHT LOW PARAPHRASE BRACKET -2E1E..2E1F ; Pattern_Syntax # Po [2] TILDE WITH DOT ABOVE..TILDE WITH DOT BELOW -2E20 ; Pattern_Syntax # Pi LEFT VERTICAL BAR WITH QUILL -2E21 ; Pattern_Syntax # Pf RIGHT VERTICAL BAR WITH QUILL -2E22 ; Pattern_Syntax # Ps TOP LEFT HALF BRACKET -2E23 ; Pattern_Syntax # Pe TOP RIGHT HALF BRACKET -2E24 ; Pattern_Syntax # Ps BOTTOM LEFT HALF BRACKET -2E25 ; Pattern_Syntax # Pe BOTTOM RIGHT HALF BRACKET -2E26 ; Pattern_Syntax # Ps LEFT SIDEWAYS U BRACKET -2E27 ; Pattern_Syntax # Pe RIGHT SIDEWAYS U BRACKET -2E28 ; Pattern_Syntax # Ps LEFT DOUBLE PARENTHESIS -2E29 ; Pattern_Syntax # Pe RIGHT DOUBLE PARENTHESIS -2E2A..2E2E ; Pattern_Syntax # Po [5] TWO DOTS OVER ONE DOT PUNCTUATION..REVERSED QUESTION MARK -2E2F ; Pattern_Syntax # Lm VERTICAL TILDE -2E30..2E39 ; Pattern_Syntax # Po [10] RING POINT..TOP HALF SECTION SIGN -2E3A..2E3B ; Pattern_Syntax # Pd [2] TWO-EM DASH..THREE-EM DASH -2E3C..2E3F ; Pattern_Syntax # Po [4] STENOGRAPHIC FULL STOP..CAPITULUM -2E40 ; Pattern_Syntax # Pd DOUBLE HYPHEN -2E41 ; Pattern_Syntax # Po REVERSED COMMA -2E42 ; Pattern_Syntax # Ps DOUBLE LOW-REVERSED-9 QUOTATION MARK -2E43..2E49 ; Pattern_Syntax # Po [7] DASH WITH LEFT UPTURN..DOUBLE STACKED COMMA -2E4A..2E7F ; Pattern_Syntax # Cn [54] .. -3001..3003 ; Pattern_Syntax # Po [3] IDEOGRAPHIC COMMA..DITTO MARK -3008 ; Pattern_Syntax # Ps LEFT ANGLE BRACKET -3009 ; Pattern_Syntax # Pe RIGHT ANGLE BRACKET -300A ; Pattern_Syntax # Ps LEFT DOUBLE ANGLE BRACKET -300B ; Pattern_Syntax # Pe RIGHT DOUBLE ANGLE BRACKET -300C ; Pattern_Syntax # Ps LEFT CORNER BRACKET -300D ; Pattern_Syntax # Pe RIGHT CORNER BRACKET -300E ; Pattern_Syntax # Ps LEFT WHITE CORNER BRACKET -300F ; Pattern_Syntax # Pe RIGHT WHITE CORNER BRACKET -3010 ; Pattern_Syntax # Ps LEFT BLACK LENTICULAR BRACKET -3011 ; Pattern_Syntax # Pe RIGHT BLACK LENTICULAR BRACKET -3012..3013 ; Pattern_Syntax # So [2] POSTAL MARK..GETA MARK -3014 ; Pattern_Syntax # Ps LEFT TORTOISE SHELL BRACKET -3015 ; Pattern_Syntax # Pe RIGHT TORTOISE SHELL BRACKET -3016 ; Pattern_Syntax # Ps LEFT WHITE LENTICULAR BRACKET -3017 ; Pattern_Syntax # Pe RIGHT WHITE LENTICULAR BRACKET -3018 ; Pattern_Syntax # Ps LEFT WHITE TORTOISE SHELL BRACKET -3019 ; Pattern_Syntax # Pe RIGHT WHITE TORTOISE SHELL BRACKET -301A ; Pattern_Syntax # Ps LEFT WHITE SQUARE BRACKET -301B ; Pattern_Syntax # Pe RIGHT WHITE SQUARE BRACKET -301C ; Pattern_Syntax # Pd WAVE DASH -301D ; Pattern_Syntax # Ps REVERSED DOUBLE PRIME QUOTATION MARK -301E..301F ; Pattern_Syntax # Pe [2] DOUBLE PRIME QUOTATION MARK..LOW DOUBLE PRIME QUOTATION MARK -3020 ; Pattern_Syntax # So POSTAL MARK FACE -3030 ; Pattern_Syntax # Pd WAVY DASH -FD3E ; Pattern_Syntax # Pe ORNATE LEFT PARENTHESIS -FD3F ; Pattern_Syntax # Ps ORNATE RIGHT PARENTHESIS -FE45..FE46 ; Pattern_Syntax # Po [2] SESAME DOT..WHITE SESAME DOT - -# Total code points: 2760 - -# ================================================ - -0600..0605 ; Prepended_Concatenation_Mark # Cf [6] ARABIC NUMBER SIGN..ARABIC NUMBER MARK ABOVE -06DD ; Prepended_Concatenation_Mark # Cf ARABIC END OF AYAH -070F ; Prepended_Concatenation_Mark # Cf SYRIAC ABBREVIATION MARK -08E2 ; Prepended_Concatenation_Mark # Cf ARABIC DISPUTED END OF AYAH -110BD ; Prepended_Concatenation_Mark # Cf KAITHI NUMBER SIGN - -# Total code points: 10 - -# ================================================ - -1F1E6..1F1FF ; Regional_Indicator # So [26] REGIONAL INDICATOR SYMBOL LETTER A..REGIONAL INDICATOR SYMBOL LETTER Z - -# Total code points: 26 - -# EOF diff --git a/lib-satysfi/unidata/PropertyAliases.txt b/lib-satysfi/unidata/PropertyAliases.txt deleted file mode 100644 index f2e26ba05..000000000 --- a/lib-satysfi/unidata/PropertyAliases.txt +++ /dev/null @@ -1,197 +0,0 @@ -# PropertyAliases-10.0.0.txt -# Date: 2017-02-14, 04:26:16 GMT -# © 2017 Unicode®, Inc. -# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. -# For terms of use, see http://www.unicode.org/terms_of_use.html -# -# Unicode Character Database -# For documentation, see http://www.unicode.org/reports/tr44/ -# -# This file contains aliases for properties used in the UCD. -# These names can be used for XML formats of UCD data, for regular-expression -# property tests, and other programmatic textual descriptions of Unicode data. -# -# The names may be translated in appropriate environments, and additional -# aliases may be useful. -# -# FORMAT -# -# Each line has two or more fields, separated by semicolons. -# -# First Field: The first field is an abbreviated name for the property. -# -# Second Field: The second field is a long name -# -# The above are the preferred aliases. Other aliases may be listed in additional fields. -# -# Loose matching should be applied to all property names and property values, with -# the exception of String Property values. With loose matching of property names and -# values, the case distinctions, whitespace, and '_' are ignored. For Numeric Property -# values, numeric equivalencies are applied: thus "01.00" is equivalent to "1". -# -# NOTE: Property value names are NOT unique across properties. For example: -# -# AL means Arabic Letter for the Bidi_Class property, and -# AL means Above_Left for the Combining_Class property, and -# AL means Alphabetic for the Line_Break property. -# -# In addition, some property names may be the same as some property value names. -# For example: -# -# sc means the Script property, and -# Sc means the General_Category property value Currency_Symbol (Sc) -# -# The combination of property value and property name is, however, unique. -# -# For more information, see UTS #18: Unicode Regular Expressions -# ================================================ - - -# ================================================ -# Numeric Properties -# ================================================ -cjkAccountingNumeric ; kAccountingNumeric -cjkOtherNumeric ; kOtherNumeric -cjkPrimaryNumeric ; kPrimaryNumeric -nv ; Numeric_Value - -# ================================================ -# String Properties -# ================================================ -cf ; Case_Folding -cjkCompatibilityVariant ; kCompatibilityVariant -dm ; Decomposition_Mapping -FC_NFKC ; FC_NFKC_Closure -lc ; Lowercase_Mapping -NFKC_CF ; NFKC_Casefold -scf ; Simple_Case_Folding ; sfc -slc ; Simple_Lowercase_Mapping -stc ; Simple_Titlecase_Mapping -suc ; Simple_Uppercase_Mapping -tc ; Titlecase_Mapping -uc ; Uppercase_Mapping - -# ================================================ -# Miscellaneous Properties -# ================================================ -bmg ; Bidi_Mirroring_Glyph -bpb ; Bidi_Paired_Bracket -cjkIICore ; kIICore -cjkIRG_GSource ; kIRG_GSource -cjkIRG_HSource ; kIRG_HSource -cjkIRG_JSource ; kIRG_JSource -cjkIRG_KPSource ; kIRG_KPSource -cjkIRG_KSource ; kIRG_KSource -cjkIRG_MSource ; kIRG_MSource -cjkIRG_TSource ; kIRG_TSource -cjkIRG_USource ; kIRG_USource -cjkIRG_VSource ; kIRG_VSource -cjkRSUnicode ; kRSUnicode ; Unicode_Radical_Stroke; URS -isc ; ISO_Comment -JSN ; Jamo_Short_Name -na ; Name -na1 ; Unicode_1_Name -Name_Alias ; Name_Alias -scx ; Script_Extensions - -# ================================================ -# Catalog Properties -# ================================================ -age ; Age -blk ; Block -sc ; Script - -# ================================================ -# Enumerated Properties -# ================================================ -bc ; Bidi_Class -bpt ; Bidi_Paired_Bracket_Type -ccc ; Canonical_Combining_Class -dt ; Decomposition_Type -ea ; East_Asian_Width -gc ; General_Category -GCB ; Grapheme_Cluster_Break -hst ; Hangul_Syllable_Type -InPC ; Indic_Positional_Category -InSC ; Indic_Syllabic_Category -jg ; Joining_Group -jt ; Joining_Type -lb ; Line_Break -NFC_QC ; NFC_Quick_Check -NFD_QC ; NFD_Quick_Check -NFKC_QC ; NFKC_Quick_Check -NFKD_QC ; NFKD_Quick_Check -nt ; Numeric_Type -SB ; Sentence_Break -vo ; Vertical_Orientation -WB ; Word_Break - -# ================================================ -# Binary Properties -# ================================================ -AHex ; ASCII_Hex_Digit -Alpha ; Alphabetic -Bidi_C ; Bidi_Control -Bidi_M ; Bidi_Mirrored -Cased ; Cased -CE ; Composition_Exclusion -CI ; Case_Ignorable -Comp_Ex ; Full_Composition_Exclusion -CWCF ; Changes_When_Casefolded -CWCM ; Changes_When_Casemapped -CWKCF ; Changes_When_NFKC_Casefolded -CWL ; Changes_When_Lowercased -CWT ; Changes_When_Titlecased -CWU ; Changes_When_Uppercased -Dash ; Dash -Dep ; Deprecated -DI ; Default_Ignorable_Code_Point -Dia ; Diacritic -Ext ; Extender -Gr_Base ; Grapheme_Base -Gr_Ext ; Grapheme_Extend -Gr_Link ; Grapheme_Link -Hex ; Hex_Digit -Hyphen ; Hyphen -IDC ; ID_Continue -Ideo ; Ideographic -IDS ; ID_Start -IDSB ; IDS_Binary_Operator -IDST ; IDS_Trinary_Operator -Join_C ; Join_Control -LOE ; Logical_Order_Exception -Lower ; Lowercase -Math ; Math -NChar ; Noncharacter_Code_Point -OAlpha ; Other_Alphabetic -ODI ; Other_Default_Ignorable_Code_Point -OGr_Ext ; Other_Grapheme_Extend -OIDC ; Other_ID_Continue -OIDS ; Other_ID_Start -OLower ; Other_Lowercase -OMath ; Other_Math -OUpper ; Other_Uppercase -Pat_Syn ; Pattern_Syntax -Pat_WS ; Pattern_White_Space -PCM ; Prepended_Concatenation_Mark -QMark ; Quotation_Mark -Radical ; Radical -RI ; Regional_Indicator -SD ; Soft_Dotted -STerm ; Sentence_Terminal -Term ; Terminal_Punctuation -UIdeo ; Unified_Ideograph -Upper ; Uppercase -VS ; Variation_Selector -WSpace ; White_Space ; space -XIDC ; XID_Continue -XIDS ; XID_Start -XO_NFC ; Expands_On_NFC -XO_NFD ; Expands_On_NFD -XO_NFKC ; Expands_On_NFKC -XO_NFKD ; Expands_On_NFKD - -# ================================================ -# Total: 120 - -# EOF diff --git a/lib-satysfi/unidata/PropertyValueAliases.txt b/lib-satysfi/unidata/PropertyValueAliases.txt deleted file mode 100644 index e9b3548ca..000000000 --- a/lib-satysfi/unidata/PropertyValueAliases.txt +++ /dev/null @@ -1,1496 +0,0 @@ -# PropertyValueAliases-10.0.0.txt -# Date: 2017-05-17, 08:45:34 GMT -# © 2017 Unicode®, Inc. -# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. -# For terms of use, see http://www.unicode.org/terms_of_use.html -# -# Unicode Character Database -# For documentation, see http://www.unicode.org/reports/tr44/ -# -# This file contains aliases for property values used in the UCD. -# These names can be used for XML formats of UCD data, for regular-expression -# property tests, and other programmatic textual descriptions of Unicode data. -# -# The names may be translated in appropriate environments, and additional -# aliases may be useful. -# -# FORMAT -# -# Each line describes a property value name. -# This consists of three or more fields, separated by semicolons. -# -# First Field: The first field describes the property for which that -# property value name is used. -# -# Second Field: The second field is an abbreviated name. -# -# Third Field: The third field is a long name. -# -# In the case of ccc, there are 4 fields. The second field is numeric, third -# is abbreviated, and fourth is long. -# -# The above are the preferred aliases. Other aliases may be listed in additional fields. -# -# Loose matching should be applied to all property names and property values, with -# the exception of String Property values. With loose matching of property names and -# values, the case distinctions, whitespace, hyphens, and '_' are ignored. -# For Numeric Property values, numeric equivalence is applied: thus "01.00" -# is equivalent to "1". -# -# NOTE: Property value names are NOT unique across properties. For example: -# -# AL means Arabic Letter for the Bidi_Class property, and -# AL means Above_Left for the Canonical_Combining_Class property, and -# AL means Alphabetic for the Line_Break property. -# -# In addition, some property names may be the same as some property value names. -# For example: -# -# sc means the Script property, and -# Sc means the General_Category property value Currency_Symbol (Sc) -# -# The combination of property value and property name is, however, unique. -# -# For more information, see UTS #18: Unicode Regular Expressions -# ================================================ - - -# ASCII_Hex_Digit (AHex) - -AHex; N ; No ; F ; False -AHex; Y ; Yes ; T ; True - -# Age (age) - -age; 1.1 ; V1_1 -age; 2.0 ; V2_0 -age; 2.1 ; V2_1 -age; 3.0 ; V3_0 -age; 3.1 ; V3_1 -age; 3.2 ; V3_2 -age; 4.0 ; V4_0 -age; 4.1 ; V4_1 -age; 5.0 ; V5_0 -age; 5.1 ; V5_1 -age; 5.2 ; V5_2 -age; 6.0 ; V6_0 -age; 6.1 ; V6_1 -age; 6.2 ; V6_2 -age; 6.3 ; V6_3 -age; 7.0 ; V7_0 -age; 8.0 ; V8_0 -age; 9.0 ; V9_0 -age; 10.0 ; V10_0 -age; NA ; Unassigned - -# Alphabetic (Alpha) - -Alpha; N ; No ; F ; False -Alpha; Y ; Yes ; T ; True - -# Bidi_Class (bc) - -bc ; AL ; Arabic_Letter -bc ; AN ; Arabic_Number -bc ; B ; Paragraph_Separator -bc ; BN ; Boundary_Neutral -bc ; CS ; Common_Separator -bc ; EN ; European_Number -bc ; ES ; European_Separator -bc ; ET ; European_Terminator -bc ; FSI ; First_Strong_Isolate -bc ; L ; Left_To_Right -bc ; LRE ; Left_To_Right_Embedding -bc ; LRI ; Left_To_Right_Isolate -bc ; LRO ; Left_To_Right_Override -bc ; NSM ; Nonspacing_Mark -bc ; ON ; Other_Neutral -bc ; PDF ; Pop_Directional_Format -bc ; PDI ; Pop_Directional_Isolate -bc ; R ; Right_To_Left -bc ; RLE ; Right_To_Left_Embedding -bc ; RLI ; Right_To_Left_Isolate -bc ; RLO ; Right_To_Left_Override -bc ; S ; Segment_Separator -bc ; WS ; White_Space - -# Bidi_Control (Bidi_C) - -Bidi_C; N ; No ; F ; False -Bidi_C; Y ; Yes ; T ; True - -# Bidi_Mirrored (Bidi_M) - -Bidi_M; N ; No ; F ; False -Bidi_M; Y ; Yes ; T ; True - -# Bidi_Mirroring_Glyph (bmg) - -# @missing: 0000..10FFFF; Bidi_Mirroring_Glyph; - -# Bidi_Paired_Bracket (bpb) - -# @missing: 0000..10FFFF; Bidi_Paired_Bracket; - -# Bidi_Paired_Bracket_Type (bpt) - -bpt; c ; Close -bpt; n ; None -bpt; o ; Open -# @missing: 0000..10FFFF; Bidi_Paired_Bracket_Type; n - -# Block (blk) - -blk; Adlam ; Adlam -blk; Aegean_Numbers ; Aegean_Numbers -blk; Ahom ; Ahom -blk; Alchemical ; Alchemical_Symbols -blk; Alphabetic_PF ; Alphabetic_Presentation_Forms -blk; Anatolian_Hieroglyphs ; Anatolian_Hieroglyphs -blk; Ancient_Greek_Music ; Ancient_Greek_Musical_Notation -blk; Ancient_Greek_Numbers ; Ancient_Greek_Numbers -blk; Ancient_Symbols ; Ancient_Symbols -blk; Arabic ; Arabic -blk; Arabic_Ext_A ; Arabic_Extended_A -blk; Arabic_Math ; Arabic_Mathematical_Alphabetic_Symbols -blk; Arabic_PF_A ; Arabic_Presentation_Forms_A ; Arabic_Presentation_Forms-A -blk; Arabic_PF_B ; Arabic_Presentation_Forms_B -blk; Arabic_Sup ; Arabic_Supplement -blk; Armenian ; Armenian -blk; Arrows ; Arrows -blk; ASCII ; Basic_Latin -blk; Avestan ; Avestan -blk; Balinese ; Balinese -blk; Bamum ; Bamum -blk; Bamum_Sup ; Bamum_Supplement -blk; Bassa_Vah ; Bassa_Vah -blk; Batak ; Batak -blk; Bengali ; Bengali -blk; Bhaiksuki ; Bhaiksuki -blk; Block_Elements ; Block_Elements -blk; Bopomofo ; Bopomofo -blk; Bopomofo_Ext ; Bopomofo_Extended -blk; Box_Drawing ; Box_Drawing -blk; Brahmi ; Brahmi -blk; Braille ; Braille_Patterns -blk; Buginese ; Buginese -blk; Buhid ; Buhid -blk; Byzantine_Music ; Byzantine_Musical_Symbols -blk; Carian ; Carian -blk; Caucasian_Albanian ; Caucasian_Albanian -blk; Chakma ; Chakma -blk; Cham ; Cham -blk; Cherokee ; Cherokee -blk; Cherokee_Sup ; Cherokee_Supplement -blk; CJK ; CJK_Unified_Ideographs -blk; CJK_Compat ; CJK_Compatibility -blk; CJK_Compat_Forms ; CJK_Compatibility_Forms -blk; CJK_Compat_Ideographs ; CJK_Compatibility_Ideographs -blk; CJK_Compat_Ideographs_Sup ; CJK_Compatibility_Ideographs_Supplement -blk; CJK_Ext_A ; CJK_Unified_Ideographs_Extension_A -blk; CJK_Ext_B ; CJK_Unified_Ideographs_Extension_B -blk; CJK_Ext_C ; CJK_Unified_Ideographs_Extension_C -blk; CJK_Ext_D ; CJK_Unified_Ideographs_Extension_D -blk; CJK_Ext_E ; CJK_Unified_Ideographs_Extension_E -blk; CJK_Ext_F ; CJK_Unified_Ideographs_Extension_F -blk; CJK_Radicals_Sup ; CJK_Radicals_Supplement -blk; CJK_Strokes ; CJK_Strokes -blk; CJK_Symbols ; CJK_Symbols_And_Punctuation -blk; Compat_Jamo ; Hangul_Compatibility_Jamo -blk; Control_Pictures ; Control_Pictures -blk; Coptic ; Coptic -blk; Coptic_Epact_Numbers ; Coptic_Epact_Numbers -blk; Counting_Rod ; Counting_Rod_Numerals -blk; Cuneiform ; Cuneiform -blk; Cuneiform_Numbers ; Cuneiform_Numbers_And_Punctuation -blk; Currency_Symbols ; Currency_Symbols -blk; Cypriot_Syllabary ; Cypriot_Syllabary -blk; Cyrillic ; Cyrillic -blk; Cyrillic_Ext_A ; Cyrillic_Extended_A -blk; Cyrillic_Ext_B ; Cyrillic_Extended_B -blk; Cyrillic_Ext_C ; Cyrillic_Extended_C -blk; Cyrillic_Sup ; Cyrillic_Supplement ; Cyrillic_Supplementary -blk; Deseret ; Deseret -blk; Devanagari ; Devanagari -blk; Devanagari_Ext ; Devanagari_Extended -blk; Diacriticals ; Combining_Diacritical_Marks -blk; Diacriticals_Ext ; Combining_Diacritical_Marks_Extended -blk; Diacriticals_For_Symbols ; Combining_Diacritical_Marks_For_Symbols; Combining_Marks_For_Symbols -blk; Diacriticals_Sup ; Combining_Diacritical_Marks_Supplement -blk; Dingbats ; Dingbats -blk; Domino ; Domino_Tiles -blk; Duployan ; Duployan -blk; Early_Dynastic_Cuneiform ; Early_Dynastic_Cuneiform -blk; Egyptian_Hieroglyphs ; Egyptian_Hieroglyphs -blk; Elbasan ; Elbasan -blk; Emoticons ; Emoticons -blk; Enclosed_Alphanum ; Enclosed_Alphanumerics -blk; Enclosed_Alphanum_Sup ; Enclosed_Alphanumeric_Supplement -blk; Enclosed_CJK ; Enclosed_CJK_Letters_And_Months -blk; Enclosed_Ideographic_Sup ; Enclosed_Ideographic_Supplement -blk; Ethiopic ; Ethiopic -blk; Ethiopic_Ext ; Ethiopic_Extended -blk; Ethiopic_Ext_A ; Ethiopic_Extended_A -blk; Ethiopic_Sup ; Ethiopic_Supplement -blk; Geometric_Shapes ; Geometric_Shapes -blk; Geometric_Shapes_Ext ; Geometric_Shapes_Extended -blk; Georgian ; Georgian -blk; Georgian_Sup ; Georgian_Supplement -blk; Glagolitic ; Glagolitic -blk; Glagolitic_Sup ; Glagolitic_Supplement -blk; Gothic ; Gothic -blk; Grantha ; Grantha -blk; Greek ; Greek_And_Coptic -blk; Greek_Ext ; Greek_Extended -blk; Gujarati ; Gujarati -blk; Gurmukhi ; Gurmukhi -blk; Half_And_Full_Forms ; Halfwidth_And_Fullwidth_Forms -blk; Half_Marks ; Combining_Half_Marks -blk; Hangul ; Hangul_Syllables -blk; Hanunoo ; Hanunoo -blk; Hatran ; Hatran -blk; Hebrew ; Hebrew -blk; High_PU_Surrogates ; High_Private_Use_Surrogates -blk; High_Surrogates ; High_Surrogates -blk; Hiragana ; Hiragana -blk; IDC ; Ideographic_Description_Characters -blk; Ideographic_Symbols ; Ideographic_Symbols_And_Punctuation -blk; Imperial_Aramaic ; Imperial_Aramaic -blk; Indic_Number_Forms ; Common_Indic_Number_Forms -blk; Inscriptional_Pahlavi ; Inscriptional_Pahlavi -blk; Inscriptional_Parthian ; Inscriptional_Parthian -blk; IPA_Ext ; IPA_Extensions -blk; Jamo ; Hangul_Jamo -blk; Jamo_Ext_A ; Hangul_Jamo_Extended_A -blk; Jamo_Ext_B ; Hangul_Jamo_Extended_B -blk; Javanese ; Javanese -blk; Kaithi ; Kaithi -blk; Kana_Ext_A ; Kana_Extended_A -blk; Kana_Sup ; Kana_Supplement -blk; Kanbun ; Kanbun -blk; Kangxi ; Kangxi_Radicals -blk; Kannada ; Kannada -blk; Katakana ; Katakana -blk; Katakana_Ext ; Katakana_Phonetic_Extensions -blk; Kayah_Li ; Kayah_Li -blk; Kharoshthi ; Kharoshthi -blk; Khmer ; Khmer -blk; Khmer_Symbols ; Khmer_Symbols -blk; Khojki ; Khojki -blk; Khudawadi ; Khudawadi -blk; Lao ; Lao -blk; Latin_1_Sup ; Latin_1_Supplement ; Latin_1 -blk; Latin_Ext_A ; Latin_Extended_A -blk; Latin_Ext_Additional ; Latin_Extended_Additional -blk; Latin_Ext_B ; Latin_Extended_B -blk; Latin_Ext_C ; Latin_Extended_C -blk; Latin_Ext_D ; Latin_Extended_D -blk; Latin_Ext_E ; Latin_Extended_E -blk; Lepcha ; Lepcha -blk; Letterlike_Symbols ; Letterlike_Symbols -blk; Limbu ; Limbu -blk; Linear_A ; Linear_A -blk; Linear_B_Ideograms ; Linear_B_Ideograms -blk; Linear_B_Syllabary ; Linear_B_Syllabary -blk; Lisu ; Lisu -blk; Low_Surrogates ; Low_Surrogates -blk; Lycian ; Lycian -blk; Lydian ; Lydian -blk; Mahajani ; Mahajani -blk; Mahjong ; Mahjong_Tiles -blk; Malayalam ; Malayalam -blk; Mandaic ; Mandaic -blk; Manichaean ; Manichaean -blk; Marchen ; Marchen -blk; Masaram_Gondi ; Masaram_Gondi -blk; Math_Alphanum ; Mathematical_Alphanumeric_Symbols -blk; Math_Operators ; Mathematical_Operators -blk; Meetei_Mayek ; Meetei_Mayek -blk; Meetei_Mayek_Ext ; Meetei_Mayek_Extensions -blk; Mende_Kikakui ; Mende_Kikakui -blk; Meroitic_Cursive ; Meroitic_Cursive -blk; Meroitic_Hieroglyphs ; Meroitic_Hieroglyphs -blk; Miao ; Miao -blk; Misc_Arrows ; Miscellaneous_Symbols_And_Arrows -blk; Misc_Math_Symbols_A ; Miscellaneous_Mathematical_Symbols_A -blk; Misc_Math_Symbols_B ; Miscellaneous_Mathematical_Symbols_B -blk; Misc_Pictographs ; Miscellaneous_Symbols_And_Pictographs -blk; Misc_Symbols ; Miscellaneous_Symbols -blk; Misc_Technical ; Miscellaneous_Technical -blk; Modi ; Modi -blk; Modifier_Letters ; Spacing_Modifier_Letters -blk; Modifier_Tone_Letters ; Modifier_Tone_Letters -blk; Mongolian ; Mongolian -blk; Mongolian_Sup ; Mongolian_Supplement -blk; Mro ; Mro -blk; Multani ; Multani -blk; Music ; Musical_Symbols -blk; Myanmar ; Myanmar -blk; Myanmar_Ext_A ; Myanmar_Extended_A -blk; Myanmar_Ext_B ; Myanmar_Extended_B -blk; Nabataean ; Nabataean -blk; NB ; No_Block -blk; New_Tai_Lue ; New_Tai_Lue -blk; Newa ; Newa -blk; NKo ; NKo -blk; Number_Forms ; Number_Forms -blk; Nushu ; Nushu -blk; OCR ; Optical_Character_Recognition -blk; Ogham ; Ogham -blk; Ol_Chiki ; Ol_Chiki -blk; Old_Hungarian ; Old_Hungarian -blk; Old_Italic ; Old_Italic -blk; Old_North_Arabian ; Old_North_Arabian -blk; Old_Permic ; Old_Permic -blk; Old_Persian ; Old_Persian -blk; Old_South_Arabian ; Old_South_Arabian -blk; Old_Turkic ; Old_Turkic -blk; Oriya ; Oriya -blk; Ornamental_Dingbats ; Ornamental_Dingbats -blk; Osage ; Osage -blk; Osmanya ; Osmanya -blk; Pahawh_Hmong ; Pahawh_Hmong -blk; Palmyrene ; Palmyrene -blk; Pau_Cin_Hau ; Pau_Cin_Hau -blk; Phags_Pa ; Phags_Pa -blk; Phaistos ; Phaistos_Disc -blk; Phoenician ; Phoenician -blk; Phonetic_Ext ; Phonetic_Extensions -blk; Phonetic_Ext_Sup ; Phonetic_Extensions_Supplement -blk; Playing_Cards ; Playing_Cards -blk; Psalter_Pahlavi ; Psalter_Pahlavi -blk; PUA ; Private_Use_Area ; Private_Use -blk; Punctuation ; General_Punctuation -blk; Rejang ; Rejang -blk; Rumi ; Rumi_Numeral_Symbols -blk; Runic ; Runic -blk; Samaritan ; Samaritan -blk; Saurashtra ; Saurashtra -blk; Sharada ; Sharada -blk; Shavian ; Shavian -blk; Shorthand_Format_Controls ; Shorthand_Format_Controls -blk; Siddham ; Siddham -blk; Sinhala ; Sinhala -blk; Sinhala_Archaic_Numbers ; Sinhala_Archaic_Numbers -blk; Small_Forms ; Small_Form_Variants -blk; Sora_Sompeng ; Sora_Sompeng -blk; Soyombo ; Soyombo -blk; Specials ; Specials -blk; Sundanese ; Sundanese -blk; Sundanese_Sup ; Sundanese_Supplement -blk; Sup_Arrows_A ; Supplemental_Arrows_A -blk; Sup_Arrows_B ; Supplemental_Arrows_B -blk; Sup_Arrows_C ; Supplemental_Arrows_C -blk; Sup_Math_Operators ; Supplemental_Mathematical_Operators -blk; Sup_PUA_A ; Supplementary_Private_Use_Area_A -blk; Sup_PUA_B ; Supplementary_Private_Use_Area_B -blk; Sup_Punctuation ; Supplemental_Punctuation -blk; Sup_Symbols_And_Pictographs ; Supplemental_Symbols_And_Pictographs -blk; Super_And_Sub ; Superscripts_And_Subscripts -blk; Sutton_SignWriting ; Sutton_SignWriting -blk; Syloti_Nagri ; Syloti_Nagri -blk; Syriac ; Syriac -blk; Syriac_Sup ; Syriac_Supplement -blk; Tagalog ; Tagalog -blk; Tagbanwa ; Tagbanwa -blk; Tags ; Tags -blk; Tai_Le ; Tai_Le -blk; Tai_Tham ; Tai_Tham -blk; Tai_Viet ; Tai_Viet -blk; Tai_Xuan_Jing ; Tai_Xuan_Jing_Symbols -blk; Takri ; Takri -blk; Tamil ; Tamil -blk; Tangut ; Tangut -blk; Tangut_Components ; Tangut_Components -blk; Telugu ; Telugu -blk; Thaana ; Thaana -blk; Thai ; Thai -blk; Tibetan ; Tibetan -blk; Tifinagh ; Tifinagh -blk; Tirhuta ; Tirhuta -blk; Transport_And_Map ; Transport_And_Map_Symbols -blk; UCAS ; Unified_Canadian_Aboriginal_Syllabics; Canadian_Syllabics -blk; UCAS_Ext ; Unified_Canadian_Aboriginal_Syllabics_Extended -blk; Ugaritic ; Ugaritic -blk; Vai ; Vai -blk; Vedic_Ext ; Vedic_Extensions -blk; Vertical_Forms ; Vertical_Forms -blk; VS ; Variation_Selectors -blk; VS_Sup ; Variation_Selectors_Supplement -blk; Warang_Citi ; Warang_Citi -blk; Yi_Radicals ; Yi_Radicals -blk; Yi_Syllables ; Yi_Syllables -blk; Yijing ; Yijing_Hexagram_Symbols -blk; Zanabazar_Square ; Zanabazar_Square - -# Canonical_Combining_Class (ccc) - -ccc; 0; NR ; Not_Reordered -ccc; 1; OV ; Overlay -ccc; 7; NK ; Nukta -ccc; 8; KV ; Kana_Voicing -ccc; 9; VR ; Virama -ccc; 10; CCC10 ; CCC10 -ccc; 11; CCC11 ; CCC11 -ccc; 12; CCC12 ; CCC12 -ccc; 13; CCC13 ; CCC13 -ccc; 14; CCC14 ; CCC14 -ccc; 15; CCC15 ; CCC15 -ccc; 16; CCC16 ; CCC16 -ccc; 17; CCC17 ; CCC17 -ccc; 18; CCC18 ; CCC18 -ccc; 19; CCC19 ; CCC19 -ccc; 20; CCC20 ; CCC20 -ccc; 21; CCC21 ; CCC21 -ccc; 22; CCC22 ; CCC22 -ccc; 23; CCC23 ; CCC23 -ccc; 24; CCC24 ; CCC24 -ccc; 25; CCC25 ; CCC25 -ccc; 26; CCC26 ; CCC26 -ccc; 27; CCC27 ; CCC27 -ccc; 28; CCC28 ; CCC28 -ccc; 29; CCC29 ; CCC29 -ccc; 30; CCC30 ; CCC30 -ccc; 31; CCC31 ; CCC31 -ccc; 32; CCC32 ; CCC32 -ccc; 33; CCC33 ; CCC33 -ccc; 34; CCC34 ; CCC34 -ccc; 35; CCC35 ; CCC35 -ccc; 36; CCC36 ; CCC36 -ccc; 84; CCC84 ; CCC84 -ccc; 91; CCC91 ; CCC91 -ccc; 103; CCC103 ; CCC103 -ccc; 107; CCC107 ; CCC107 -ccc; 118; CCC118 ; CCC118 -ccc; 122; CCC122 ; CCC122 -ccc; 129; CCC129 ; CCC129 -ccc; 130; CCC130 ; CCC130 -ccc; 132; CCC132 ; CCC132 -ccc; 133; CCC133 ; CCC133 # RESERVED -ccc; 200; ATBL ; Attached_Below_Left -ccc; 202; ATB ; Attached_Below -ccc; 214; ATA ; Attached_Above -ccc; 216; ATAR ; Attached_Above_Right -ccc; 218; BL ; Below_Left -ccc; 220; B ; Below -ccc; 222; BR ; Below_Right -ccc; 224; L ; Left -ccc; 226; R ; Right -ccc; 228; AL ; Above_Left -ccc; 230; A ; Above -ccc; 232; AR ; Above_Right -ccc; 233; DB ; Double_Below -ccc; 234; DA ; Double_Above -ccc; 240; IS ; Iota_Subscript - -# Case_Folding (cf) - -# @missing: 0000..10FFFF; Case_Folding; - -# Case_Ignorable (CI) - -CI ; N ; No ; F ; False -CI ; Y ; Yes ; T ; True - -# Cased (Cased) - -Cased; N ; No ; F ; False -Cased; Y ; Yes ; T ; True - -# Changes_When_Casefolded (CWCF) - -CWCF; N ; No ; F ; False -CWCF; Y ; Yes ; T ; True - -# Changes_When_Casemapped (CWCM) - -CWCM; N ; No ; F ; False -CWCM; Y ; Yes ; T ; True - -# Changes_When_Lowercased (CWL) - -CWL; N ; No ; F ; False -CWL; Y ; Yes ; T ; True - -# Changes_When_NFKC_Casefolded (CWKCF) - -CWKCF; N ; No ; F ; False -CWKCF; Y ; Yes ; T ; True - -# Changes_When_Titlecased (CWT) - -CWT; N ; No ; F ; False -CWT; Y ; Yes ; T ; True - -# Changes_When_Uppercased (CWU) - -CWU; N ; No ; F ; False -CWU; Y ; Yes ; T ; True - -# Composition_Exclusion (CE) - -CE ; N ; No ; F ; False -CE ; Y ; Yes ; T ; True - -# Dash (Dash) - -Dash; N ; No ; F ; False -Dash; Y ; Yes ; T ; True - -# Decomposition_Mapping (dm) - -# @missing: 0000..10FFFF; Decomposition_Mapping; - -# Decomposition_Type (dt) - -dt ; Can ; Canonical ; can -dt ; Com ; Compat ; com -dt ; Enc ; Circle ; enc -dt ; Fin ; Final ; fin -dt ; Font ; Font ; font -dt ; Fra ; Fraction ; fra -dt ; Init ; Initial ; init -dt ; Iso ; Isolated ; iso -dt ; Med ; Medial ; med -dt ; Nar ; Narrow ; nar -dt ; Nb ; Nobreak ; nb -dt ; None ; None ; none -dt ; Sml ; Small ; sml -dt ; Sqr ; Square ; sqr -dt ; Sub ; Sub ; sub -dt ; Sup ; Super ; sup -dt ; Vert ; Vertical ; vert -dt ; Wide ; Wide ; wide - -# Default_Ignorable_Code_Point (DI) - -DI ; N ; No ; F ; False -DI ; Y ; Yes ; T ; True - -# Deprecated (Dep) - -Dep; N ; No ; F ; False -Dep; Y ; Yes ; T ; True - -# Diacritic (Dia) - -Dia; N ; No ; F ; False -Dia; Y ; Yes ; T ; True - -# East_Asian_Width (ea) - -ea ; A ; Ambiguous -ea ; F ; Fullwidth -ea ; H ; Halfwidth -ea ; N ; Neutral -ea ; Na ; Narrow -ea ; W ; Wide - -# Expands_On_NFC (XO_NFC) - -XO_NFC; N ; No ; F ; False -XO_NFC; Y ; Yes ; T ; True - -# Expands_On_NFD (XO_NFD) - -XO_NFD; N ; No ; F ; False -XO_NFD; Y ; Yes ; T ; True - -# Expands_On_NFKC (XO_NFKC) - -XO_NFKC; N ; No ; F ; False -XO_NFKC; Y ; Yes ; T ; True - -# Expands_On_NFKD (XO_NFKD) - -XO_NFKD; N ; No ; F ; False -XO_NFKD; Y ; Yes ; T ; True - -# Extender (Ext) - -Ext; N ; No ; F ; False -Ext; Y ; Yes ; T ; True - -# FC_NFKC_Closure (FC_NFKC) - -# @missing: 0000..10FFFF; FC_NFKC_Closure; - -# Full_Composition_Exclusion (Comp_Ex) - -Comp_Ex; N ; No ; F ; False -Comp_Ex; Y ; Yes ; T ; True - -# General_Category (gc) - -gc ; C ; Other # Cc | Cf | Cn | Co | Cs -gc ; Cc ; Control ; cntrl -gc ; Cf ; Format -gc ; Cn ; Unassigned -gc ; Co ; Private_Use -gc ; Cs ; Surrogate -gc ; L ; Letter # Ll | Lm | Lo | Lt | Lu -gc ; LC ; Cased_Letter # Ll | Lt | Lu -gc ; Ll ; Lowercase_Letter -gc ; Lm ; Modifier_Letter -gc ; Lo ; Other_Letter -gc ; Lt ; Titlecase_Letter -gc ; Lu ; Uppercase_Letter -gc ; M ; Mark ; Combining_Mark # Mc | Me | Mn -gc ; Mc ; Spacing_Mark -gc ; Me ; Enclosing_Mark -gc ; Mn ; Nonspacing_Mark -gc ; N ; Number # Nd | Nl | No -gc ; Nd ; Decimal_Number ; digit -gc ; Nl ; Letter_Number -gc ; No ; Other_Number -gc ; P ; Punctuation ; punct # Pc | Pd | Pe | Pf | Pi | Po | Ps -gc ; Pc ; Connector_Punctuation -gc ; Pd ; Dash_Punctuation -gc ; Pe ; Close_Punctuation -gc ; Pf ; Final_Punctuation -gc ; Pi ; Initial_Punctuation -gc ; Po ; Other_Punctuation -gc ; Ps ; Open_Punctuation -gc ; S ; Symbol # Sc | Sk | Sm | So -gc ; Sc ; Currency_Symbol -gc ; Sk ; Modifier_Symbol -gc ; Sm ; Math_Symbol -gc ; So ; Other_Symbol -gc ; Z ; Separator # Zl | Zp | Zs -gc ; Zl ; Line_Separator -gc ; Zp ; Paragraph_Separator -gc ; Zs ; Space_Separator -# @missing: 0000..10FFFF; General_Category; Unassigned - -# Grapheme_Base (Gr_Base) - -Gr_Base; N ; No ; F ; False -Gr_Base; Y ; Yes ; T ; True - -# Grapheme_Cluster_Break (GCB) - -GCB; CN ; Control -GCB; CR ; CR -GCB; EB ; E_Base -GCB; EBG ; E_Base_GAZ -GCB; EM ; E_Modifier -GCB; EX ; Extend -GCB; GAZ ; Glue_After_Zwj -GCB; L ; L -GCB; LF ; LF -GCB; LV ; LV -GCB; LVT ; LVT -GCB; PP ; Prepend -GCB; RI ; Regional_Indicator -GCB; SM ; SpacingMark -GCB; T ; T -GCB; V ; V -GCB; XX ; Other -GCB; ZWJ ; ZWJ - -# Grapheme_Extend (Gr_Ext) - -Gr_Ext; N ; No ; F ; False -Gr_Ext; Y ; Yes ; T ; True - -# Grapheme_Link (Gr_Link) - -Gr_Link; N ; No ; F ; False -Gr_Link; Y ; Yes ; T ; True - -# Hangul_Syllable_Type (hst) - -hst; L ; Leading_Jamo -hst; LV ; LV_Syllable -hst; LVT ; LVT_Syllable -hst; NA ; Not_Applicable -hst; T ; Trailing_Jamo -hst; V ; Vowel_Jamo - -# Hex_Digit (Hex) - -Hex; N ; No ; F ; False -Hex; Y ; Yes ; T ; True - -# Hyphen (Hyphen) - -Hyphen; N ; No ; F ; False -Hyphen; Y ; Yes ; T ; True - -# IDS_Binary_Operator (IDSB) - -IDSB; N ; No ; F ; False -IDSB; Y ; Yes ; T ; True - -# IDS_Trinary_Operator (IDST) - -IDST; N ; No ; F ; False -IDST; Y ; Yes ; T ; True - -# ID_Continue (IDC) - -IDC; N ; No ; F ; False -IDC; Y ; Yes ; T ; True - -# ID_Start (IDS) - -IDS; N ; No ; F ; False -IDS; Y ; Yes ; T ; True - -# ISO_Comment (isc) - -# @missing: 0000..10FFFF; ISO_Comment; - -# Ideographic (Ideo) - -Ideo; N ; No ; F ; False -Ideo; Y ; Yes ; T ; True - -# Indic_Positional_Category (InPC) - -InPC; Bottom ; Bottom -InPC; Bottom_And_Left ; Bottom_And_Left -InPC; Bottom_And_Right ; Bottom_And_Right -InPC; Left ; Left -InPC; Left_And_Right ; Left_And_Right -InPC; NA ; NA -InPC; Overstruck ; Overstruck -InPC; Right ; Right -InPC; Top ; Top -InPC; Top_And_Bottom ; Top_And_Bottom -InPC; Top_And_Bottom_And_Right ; Top_And_Bottom_And_Right -InPC; Top_And_Left ; Top_And_Left -InPC; Top_And_Left_And_Right ; Top_And_Left_And_Right -InPC; Top_And_Right ; Top_And_Right -InPC; Visual_Order_Left ; Visual_Order_Left - -# Indic_Syllabic_Category (InSC) - -InSC; Avagraha ; Avagraha -InSC; Bindu ; Bindu -InSC; Brahmi_Joining_Number ; Brahmi_Joining_Number -InSC; Cantillation_Mark ; Cantillation_Mark -InSC; Consonant ; Consonant -InSC; Consonant_Dead ; Consonant_Dead -InSC; Consonant_Final ; Consonant_Final -InSC; Consonant_Head_Letter ; Consonant_Head_Letter -InSC; Consonant_Killer ; Consonant_Killer -InSC; Consonant_Medial ; Consonant_Medial -InSC; Consonant_Placeholder ; Consonant_Placeholder -InSC; Consonant_Preceding_Repha ; Consonant_Preceding_Repha -InSC; Consonant_Prefixed ; Consonant_Prefixed -InSC; Consonant_Subjoined ; Consonant_Subjoined -InSC; Consonant_Succeeding_Repha ; Consonant_Succeeding_Repha -InSC; Consonant_With_Stacker ; Consonant_With_Stacker -InSC; Gemination_Mark ; Gemination_Mark -InSC; Invisible_Stacker ; Invisible_Stacker -InSC; Joiner ; Joiner -InSC; Modifying_Letter ; Modifying_Letter -InSC; Non_Joiner ; Non_Joiner -InSC; Nukta ; Nukta -InSC; Number ; Number -InSC; Number_Joiner ; Number_Joiner -InSC; Other ; Other -InSC; Pure_Killer ; Pure_Killer -InSC; Register_Shifter ; Register_Shifter -InSC; Syllable_Modifier ; Syllable_Modifier -InSC; Tone_Letter ; Tone_Letter -InSC; Tone_Mark ; Tone_Mark -InSC; Virama ; Virama -InSC; Visarga ; Visarga -InSC; Vowel ; Vowel -InSC; Vowel_Dependent ; Vowel_Dependent -InSC; Vowel_Independent ; Vowel_Independent - -# Jamo_Short_Name (JSN) - -JSN; A ; A -JSN; AE ; AE -JSN; B ; B -JSN; BB ; BB -JSN; BS ; BS -JSN; C ; C -JSN; D ; D -JSN; DD ; DD -JSN; E ; E -JSN; EO ; EO -JSN; EU ; EU -JSN; G ; G -JSN; GG ; GG -JSN; GS ; GS -JSN; H ; H -JSN; I ; I -JSN; J ; J -JSN; JJ ; JJ -JSN; K ; K -JSN; L ; L -JSN; LB ; LB -JSN; LG ; LG -JSN; LH ; LH -JSN; LM ; LM -JSN; LP ; LP -JSN; LS ; LS -JSN; LT ; LT -JSN; M ; M -JSN; N ; N -JSN; NG ; NG -JSN; NH ; NH -JSN; NJ ; NJ -JSN; O ; O -JSN; OE ; OE -JSN; P ; P -JSN; R ; R -JSN; S ; S -JSN; SS ; SS -JSN; T ; T -JSN; U ; U -JSN; WA ; WA -JSN; WAE ; WAE -JSN; WE ; WE -JSN; WEO ; WEO -JSN; WI ; WI -JSN; YA ; YA -JSN; YAE ; YAE -JSN; YE ; YE -JSN; YEO ; YEO -JSN; YI ; YI -JSN; YO ; YO -JSN; YU ; YU -# @missing: 0000..10FFFF; Jamo_Short_Name; - -# Join_Control (Join_C) - -Join_C; N ; No ; F ; False -Join_C; Y ; Yes ; T ; True - -# Joining_Group (jg) - -jg ; African_Feh ; African_Feh -jg ; African_Noon ; African_Noon -jg ; African_Qaf ; African_Qaf -jg ; Ain ; Ain -jg ; Alaph ; Alaph -jg ; Alef ; Alef -jg ; Beh ; Beh -jg ; Beth ; Beth -jg ; Burushaski_Yeh_Barree ; Burushaski_Yeh_Barree -jg ; Dal ; Dal -jg ; Dalath_Rish ; Dalath_Rish -jg ; E ; E -jg ; Farsi_Yeh ; Farsi_Yeh -jg ; Fe ; Fe -jg ; Feh ; Feh -jg ; Final_Semkath ; Final_Semkath -jg ; Gaf ; Gaf -jg ; Gamal ; Gamal -jg ; Hah ; Hah -jg ; He ; He -jg ; Heh ; Heh -jg ; Heh_Goal ; Heh_Goal -jg ; Heth ; Heth -jg ; Kaf ; Kaf -jg ; Kaph ; Kaph -jg ; Khaph ; Khaph -jg ; Knotted_Heh ; Knotted_Heh -jg ; Lam ; Lam -jg ; Lamadh ; Lamadh -jg ; Malayalam_Bha ; Malayalam_Bha -jg ; Malayalam_Ja ; Malayalam_Ja -jg ; Malayalam_Lla ; Malayalam_Lla -jg ; Malayalam_Llla ; Malayalam_Llla -jg ; Malayalam_Nga ; Malayalam_Nga -jg ; Malayalam_Nna ; Malayalam_Nna -jg ; Malayalam_Nnna ; Malayalam_Nnna -jg ; Malayalam_Nya ; Malayalam_Nya -jg ; Malayalam_Ra ; Malayalam_Ra -jg ; Malayalam_Ssa ; Malayalam_Ssa -jg ; Malayalam_Tta ; Malayalam_Tta -jg ; Manichaean_Aleph ; Manichaean_Aleph -jg ; Manichaean_Ayin ; Manichaean_Ayin -jg ; Manichaean_Beth ; Manichaean_Beth -jg ; Manichaean_Daleth ; Manichaean_Daleth -jg ; Manichaean_Dhamedh ; Manichaean_Dhamedh -jg ; Manichaean_Five ; Manichaean_Five -jg ; Manichaean_Gimel ; Manichaean_Gimel -jg ; Manichaean_Heth ; Manichaean_Heth -jg ; Manichaean_Hundred ; Manichaean_Hundred -jg ; Manichaean_Kaph ; Manichaean_Kaph -jg ; Manichaean_Lamedh ; Manichaean_Lamedh -jg ; Manichaean_Mem ; Manichaean_Mem -jg ; Manichaean_Nun ; Manichaean_Nun -jg ; Manichaean_One ; Manichaean_One -jg ; Manichaean_Pe ; Manichaean_Pe -jg ; Manichaean_Qoph ; Manichaean_Qoph -jg ; Manichaean_Resh ; Manichaean_Resh -jg ; Manichaean_Sadhe ; Manichaean_Sadhe -jg ; Manichaean_Samekh ; Manichaean_Samekh -jg ; Manichaean_Taw ; Manichaean_Taw -jg ; Manichaean_Ten ; Manichaean_Ten -jg ; Manichaean_Teth ; Manichaean_Teth -jg ; Manichaean_Thamedh ; Manichaean_Thamedh -jg ; Manichaean_Twenty ; Manichaean_Twenty -jg ; Manichaean_Waw ; Manichaean_Waw -jg ; Manichaean_Yodh ; Manichaean_Yodh -jg ; Manichaean_Zayin ; Manichaean_Zayin -jg ; Meem ; Meem -jg ; Mim ; Mim -jg ; No_Joining_Group ; No_Joining_Group -jg ; Noon ; Noon -jg ; Nun ; Nun -jg ; Nya ; Nya -jg ; Pe ; Pe -jg ; Qaf ; Qaf -jg ; Qaph ; Qaph -jg ; Reh ; Reh -jg ; Reversed_Pe ; Reversed_Pe -jg ; Rohingya_Yeh ; Rohingya_Yeh -jg ; Sad ; Sad -jg ; Sadhe ; Sadhe -jg ; Seen ; Seen -jg ; Semkath ; Semkath -jg ; Shin ; Shin -jg ; Straight_Waw ; Straight_Waw -jg ; Swash_Kaf ; Swash_Kaf -jg ; Syriac_Waw ; Syriac_Waw -jg ; Tah ; Tah -jg ; Taw ; Taw -jg ; Teh_Marbuta ; Teh_Marbuta -jg ; Teh_Marbuta_Goal ; Hamza_On_Heh_Goal -jg ; Teth ; Teth -jg ; Waw ; Waw -jg ; Yeh ; Yeh -jg ; Yeh_Barree ; Yeh_Barree -jg ; Yeh_With_Tail ; Yeh_With_Tail -jg ; Yudh ; Yudh -jg ; Yudh_He ; Yudh_He -jg ; Zain ; Zain -jg ; Zhain ; Zhain - -# Joining_Type (jt) - -jt ; C ; Join_Causing -jt ; D ; Dual_Joining -jt ; L ; Left_Joining -jt ; R ; Right_Joining -jt ; T ; Transparent -jt ; U ; Non_Joining - -# Line_Break (lb) - -lb ; AI ; Ambiguous -lb ; AL ; Alphabetic -lb ; B2 ; Break_Both -lb ; BA ; Break_After -lb ; BB ; Break_Before -lb ; BK ; Mandatory_Break -lb ; CB ; Contingent_Break -lb ; CJ ; Conditional_Japanese_Starter -lb ; CL ; Close_Punctuation -lb ; CM ; Combining_Mark -lb ; CP ; Close_Parenthesis -lb ; CR ; Carriage_Return -lb ; EB ; E_Base -lb ; EM ; E_Modifier -lb ; EX ; Exclamation -lb ; GL ; Glue -lb ; H2 ; H2 -lb ; H3 ; H3 -lb ; HL ; Hebrew_Letter -lb ; HY ; Hyphen -lb ; ID ; Ideographic -lb ; IN ; Inseparable ; Inseperable -lb ; IS ; Infix_Numeric -lb ; JL ; JL -lb ; JT ; JT -lb ; JV ; JV -lb ; LF ; Line_Feed -lb ; NL ; Next_Line -lb ; NS ; Nonstarter -lb ; NU ; Numeric -lb ; OP ; Open_Punctuation -lb ; PO ; Postfix_Numeric -lb ; PR ; Prefix_Numeric -lb ; QU ; Quotation -lb ; RI ; Regional_Indicator -lb ; SA ; Complex_Context -lb ; SG ; Surrogate -lb ; SP ; Space -lb ; SY ; Break_Symbols -lb ; WJ ; Word_Joiner -lb ; XX ; Unknown -lb ; ZW ; ZWSpace -lb ; ZWJ ; ZWJ - -# Logical_Order_Exception (LOE) - -LOE; N ; No ; F ; False -LOE; Y ; Yes ; T ; True - -# Lowercase (Lower) - -Lower; N ; No ; F ; False -Lower; Y ; Yes ; T ; True - -# Lowercase_Mapping (lc) - -# @missing: 0000..10FFFF; Lowercase_Mapping; - -# Math (Math) - -Math; N ; No ; F ; False -Math; Y ; Yes ; T ; True - -# NFC_Quick_Check (NFC_QC) - -NFC_QC; M ; Maybe -NFC_QC; N ; No -NFC_QC; Y ; Yes - -# NFD_Quick_Check (NFD_QC) - -NFD_QC; N ; No -NFD_QC; Y ; Yes - -# NFKC_Casefold (NFKC_CF) - -# @missing: 0000..10FFFF; NFKC_Casefold; - -# NFKC_Quick_Check (NFKC_QC) - -NFKC_QC; M ; Maybe -NFKC_QC; N ; No -NFKC_QC; Y ; Yes - -# NFKD_Quick_Check (NFKD_QC) - -NFKD_QC; N ; No -NFKD_QC; Y ; Yes - -# Name (na) - -# @missing: 0000..10FFFF; Name; - -# Name_Alias (Name_Alias) - -# @missing: 0000..10FFFF; Name_Alias; - -# Noncharacter_Code_Point (NChar) - -NChar; N ; No ; F ; False -NChar; Y ; Yes ; T ; True - -# Numeric_Type (nt) - -nt ; De ; Decimal -nt ; Di ; Digit -nt ; None ; None -nt ; Nu ; Numeric - -# Numeric_Value (nv) - -# @missing: 0000..10FFFF; Numeric_Value; NaN - -# Other_Alphabetic (OAlpha) - -OAlpha; N ; No ; F ; False -OAlpha; Y ; Yes ; T ; True - -# Other_Default_Ignorable_Code_Point (ODI) - -ODI; N ; No ; F ; False -ODI; Y ; Yes ; T ; True - -# Other_Grapheme_Extend (OGr_Ext) - -OGr_Ext; N ; No ; F ; False -OGr_Ext; Y ; Yes ; T ; True - -# Other_ID_Continue (OIDC) - -OIDC; N ; No ; F ; False -OIDC; Y ; Yes ; T ; True - -# Other_ID_Start (OIDS) - -OIDS; N ; No ; F ; False -OIDS; Y ; Yes ; T ; True - -# Other_Lowercase (OLower) - -OLower; N ; No ; F ; False -OLower; Y ; Yes ; T ; True - -# Other_Math (OMath) - -OMath; N ; No ; F ; False -OMath; Y ; Yes ; T ; True - -# Other_Uppercase (OUpper) - -OUpper; N ; No ; F ; False -OUpper; Y ; Yes ; T ; True - -# Pattern_Syntax (Pat_Syn) - -Pat_Syn; N ; No ; F ; False -Pat_Syn; Y ; Yes ; T ; True - -# Pattern_White_Space (Pat_WS) - -Pat_WS; N ; No ; F ; False -Pat_WS; Y ; Yes ; T ; True - -# Prepended_Concatenation_Mark (PCM) - -PCM; N ; No ; F ; False -PCM; Y ; Yes ; T ; True - -# Quotation_Mark (QMark) - -QMark; N ; No ; F ; False -QMark; Y ; Yes ; T ; True - -# Radical (Radical) - -Radical; N ; No ; F ; False -Radical; Y ; Yes ; T ; True - -# Regional_Indicator (RI) - -RI ; N ; No ; F ; False -RI ; Y ; Yes ; T ; True - -# Script (sc) - -sc ; Adlm ; Adlam -sc ; Aghb ; Caucasian_Albanian -sc ; Ahom ; Ahom -sc ; Arab ; Arabic -sc ; Armi ; Imperial_Aramaic -sc ; Armn ; Armenian -sc ; Avst ; Avestan -sc ; Bali ; Balinese -sc ; Bamu ; Bamum -sc ; Bass ; Bassa_Vah -sc ; Batk ; Batak -sc ; Beng ; Bengali -sc ; Bhks ; Bhaiksuki -sc ; Bopo ; Bopomofo -sc ; Brah ; Brahmi -sc ; Brai ; Braille -sc ; Bugi ; Buginese -sc ; Buhd ; Buhid -sc ; Cakm ; Chakma -sc ; Cans ; Canadian_Aboriginal -sc ; Cari ; Carian -sc ; Cham ; Cham -sc ; Cher ; Cherokee -sc ; Copt ; Coptic ; Qaac -sc ; Cprt ; Cypriot -sc ; Cyrl ; Cyrillic -sc ; Deva ; Devanagari -sc ; Dsrt ; Deseret -sc ; Dupl ; Duployan -sc ; Egyp ; Egyptian_Hieroglyphs -sc ; Elba ; Elbasan -sc ; Ethi ; Ethiopic -sc ; Geor ; Georgian -sc ; Glag ; Glagolitic -sc ; Gonm ; Masaram_Gondi -sc ; Goth ; Gothic -sc ; Gran ; Grantha -sc ; Grek ; Greek -sc ; Gujr ; Gujarati -sc ; Guru ; Gurmukhi -sc ; Hang ; Hangul -sc ; Hani ; Han -sc ; Hano ; Hanunoo -sc ; Hatr ; Hatran -sc ; Hebr ; Hebrew -sc ; Hira ; Hiragana -sc ; Hluw ; Anatolian_Hieroglyphs -sc ; Hmng ; Pahawh_Hmong -sc ; Hrkt ; Katakana_Or_Hiragana -sc ; Hung ; Old_Hungarian -sc ; Ital ; Old_Italic -sc ; Java ; Javanese -sc ; Kali ; Kayah_Li -sc ; Kana ; Katakana -sc ; Khar ; Kharoshthi -sc ; Khmr ; Khmer -sc ; Khoj ; Khojki -sc ; Knda ; Kannada -sc ; Kthi ; Kaithi -sc ; Lana ; Tai_Tham -sc ; Laoo ; Lao -sc ; Latn ; Latin -sc ; Lepc ; Lepcha -sc ; Limb ; Limbu -sc ; Lina ; Linear_A -sc ; Linb ; Linear_B -sc ; Lisu ; Lisu -sc ; Lyci ; Lycian -sc ; Lydi ; Lydian -sc ; Mahj ; Mahajani -sc ; Mand ; Mandaic -sc ; Mani ; Manichaean -sc ; Marc ; Marchen -sc ; Mend ; Mende_Kikakui -sc ; Merc ; Meroitic_Cursive -sc ; Mero ; Meroitic_Hieroglyphs -sc ; Mlym ; Malayalam -sc ; Modi ; Modi -sc ; Mong ; Mongolian -sc ; Mroo ; Mro -sc ; Mtei ; Meetei_Mayek -sc ; Mult ; Multani -sc ; Mymr ; Myanmar -sc ; Narb ; Old_North_Arabian -sc ; Nbat ; Nabataean -sc ; Newa ; Newa -sc ; Nkoo ; Nko -sc ; Nshu ; Nushu -sc ; Ogam ; Ogham -sc ; Olck ; Ol_Chiki -sc ; Orkh ; Old_Turkic -sc ; Orya ; Oriya -sc ; Osge ; Osage -sc ; Osma ; Osmanya -sc ; Palm ; Palmyrene -sc ; Pauc ; Pau_Cin_Hau -sc ; Perm ; Old_Permic -sc ; Phag ; Phags_Pa -sc ; Phli ; Inscriptional_Pahlavi -sc ; Phlp ; Psalter_Pahlavi -sc ; Phnx ; Phoenician -sc ; Plrd ; Miao -sc ; Prti ; Inscriptional_Parthian -sc ; Rjng ; Rejang -sc ; Runr ; Runic -sc ; Samr ; Samaritan -sc ; Sarb ; Old_South_Arabian -sc ; Saur ; Saurashtra -sc ; Sgnw ; SignWriting -sc ; Shaw ; Shavian -sc ; Shrd ; Sharada -sc ; Sidd ; Siddham -sc ; Sind ; Khudawadi -sc ; Sinh ; Sinhala -sc ; Sora ; Sora_Sompeng -sc ; Soyo ; Soyombo -sc ; Sund ; Sundanese -sc ; Sylo ; Syloti_Nagri -sc ; Syrc ; Syriac -sc ; Tagb ; Tagbanwa -sc ; Takr ; Takri -sc ; Tale ; Tai_Le -sc ; Talu ; New_Tai_Lue -sc ; Taml ; Tamil -sc ; Tang ; Tangut -sc ; Tavt ; Tai_Viet -sc ; Telu ; Telugu -sc ; Tfng ; Tifinagh -sc ; Tglg ; Tagalog -sc ; Thaa ; Thaana -sc ; Thai ; Thai -sc ; Tibt ; Tibetan -sc ; Tirh ; Tirhuta -sc ; Ugar ; Ugaritic -sc ; Vaii ; Vai -sc ; Wara ; Warang_Citi -sc ; Xpeo ; Old_Persian -sc ; Xsux ; Cuneiform -sc ; Yiii ; Yi -sc ; Zanb ; Zanabazar_Square -sc ; Zinh ; Inherited ; Qaai -sc ; Zyyy ; Common -sc ; Zzzz ; Unknown - -# Script_Extensions (scx) - -# @missing: 0000..10FFFF; Script_Extensions;