From 7dba7100611ebf79d900d7f4c45a226ad313957e Mon Sep 17 00:00:00 2001 From: Philip Craig Date: Wed, 2 Sep 2015 14:15:28 +1000 Subject: [PATCH 01/29] Return wcwidth of 0 for combining characters --- bin/wcwidth-browser.py | 5 ++--- wcwidth/tests/test_core.py | 4 ++-- wcwidth/wcwidth.py | 16 ++++++---------- 3 files changed, 10 insertions(+), 15 deletions(-) diff --git a/bin/wcwidth-browser.py b/bin/wcwidth-browser.py index 0e2d915..e7f121c 100755 --- a/bin/wcwidth-browser.py +++ b/bin/wcwidth-browser.py @@ -158,7 +158,7 @@ def __init__(self, width=1): if _val <= LIMIT_UCS]: self.characters.append(letters_o[:1] + unichr(val) + - letters_o[1:]) + letters_o[wcwidth(unichr(val))+1:]) self.characters.reverse() def __iter__(self): @@ -647,8 +647,7 @@ def text_entry(self, ucs, name): delimiter = style.attr_minor(style.delimiter) if len(ucs) != 1: # determine display of combining characters - val = ord(next((_ucs for _ucs in ucs - if wcwidth(_ucs) == -1))) + val = ord(ucs[1]) # a combining character displayed of any fg color # will reset the foreground character of the cell # combined with (iTerm2, OSX). 
diff --git a/wcwidth/tests/test_core.py b/wcwidth/tests/test_core.py index 59a4750..313c031 100755 --- a/wcwidth/tests/test_core.py +++ b/wcwidth/tests/test_core.py @@ -82,7 +82,7 @@ def test_combining_width_negative_1(): """Simple test combining reports total width of 4.""" # given, phrase = u'--\u05bf--' - expect_length_each = (1, 1, -1, 1, 1) + expect_length_each = (1, 1, 0, 1, 1) expect_length_phrase = 4 # exercise, @@ -97,7 +97,7 @@ def test_combining_width_negative_1(): def test_combining_cafe(): u"""cafe + COMBINING ACUTE ACCENT is café of length 4.""" phrase = u"cafe\u0301" - expect_length_each = (1, 1, 1, 1, -1) + expect_length_each = (1, 1, 1, 1, 0) expect_length_phrase = 4 # exercise, diff --git a/wcwidth/wcwidth.py b/wcwidth/wcwidth.py index 802316a..3befaa9 100644 --- a/wcwidth/wcwidth.py +++ b/wcwidth/wcwidth.py @@ -114,16 +114,16 @@ def wcwidth(wc): The following have a column width of -1: - - Non-spacing and enclosing combining characters (general - category code Mn or Me in the Unicode database). Generally, - having a non-zero value returned by ``unicodedata.combining()``. - - C0 control characters (U+001 through U+01F). - C1 control characters and DEL (U+07F through U+0A0). The following have a column width of 0: + - Non-spacing and enclosing combining characters (general + category code Mn or Me in the Unicode database). Generally, + having a non-zero value returned by ``unicodedata.combining()``. + - NULL (U+0000, 0). - COMBINING GRAPHEME JOINER (U+034F). @@ -174,10 +174,9 @@ def wcwidth(wc): if ucs < 32 or 0x07F <= ucs < 0x0A0: return -1 - # combining characters have indeterminate effects unless - # combined with additional characters. 
+ # combining characters have zero width if _bisearch(ucs, NONZERO_COMBINING): - return -1 + return 0 return 1 + _bisearch(ucs, WIDE_EASTASIAN) @@ -199,9 +198,6 @@ def wcswidth(pwcs, n=None): for char in pwcs[idx]: wcw = wcwidth(char) if wcw < 0: - ucs = ord(char) - if _bisearch(ucs, NONZERO_COMBINING): - continue return -1 else: width += wcw From 72ef7fe5137059ba88ea56715d1584bf07a4adcb Mon Sep 17 00:00:00 2001 From: Philip Craig Date: Wed, 2 Sep 2015 14:41:06 +1000 Subject: [PATCH 02/29] Use general category to determine zero width combining characters Previously, the canonical combining class was used to determine which characters are zero width combining characters. This had two problems: - it classified spacing marks (category Mc) as zero width - it classified enclosing marks (category Me) as normal characters Fix by using the general category to generate a table of combining characters that have zero width. Characters with a general category of Mn or Me are included in this table. Characters with a general category of Mc are not included, and so use the default width of 1. --- README.rst | 12 +- bin/wcwidth-browser.py | 5 +- bin/wcwidth-combining-comparator.py | 6 +- setup.py | 27 ++- wcwidth/table_comb.py | 240 +++++++++++++++++-------- wcwidth/table_wide.py | 6 +- wcwidth/table_zero.py | 270 ++++++++++++++++++++++++++++ wcwidth/tests/test_core.py | 28 +++ wcwidth/wcwidth.py | 10 +- 9 files changed, 500 insertions(+), 104 deletions(-) create mode 100644 wcwidth/table_zero.py diff --git a/README.rst b/README.rst index ddec997..066c111 100644 --- a/README.rst +++ b/README.rst @@ -38,9 +38,9 @@ than the most current Unicode Standard release files, which this project aims to track. The most current release of this API is based from Unicode Standard release -*7.0.0*, dated *2014-02-28, 23:15:00 GMT [KW, LI]* for table generated by -file ``EastAsianWidth-7.0.0.txt`` and *2014-02-07, 18:42:08 GMT [MD]* for -``DerivedCombiningClass-7.0.0.txt``. 
+*8.0.0*, dated *2015-02-10, 21:00:00 GMT [KW, LI]* for table generated by +file ``EastAsianWidth-8.0.0.txt`` and *2015-02-13, 13:47:11 GMT [MD]* for +``DerivedGeneralCategory-8.0.0.txt``. Installation ------------ @@ -140,12 +140,14 @@ Updating Tables The command ``python setup.py update`` will fetch the following resources: - http://www.unicode.org/Public/UNIDATA/EastAsianWidth.txt -- http://www.unicode.org/Public/UNIDATA/extracted/DerivedCombiningClass.txt +- http://www.unicode.org/Public/UNIDATA/extracted/DerivedGeneralCategory.txt -And generate the table files `wcwidth/table_wide.py`_ and `wcwidth/table_comb.py`_. +And generate the table files `wcwidth/table_wide.py`_, +`wcwidth/table_comb.py`_, and `wcwidth/table_zero.py`_. .. _`wcwidth/table_wide.py`: https://github.com/jquast/wcwidth/tree/master/wcwidth/table_wide.py .. _`wcwidth/table_comb.py`: https://github.com/jquast/wcwidth/tree/master/wcwidth/table_comb.py +.. _`wcwidth/table_zero.py`: https://github.com/jquast/wcwidth/tree/master/wcwidth/table_zero.py wcwidth.c --------- diff --git a/bin/wcwidth-browser.py b/bin/wcwidth-browser.py index e7f121c..b137f67 100755 --- a/bin/wcwidth-browser.py +++ b/bin/wcwidth-browser.py @@ -37,7 +37,7 @@ import signal # local -from wcwidth import wcwidth, table_comb +from wcwidth.wcwidth import _bisearch, wcwidth, COMBINING # 3rd-party from blessed import Terminal @@ -116,6 +116,7 @@ def __init__(self, width=2): self.characters = (unichr(idx) for idx in xrange(LIMIT_UCS) if wcwidth(unichr(idx)) == width + and not _bisearch(idx, COMBINING) ) def __iter__(self): @@ -152,7 +153,7 @@ def __init__(self, width=1): """ self.characters = [] letters_o = (u'o' * width) - for boundaries in table_comb.NONZERO_COMBINING: + for boundaries in COMBINING: for val in [_val for _val in range(boundaries[0], boundaries[1] + 1) if _val <= LIMIT_UCS]: diff --git a/bin/wcwidth-combining-comparator.py b/bin/wcwidth-combining-comparator.py index 000a017..c389c38 100755 --- 
a/bin/wcwidth-combining-comparator.py +++ b/bin/wcwidth-combining-comparator.py @@ -18,7 +18,7 @@ import sys # local imports -from wcwidth.wcwidth import _bisearch, NONZERO_COMBINING +from wcwidth.wcwidth import _bisearch, COMBINING def report_comb_msg(ucs, comb_py, comb_wc): @@ -66,8 +66,8 @@ def report_comb_msg(ucs, comb_py, comb_wc): def _is_equal_combining(ucs): - comb_py = bool(unicodedata.combining(ucs)) - comb_wc = bool(_bisearch(ord(ucs), NONZERO_COMBINING)) + comb_py = bool(unicodedata.category(ucs) in ['Mc', 'Me', 'Mn']) + comb_wc = bool(_bisearch(ord(ucs), COMBINING)) assert comb_py == comb_wc, report_comb_msg(ucs, comb_py, comb_wc) diff --git a/setup.py b/setup.py index fc573dc..373e723 100755 --- a/setup.py +++ b/setup.py @@ -47,9 +47,10 @@ class SetupUpdate(setuptools.Command): EAW_OUT = os.path.join(HERE, 'wcwidth', 'table_wide.py') UCD_URL = ('http://www.unicode.org/Public/UNIDATA/extracted/' - 'DerivedCombiningClass.txt') - UCD_IN = os.path.join(HERE, 'data', 'DerivedCombiningClass.txt') + 'DerivedGeneralCategory.txt') + UCD_IN = os.path.join(HERE, 'data', 'DerivedGeneralCategory.txt') CMB_OUT = os.path.join(HERE, 'wcwidth', 'table_comb.py') + ZERO_OUT = os.path.join(HERE, 'wcwidth', 'table_zero.py') def initialize_options(self): """Override builtin method: no options are available.""" @@ -60,10 +61,11 @@ def finalize_options(self): pass def run(self): - """Execute command: update east-asian and combining tables.""" + """Execute command: update east-asian, combining and zero width tables.""" assert os.getenv('VIRTUAL_ENV'), 'You should be in a virtualenv' self.do_east_asian_width() self.do_combining() + self.do_zero_width() def do_east_asian_width(self): """Fetch and update east-asian tables.""" @@ -75,9 +77,16 @@ def do_east_asian_width(self): def do_combining(self): """Fetch and update combining tables.""" self._do_retrieve(self.UCD_URL, self.UCD_IN) - (version, date, values) = self._do_combining_parse(self.UCD_IN) + (version, date, values) = 
self._do_category_parse(self.UCD_IN, ('Mc', 'Me', 'Mn',)) table = self._make_table(values) - self._do_write(self.CMB_OUT, 'NONZERO_COMBINING', version, date, table) + self._do_write(self.CMB_OUT, 'COMBINING', version, date, table) + + def do_zero_width(self): + """Fetch and update zero width tables.""" + self._do_retrieve(self.UCD_URL, self.UCD_IN) + (version, date, values) = self._do_category_parse(self.UCD_IN, ('Me', 'Mn',)) + table = self._make_table(values) + self._do_write(self.ZERO_OUT, 'ZERO_WIDTH', version, date, table) @staticmethod def _make_table(values): @@ -143,8 +152,8 @@ def _do_east_asian_width_parse(fname, return version, date, sorted(values) @staticmethod - def _do_combining_parse(fname, exclude_values=(0,)): - """Parse unicode combining tables.""" + def _do_category_parse(fname, categories): + """Parse unicode category tables.""" version, date, values = None, None, [] print("parsing {} ..".format(fname)) for line in open(fname, 'rb'): @@ -159,8 +168,8 @@ def _do_combining_parse(fname, exclude_values=(0,)): continue addrs, details = uline.split(';', 1) addrs, details = addrs.rstrip(), details.lstrip() - if not any(details.startswith('{} #'.format(value)) - for value in exclude_values): + if any(details.startswith('{} #'.format(value)) + for value in categories): start, stop = addrs, addrs if '..' in addrs: start, stop = addrs.split('..') diff --git a/wcwidth/table_comb.py b/wcwidth/table_comb.py index 95eecb7..4203ff2 100644 --- a/wcwidth/table_comb.py +++ b/wcwidth/table_comb.py @@ -1,11 +1,10 @@ -"""Nonzero_Combining table. Created by setup.py.""" -# Generated: 2014-11-20T06:55:26.612062 -# Source: DerivedCombiningClass-7.0.0.txt -# Date: 2014-02-07, 18:42:08 GMT [MD] -NONZERO_COMBINING = ( - (0x0300, 0x034e,), # Combining Grave Accent ..Combining Upwards Arrow - (0x0350, 0x036f,), # Combining Right Arrowhea..Combining Latin Small Le - (0x0483, 0x0487,), # Combining Cyrillic Titlo..Combining Cyrillic Pokry +"""Combining table. 
Created by setup.py.""" +# Generated: 2015-09-02T01:27:58.918349 +# Source: DerivedGeneralCategory-8.0.0.txt +# Date: 2015-02-13, 13:47:11 GMT [MD] +COMBINING = ( + (0x0300, 0x036f,), # Combining Grave Accent ..Combining Latin Small Le + (0x0483, 0x0489,), # Combining Cyrillic Titlo..Combining Cyrillic Milli (0x0591, 0x05bd,), # Hebrew Accent Etnahta ..Hebrew Point Meteg (0x05bf, 0x05bf,), # Hebrew Point Rafe ..Hebrew Point Rafe (0x05c1, 0x05c2,), # Hebrew Point Shin Dot ..Hebrew Point Sin Dot @@ -20,137 +19,224 @@ (0x06ea, 0x06ed,), # Arabic Empty Centre Low ..Arabic Small Low Meem (0x0711, 0x0711,), # Syriac Letter Superscrip..Syriac Letter Superscrip (0x0730, 0x074a,), # Syriac Pthaha Above ..Syriac Barrekh + (0x07a6, 0x07b0,), # Thaana Abafili ..Thaana Sukun (0x07eb, 0x07f3,), # Nko Combining Short High..Nko Combining Double Dot (0x0816, 0x0819,), # Samaritan Mark In ..Samaritan Mark Dagesh (0x081b, 0x0823,), # Samaritan Mark Epentheti..Samaritan Vowel Sign A (0x0825, 0x0827,), # Samaritan Vowel Sign Sho..Samaritan Vowel Sign U (0x0829, 0x082d,), # Samaritan Vowel Sign Lon..Samaritan Mark Nequdaa (0x0859, 0x085b,), # Mandaic Affrication Mark..Mandaic Gemination Mark - (0x08e4, 0x08ff,), # Arabic Curly Fatha .. 
- (0x093c, 0x093c,), # Devanagari Sign Nukta ..Devanagari Sign Nukta - (0x094d, 0x094d,), # Devanagari Sign Virama ..Devanagari Sign Virama - (0x0951, 0x0954,), # Devanagari Stress Sign U..Devanagari Acute Accent + (0x08e3, 0x0903,), # (nil) ..Devanagari Sign Visarga + (0x093a, 0x093c,), # Devanagari Vowel Sign Oe..Devanagari Sign Nukta + (0x093e, 0x094f,), # Devanagari Vowel Sign Aa..Devanagari Vowel Sign Aw + (0x0951, 0x0957,), # Devanagari Stress Sign U..Devanagari Vowel Sign Uu + (0x0962, 0x0963,), # Devanagari Vowel Sign Vo..Devanagari Vowel Sign Vo + (0x0981, 0x0983,), # Bengali Sign Candrabindu..Bengali Sign Visarga (0x09bc, 0x09bc,), # Bengali Sign Nukta ..Bengali Sign Nukta - (0x09cd, 0x09cd,), # Bengali Sign Virama ..Bengali Sign Virama + (0x09be, 0x09c4,), # Bengali Vowel Sign Aa ..Bengali Vowel Sign Vocal + (0x09c7, 0x09c8,), # Bengali Vowel Sign E ..Bengali Vowel Sign Ai + (0x09cb, 0x09cd,), # Bengali Vowel Sign O ..Bengali Sign Virama + (0x09d7, 0x09d7,), # Bengali Au Length Mark ..Bengali Au Length Mark + (0x09e2, 0x09e3,), # Bengali Vowel Sign Vocal..Bengali Vowel Sign Vocal + (0x0a01, 0x0a03,), # Gurmukhi Sign Adak Bindi..Gurmukhi Sign Visarga (0x0a3c, 0x0a3c,), # Gurmukhi Sign Nukta ..Gurmukhi Sign Nukta - (0x0a4d, 0x0a4d,), # Gurmukhi Sign Virama ..Gurmukhi Sign Virama + (0x0a3e, 0x0a42,), # Gurmukhi Vowel Sign Aa ..Gurmukhi Vowel Sign Uu + (0x0a47, 0x0a48,), # Gurmukhi Vowel Sign Ee ..Gurmukhi Vowel Sign Ai + (0x0a4b, 0x0a4d,), # Gurmukhi Vowel Sign Oo ..Gurmukhi Sign Virama + (0x0a51, 0x0a51,), # Gurmukhi Sign Udaat ..Gurmukhi Sign Udaat + (0x0a70, 0x0a71,), # Gurmukhi Tippi ..Gurmukhi Addak + (0x0a75, 0x0a75,), # Gurmukhi Sign Yakash ..Gurmukhi Sign Yakash + (0x0a81, 0x0a83,), # Gujarati Sign Candrabind..Gujarati Sign Visarga (0x0abc, 0x0abc,), # Gujarati Sign Nukta ..Gujarati Sign Nukta - (0x0acd, 0x0acd,), # Gujarati Sign Virama ..Gujarati Sign Virama + (0x0abe, 0x0ac5,), # Gujarati Vowel Sign Aa ..Gujarati Vowel Sign Cand + (0x0ac7, 
0x0ac9,), # Gujarati Vowel Sign E ..Gujarati Vowel Sign Cand + (0x0acb, 0x0acd,), # Gujarati Vowel Sign O ..Gujarati Sign Virama + (0x0ae2, 0x0ae3,), # Gujarati Vowel Sign Voca..Gujarati Vowel Sign Voca + (0x0b01, 0x0b03,), # Oriya Sign Candrabindu ..Oriya Sign Visarga (0x0b3c, 0x0b3c,), # Oriya Sign Nukta ..Oriya Sign Nukta - (0x0b4d, 0x0b4d,), # Oriya Sign Virama ..Oriya Sign Virama - (0x0bcd, 0x0bcd,), # Tamil Sign Virama ..Tamil Sign Virama - (0x0c4d, 0x0c4d,), # Telugu Sign Virama ..Telugu Sign Virama + (0x0b3e, 0x0b44,), # Oriya Vowel Sign Aa ..Oriya Vowel Sign Vocalic + (0x0b47, 0x0b48,), # Oriya Vowel Sign E ..Oriya Vowel Sign Ai + (0x0b4b, 0x0b4d,), # Oriya Vowel Sign O ..Oriya Sign Virama + (0x0b56, 0x0b57,), # Oriya Ai Length Mark ..Oriya Au Length Mark + (0x0b62, 0x0b63,), # Oriya Vowel Sign Vocalic..Oriya Vowel Sign Vocalic + (0x0b82, 0x0b82,), # Tamil Sign Anusvara ..Tamil Sign Anusvara + (0x0bbe, 0x0bc2,), # Tamil Vowel Sign Aa ..Tamil Vowel Sign Uu + (0x0bc6, 0x0bc8,), # Tamil Vowel Sign E ..Tamil Vowel Sign Ai + (0x0bca, 0x0bcd,), # Tamil Vowel Sign O ..Tamil Sign Virama + (0x0bd7, 0x0bd7,), # Tamil Au Length Mark ..Tamil Au Length Mark + (0x0c00, 0x0c03,), # (nil) ..Telugu Sign Visarga + (0x0c3e, 0x0c44,), # Telugu Vowel Sign Aa ..Telugu Vowel Sign Vocali + (0x0c46, 0x0c48,), # Telugu Vowel Sign E ..Telugu Vowel Sign Ai + (0x0c4a, 0x0c4d,), # Telugu Vowel Sign O ..Telugu Sign Virama (0x0c55, 0x0c56,), # Telugu Length Mark ..Telugu Ai Length Mark + (0x0c62, 0x0c63,), # Telugu Vowel Sign Vocali..Telugu Vowel Sign Vocali + (0x0c81, 0x0c83,), # (nil) ..Kannada Sign Visarga (0x0cbc, 0x0cbc,), # Kannada Sign Nukta ..Kannada Sign Nukta - (0x0ccd, 0x0ccd,), # Kannada Sign Virama ..Kannada Sign Virama - (0x0d4d, 0x0d4d,), # Malayalam Sign Virama ..Malayalam Sign Virama + (0x0cbe, 0x0cc4,), # Kannada Vowel Sign Aa ..Kannada Vowel Sign Vocal + (0x0cc6, 0x0cc8,), # Kannada Vowel Sign E ..Kannada Vowel Sign Ai + (0x0cca, 0x0ccd,), # Kannada Vowel Sign O 
..Kannada Sign Virama + (0x0cd5, 0x0cd6,), # Kannada Length Mark ..Kannada Ai Length Mark + (0x0ce2, 0x0ce3,), # Kannada Vowel Sign Vocal..Kannada Vowel Sign Vocal + (0x0d01, 0x0d03,), # (nil) ..Malayalam Sign Visarga + (0x0d3e, 0x0d44,), # Malayalam Vowel Sign Aa ..Malayalam Vowel Sign Voc + (0x0d46, 0x0d48,), # Malayalam Vowel Sign E ..Malayalam Vowel Sign Ai + (0x0d4a, 0x0d4d,), # Malayalam Vowel Sign O ..Malayalam Sign Virama + (0x0d57, 0x0d57,), # Malayalam Au Length Mark..Malayalam Au Length Mark + (0x0d62, 0x0d63,), # Malayalam Vowel Sign Voc..Malayalam Vowel Sign Voc + (0x0d82, 0x0d83,), # Sinhala Sign Anusvaraya ..Sinhala Sign Visargaya (0x0dca, 0x0dca,), # Sinhala Sign Al-lakuna ..Sinhala Sign Al-lakuna - (0x0e38, 0x0e3a,), # Thai Character Sara U ..Thai Character Phinthu - (0x0e48, 0x0e4b,), # Thai Character Mai Ek ..Thai Character Mai Chatt - (0x0eb8, 0x0eb9,), # Lao Vowel Sign U ..Lao Vowel Sign Uu - (0x0ec8, 0x0ecb,), # Lao Tone Mai Ek ..Lao Tone Mai Catawa + (0x0dcf, 0x0dd4,), # Sinhala Vowel Sign Aela-..Sinhala Vowel Sign Ketti + (0x0dd6, 0x0dd6,), # Sinhala Vowel Sign Diga ..Sinhala Vowel Sign Diga + (0x0dd8, 0x0ddf,), # Sinhala Vowel Sign Gaett..Sinhala Vowel Sign Gayan + (0x0df2, 0x0df3,), # Sinhala Vowel Sign Diga ..Sinhala Vowel Sign Diga + (0x0e31, 0x0e31,), # Thai Character Mai Han-a..Thai Character Mai Han-a + (0x0e34, 0x0e3a,), # Thai Character Sara I ..Thai Character Phinthu + (0x0e47, 0x0e4e,), # Thai Character Maitaikhu..Thai Character Yamakkan + (0x0eb1, 0x0eb1,), # Lao Vowel Sign Mai Kan ..Lao Vowel Sign Mai Kan + (0x0eb4, 0x0eb9,), # Lao Vowel Sign I ..Lao Vowel Sign Uu + (0x0ebb, 0x0ebc,), # Lao Vowel Sign Mai Kon ..Lao Semivowel Sign Lo + (0x0ec8, 0x0ecd,), # Lao Tone Mai Ek ..Lao Niggahita (0x0f18, 0x0f19,), # Tibetan Astrological Sig..Tibetan Astrological Sig (0x0f35, 0x0f35,), # Tibetan Mark Ngas Bzung ..Tibetan Mark Ngas Bzung (0x0f37, 0x0f37,), # Tibetan Mark Ngas Bzung ..Tibetan Mark Ngas Bzung (0x0f39, 0x0f39,), # Tibetan 
Mark Tsa -phru ..Tibetan Mark Tsa -phru - (0x0f71, 0x0f72,), # Tibetan Vowel Sign Aa ..Tibetan Vowel Sign I - (0x0f74, 0x0f74,), # Tibetan Vowel Sign U ..Tibetan Vowel Sign U - (0x0f7a, 0x0f7d,), # Tibetan Vowel Sign E ..Tibetan Vowel Sign Oo - (0x0f80, 0x0f80,), # Tibetan Vowel Sign Rever..Tibetan Vowel Sign Rever - (0x0f82, 0x0f84,), # Tibetan Sign Nyi Zla Naa..Tibetan Mark Halanta + (0x0f3e, 0x0f3f,), # Tibetan Sign Yar Tshes ..Tibetan Sign Mar Tshes + (0x0f71, 0x0f84,), # Tibetan Vowel Sign Aa ..Tibetan Mark Halanta (0x0f86, 0x0f87,), # Tibetan Sign Lci Rtags ..Tibetan Sign Yang Rtags + (0x0f8d, 0x0f97,), # Tibetan Subjoined Sign L..Tibetan Subjoined Letter + (0x0f99, 0x0fbc,), # Tibetan Subjoined Letter..Tibetan Subjoined Letter (0x0fc6, 0x0fc6,), # Tibetan Symbol Padma Gda..Tibetan Symbol Padma Gda - (0x1037, 0x1037,), # Myanmar Sign Dot Below ..Myanmar Sign Dot Below - (0x1039, 0x103a,), # Myanmar Sign Virama ..Myanmar Sign Asat - (0x108d, 0x108d,), # Myanmar Sign Shan Counci..Myanmar Sign Shan Counci + (0x102b, 0x103e,), # Myanmar Vowel Sign Tall ..Myanmar Consonant Sign M + (0x1056, 0x1059,), # Myanmar Vowel Sign Vocal..Myanmar Vowel Sign Vocal + (0x105e, 0x1060,), # Myanmar Consonant Sign M..Myanmar Consonant Sign M + (0x1062, 0x1064,), # Myanmar Vowel Sign Sgaw ..Myanmar Tone Mark Sgaw K + (0x1067, 0x106d,), # Myanmar Vowel Sign Weste..Myanmar Sign Western Pwo + (0x1071, 0x1074,), # Myanmar Vowel Sign Geba ..Myanmar Vowel Sign Kayah + (0x1082, 0x108d,), # Myanmar Consonant Sign S..Myanmar Sign Shan Counci + (0x108f, 0x108f,), # Myanmar Sign Rumai Palau..Myanmar Sign Rumai Palau + (0x109a, 0x109d,), # Myanmar Sign Khamti Tone..Myanmar Vowel Sign Aiton (0x135d, 0x135f,), # Ethiopic Combining Gemin..Ethiopic Combining Gemin - (0x1714, 0x1714,), # Tagalog Sign Virama ..Tagalog Sign Virama - (0x1734, 0x1734,), # Hanunoo Sign Pamudpod ..Hanunoo Sign Pamudpod - (0x17d2, 0x17d2,), # Khmer Sign Coeng ..Khmer Sign Coeng + (0x1712, 0x1714,), # Tagalog Vowel Sign I 
..Tagalog Sign Virama + (0x1732, 0x1734,), # Hanunoo Vowel Sign I ..Hanunoo Sign Pamudpod + (0x1752, 0x1753,), # Buhid Vowel Sign I ..Buhid Vowel Sign U + (0x1772, 0x1773,), # Tagbanwa Vowel Sign I ..Tagbanwa Vowel Sign U + (0x17b4, 0x17d3,), # Khmer Vowel Inherent Aq ..Khmer Sign Bathamasat (0x17dd, 0x17dd,), # Khmer Sign Atthacan ..Khmer Sign Atthacan + (0x180b, 0x180d,), # Mongolian Free Variation..Mongolian Free Variation (0x18a9, 0x18a9,), # Mongolian Letter Ali Gal..Mongolian Letter Ali Gal - (0x1939, 0x193b,), # Limbu Sign Mukphreng ..Limbu Sign Sa-i - (0x1a17, 0x1a18,), # Buginese Vowel Sign I ..Buginese Vowel Sign U - (0x1a60, 0x1a60,), # Tai Tham Sign Sakot ..Tai Tham Sign Sakot - (0x1a75, 0x1a7c,), # Tai Tham Sign Tone-1 ..Tai Tham Sign Khuen-lue + (0x1920, 0x192b,), # Limbu Vowel Sign A ..Limbu Subjoined Letter W + (0x1930, 0x193b,), # Limbu Small Letter Ka ..Limbu Sign Sa-i + (0x1a17, 0x1a1b,), # Buginese Vowel Sign I ..Buginese Vowel Sign Ae + (0x1a55, 0x1a5e,), # Tai Tham Consonant Sign ..Tai Tham Consonant Sign + (0x1a60, 0x1a7c,), # Tai Tham Sign Sakot ..Tai Tham Sign Khuen-lue (0x1a7f, 0x1a7f,), # Tai Tham Combining Crypt..Tai Tham Combining Crypt - (0x1ab0, 0x1abd,), # (nil) .. - (0x1b34, 0x1b34,), # Balinese Sign Rerekan ..Balinese Sign Rerekan - (0x1b44, 0x1b44,), # Balinese Adeg Adeg ..Balinese Adeg Adeg + (0x1ab0, 0x1abe,), # (nil) .. 
+ (0x1b00, 0x1b04,), # Balinese Sign Ulu Ricem ..Balinese Sign Bisah + (0x1b34, 0x1b44,), # Balinese Sign Rerekan ..Balinese Adeg Adeg (0x1b6b, 0x1b73,), # Balinese Musical Symbol ..Balinese Musical Symbol - (0x1baa, 0x1bab,), # Sundanese Sign Pamaaeh ..Sundanese Sign Virama - (0x1be6, 0x1be6,), # Batak Sign Tompi ..Batak Sign Tompi - (0x1bf2, 0x1bf3,), # Batak Pangolat ..Batak Panongonan - (0x1c37, 0x1c37,), # Lepcha Sign Nukta ..Lepcha Sign Nukta + (0x1b80, 0x1b82,), # Sundanese Sign Panyecek ..Sundanese Sign Pangwisad + (0x1ba1, 0x1bad,), # Sundanese Consonant Sign..Sundanese Consonant Sign + (0x1be6, 0x1bf3,), # Batak Sign Tompi ..Batak Panongonan + (0x1c24, 0x1c37,), # Lepcha Subjoined Letter ..Lepcha Sign Nukta (0x1cd0, 0x1cd2,), # Vedic Tone Karshana ..Vedic Tone Prenkha - (0x1cd4, 0x1ce0,), # Vedic Sign Yajurvedic Mi..Vedic Tone Rigvedic Kash - (0x1ce2, 0x1ce8,), # Vedic Sign Visarga Svari..Vedic Sign Visarga Anuda + (0x1cd4, 0x1ce8,), # Vedic Sign Yajurvedic Mi..Vedic Sign Visarga Anuda (0x1ced, 0x1ced,), # Vedic Sign Tiryak ..Vedic Sign Tiryak - (0x1cf4, 0x1cf4,), # Vedic Tone Candra Above ..Vedic Tone Candra Above + (0x1cf2, 0x1cf4,), # Vedic Sign Ardhavisarga ..Vedic Tone Candra Above (0x1cf8, 0x1cf9,), # (nil) .. (0x1dc0, 0x1df5,), # Combining Dotted Grave A.. 
(0x1dfc, 0x1dff,), # Combining Double Inverte..Combining Right Arrowhea - (0x20d0, 0x20dc,), # Combining Left Harpoon A..Combining Four Dots Abov - (0x20e1, 0x20e1,), # Combining Left Right Arr..Combining Left Right Arr - (0x20e5, 0x20f0,), # Combining Reverse Solidu..Combining Asterisk Above + (0x20d0, 0x20f0,), # Combining Left Harpoon A..Combining Asterisk Above (0x2cef, 0x2cf1,), # Coptic Combining Ni Abov..Coptic Combining Spiritu (0x2d7f, 0x2d7f,), # Tifinagh Consonant Joine..Tifinagh Consonant Joine (0x2de0, 0x2dff,), # Combining Cyrillic Lette..Combining Cyrillic Lette (0x302a, 0x302f,), # Ideographic Level Tone M..Hangul Double Dot Tone M (0x3099, 0x309a,), # Combining Katakana-hirag..Combining Katakana-hirag - (0xa66f, 0xa66f,), # Combining Cyrillic Vzmet..Combining Cyrillic Vzmet + (0xa66f, 0xa672,), # Combining Cyrillic Vzmet..Combining Cyrillic Thous (0xa674, 0xa67d,), # Combining Cyrillic Lette..Combining Cyrillic Payer - (0xa69f, 0xa69f,), # Combining Cyrillic Lette..Combining Cyrillic Lette + (0xa69e, 0xa69f,), # (nil) ..Combining Cyrillic Lette (0xa6f0, 0xa6f1,), # Bamum Combining Mark Koq..Bamum Combining Mark Tuk + (0xa802, 0xa802,), # Syloti Nagri Sign Dvisva..Syloti Nagri Sign Dvisva (0xa806, 0xa806,), # Syloti Nagri Sign Hasant..Syloti Nagri Sign Hasant - (0xa8c4, 0xa8c4,), # Saurashtra Sign Virama ..Saurashtra Sign Virama + (0xa80b, 0xa80b,), # Syloti Nagri Sign Anusva..Syloti Nagri Sign Anusva + (0xa823, 0xa827,), # Syloti Nagri Vowel Sign ..Syloti Nagri Vowel Sign + (0xa880, 0xa881,), # Saurashtra Sign Anusvara..Saurashtra Sign Visarga + (0xa8b4, 0xa8c4,), # Saurashtra Consonant Sig..Saurashtra Sign Virama (0xa8e0, 0xa8f1,), # Combining Devanagari Dig..Combining Devanagari Sig - (0xa92b, 0xa92d,), # Kayah Li Tone Plophu ..Kayah Li Tone Calya Plop - (0xa953, 0xa953,), # Rejang Virama ..Rejang Virama - (0xa9b3, 0xa9b3,), # Javanese Sign Cecak Telu..Javanese Sign Cecak Telu - (0xa9c0, 0xa9c0,), # Javanese Pangkon ..Javanese Pangkon + (0xa926, 
0xa92d,), # Kayah Li Vowel Ue ..Kayah Li Tone Calya Plop + (0xa947, 0xa953,), # Rejang Vowel Sign I ..Rejang Virama + (0xa980, 0xa983,), # Javanese Sign Panyangga ..Javanese Sign Wignyan + (0xa9b3, 0xa9c0,), # Javanese Sign Cecak Telu..Javanese Pangkon + (0xa9e5, 0xa9e5,), # (nil) .. + (0xaa29, 0xaa36,), # Cham Vowel Sign Aa ..Cham Consonant Sign Wa + (0xaa43, 0xaa43,), # Cham Consonant Sign Fina..Cham Consonant Sign Fina + (0xaa4c, 0xaa4d,), # Cham Consonant Sign Fina..Cham Consonant Sign Fina + (0xaa7b, 0xaa7d,), # Myanmar Sign Pao Karen T.. (0xaab0, 0xaab0,), # Tai Viet Mai Kang ..Tai Viet Mai Kang (0xaab2, 0xaab4,), # Tai Viet Vowel I ..Tai Viet Vowel U (0xaab7, 0xaab8,), # Tai Viet Mai Khit ..Tai Viet Vowel Ia (0xaabe, 0xaabf,), # Tai Viet Vowel Am ..Tai Viet Tone Mai Ek (0xaac1, 0xaac1,), # Tai Viet Tone Mai Tho ..Tai Viet Tone Mai Tho - (0xaaf6, 0xaaf6,), # Meetei Mayek Virama ..Meetei Mayek Virama - (0xabed, 0xabed,), # Meetei Mayek Apun Iyek ..Meetei Mayek Apun Iyek + (0xaaeb, 0xaaef,), # Meetei Mayek Vowel Sign ..Meetei Mayek Vowel Sign + (0xaaf5, 0xaaf6,), # Meetei Mayek Vowel Sign ..Meetei Mayek Virama + (0xabe3, 0xabea,), # Meetei Mayek Vowel Sign ..Meetei Mayek Vowel Sign + (0xabec, 0xabed,), # Meetei Mayek Lum Iyek ..Meetei Mayek Apun Iyek (0xfb1e, 0xfb1e,), # Hebrew Point Judeo-spani..Hebrew Point Judeo-spani - (0xfe20, 0xfe2d,), # Combining Ligature Left .. + (0xfe00, 0xfe0f,), # Variation Selector-1 ..Variation Selector-16 + (0xfe20, 0xfe2f,), # Combining Ligature Left .. (0x101fd, 0x101fd,), # Phaistos Disc Sign Combi..Phaistos Disc Sign Combi (0x102e0, 0x102e0,), # (nil) .. (0x10376, 0x1037a,), # (nil) .. 
- (0x10a0d, 0x10a0d,), # Kharoshthi Sign Double R..Kharoshthi Sign Double R - (0x10a0f, 0x10a0f,), # Kharoshthi Sign Visarga ..Kharoshthi Sign Visarga + (0x10a01, 0x10a03,), # Kharoshthi Vowel Sign I ..Kharoshthi Vowel Sign Vo + (0x10a05, 0x10a06,), # Kharoshthi Vowel Sign E ..Kharoshthi Vowel Sign O + (0x10a0c, 0x10a0f,), # Kharoshthi Vowel Length ..Kharoshthi Sign Visarga (0x10a38, 0x10a3a,), # Kharoshthi Sign Bar Abov..Kharoshthi Sign Dot Belo (0x10a3f, 0x10a3f,), # Kharoshthi Virama ..Kharoshthi Virama (0x10ae5, 0x10ae6,), # (nil) .. - (0x11046, 0x11046,), # Brahmi Virama ..Brahmi Virama - (0x1107f, 0x1107f,), # (nil) .. - (0x110b9, 0x110ba,), # Kaithi Sign Virama ..Kaithi Sign Nukta + (0x11000, 0x11002,), # Brahmi Sign Candrabindu ..Brahmi Sign Visarga + (0x11038, 0x11046,), # Brahmi Vowel Sign Aa ..Brahmi Virama + (0x1107f, 0x11082,), # (nil) ..Kaithi Sign Visarga + (0x110b0, 0x110ba,), # Kaithi Vowel Sign Aa ..Kaithi Sign Nukta (0x11100, 0x11102,), # Chakma Sign Candrabindu ..Chakma Sign Visarga - (0x11133, 0x11134,), # Chakma Virama ..Chakma Maayyaa + (0x11127, 0x11134,), # Chakma Vowel Sign A ..Chakma Maayyaa (0x11173, 0x11173,), # (nil) .. - (0x111c0, 0x111c0,), # Sharada Sign Virama ..Sharada Sign Virama - (0x11235, 0x11236,), # (nil) .. - (0x112e9, 0x112ea,), # (nil) .. + (0x11180, 0x11182,), # Sharada Sign Candrabindu..Sharada Sign Visarga + (0x111b3, 0x111c0,), # Sharada Vowel Sign Aa ..Sharada Sign Virama + (0x111ca, 0x111cc,), # (nil) .. + (0x1122c, 0x11237,), # (nil) .. + (0x112df, 0x112ea,), # (nil) .. + (0x11300, 0x11303,), # (nil) .. (0x1133c, 0x1133c,), # (nil) .. - (0x1134d, 0x1134d,), # (nil) .. + (0x1133e, 0x11344,), # (nil) .. + (0x11347, 0x11348,), # (nil) .. + (0x1134b, 0x1134d,), # (nil) .. + (0x11357, 0x11357,), # (nil) .. + (0x11362, 0x11363,), # (nil) .. (0x11366, 0x1136c,), # (nil) .. (0x11370, 0x11374,), # (nil) .. - (0x114c2, 0x114c3,), # (nil) .. - (0x115bf, 0x115c0,), # (nil) .. - (0x1163f, 0x1163f,), # (nil) .. 
- (0x116b6, 0x116b7,), # Takri Sign Virama ..Takri Sign Nukta + (0x114b0, 0x114c3,), # (nil) .. + (0x115af, 0x115b5,), # (nil) .. + (0x115b8, 0x115c0,), # (nil) .. + (0x115dc, 0x115dd,), # (nil) .. + (0x11630, 0x11640,), # (nil) .. + (0x116ab, 0x116b7,), # Takri Sign Anusvara ..Takri Sign Nukta + (0x1171d, 0x1172b,), # (nil) .. (0x16af0, 0x16af4,), # (nil) .. (0x16b30, 0x16b36,), # (nil) .. - (0x1bc9e, 0x1bc9e,), # (nil) .. + (0x16f51, 0x16f7e,), # Miao Sign Aspiration ..Miao Vowel Sign Ng + (0x16f8f, 0x16f92,), # Miao Tone Right ..Miao Tone Below + (0x1bc9d, 0x1bc9e,), # (nil) .. (0x1d165, 0x1d169,), # Musical Symbol Combining..Musical Symbol Combining (0x1d16d, 0x1d172,), # Musical Symbol Combining..Musical Symbol Combining (0x1d17b, 0x1d182,), # Musical Symbol Combining..Musical Symbol Combining (0x1d185, 0x1d18b,), # Musical Symbol Combining..Musical Symbol Combining (0x1d1aa, 0x1d1ad,), # Musical Symbol Combining..Musical Symbol Combining (0x1d242, 0x1d244,), # Combining Greek Musical ..Combining Greek Musical + (0x1da00, 0x1da36,), # (nil) .. + (0x1da3b, 0x1da6c,), # (nil) .. + (0x1da75, 0x1da75,), # (nil) .. + (0x1da84, 0x1da84,), # (nil) .. + (0x1da9b, 0x1da9f,), # (nil) .. + (0x1daa1, 0x1daaf,), # (nil) .. (0x1e8d0, 0x1e8d6,), # (nil) .. + (0xe0100, 0xe01ef,), # Variation Selector-17 ..Variation Selector-256 ) diff --git a/wcwidth/table_wide.py b/wcwidth/table_wide.py index 8e634f9..ff0c0ac 100644 --- a/wcwidth/table_wide.py +++ b/wcwidth/table_wide.py @@ -1,7 +1,7 @@ """Wide_Eastasian table. 
Created by setup.py.""" -# Generated: 2014-11-20T06:55:26.602851 -# Source: EastAsianWidth-7.0.0.txt -# Date: 2014-02-28, 23:15:00 GMT [KW, LI] +# Generated: 2015-09-02T01:27:58.907716 +# Source: EastAsianWidth-8.0.0.txt +# Date: 2015-02-10, 21:00:00 GMT [KW, LI] WIDE_EASTASIAN = ( (0x1100, 0x115f,), # Hangul Choseong Kiyeok ..Hangul Choseong Filler (0x2329, 0x232a,), # Left-pointing Angle Brac..Right-pointing Angle Bra diff --git a/wcwidth/table_zero.py b/wcwidth/table_zero.py new file mode 100644 index 0000000..42f9b57 --- /dev/null +++ b/wcwidth/table_zero.py @@ -0,0 +1,270 @@ +"""Zero_Width table. Created by setup.py.""" +# Generated: 2015-09-02T01:27:58.927544 +# Source: DerivedGeneralCategory-8.0.0.txt +# Date: 2015-02-13, 13:47:11 GMT [MD] +ZERO_WIDTH = ( + (0x0300, 0x036f,), # Combining Grave Accent ..Combining Latin Small Le + (0x0483, 0x0489,), # Combining Cyrillic Titlo..Combining Cyrillic Milli + (0x0591, 0x05bd,), # Hebrew Accent Etnahta ..Hebrew Point Meteg + (0x05bf, 0x05bf,), # Hebrew Point Rafe ..Hebrew Point Rafe + (0x05c1, 0x05c2,), # Hebrew Point Shin Dot ..Hebrew Point Sin Dot + (0x05c4, 0x05c5,), # Hebrew Mark Upper Dot ..Hebrew Mark Lower Dot + (0x05c7, 0x05c7,), # Hebrew Point Qamats Qata..Hebrew Point Qamats Qata + (0x0610, 0x061a,), # Arabic Sign Sallallahou ..Arabic Small Kasra + (0x064b, 0x065f,), # Arabic Fathatan ..Arabic Wavy Hamza Below + (0x0670, 0x0670,), # Arabic Letter Superscrip..Arabic Letter Superscrip + (0x06d6, 0x06dc,), # Arabic Small High Ligatu..Arabic Small High Seen + (0x06df, 0x06e4,), # Arabic Small High Rounde..Arabic Small High Madda + (0x06e7, 0x06e8,), # Arabic Small High Yeh ..Arabic Small High Noon + (0x06ea, 0x06ed,), # Arabic Empty Centre Low ..Arabic Small Low Meem + (0x0711, 0x0711,), # Syriac Letter Superscrip..Syriac Letter Superscrip + (0x0730, 0x074a,), # Syriac Pthaha Above ..Syriac Barrekh + (0x07a6, 0x07b0,), # Thaana Abafili ..Thaana Sukun + (0x07eb, 0x07f3,), # Nko Combining Short High..Nko 
Combining Double Dot + (0x0816, 0x0819,), # Samaritan Mark In ..Samaritan Mark Dagesh + (0x081b, 0x0823,), # Samaritan Mark Epentheti..Samaritan Vowel Sign A + (0x0825, 0x0827,), # Samaritan Vowel Sign Sho..Samaritan Vowel Sign U + (0x0829, 0x082d,), # Samaritan Vowel Sign Lon..Samaritan Mark Nequdaa + (0x0859, 0x085b,), # Mandaic Affrication Mark..Mandaic Gemination Mark + (0x08e3, 0x0902,), # (nil) ..Devanagari Sign Anusvara + (0x093a, 0x093a,), # Devanagari Vowel Sign Oe..Devanagari Vowel Sign Oe + (0x093c, 0x093c,), # Devanagari Sign Nukta ..Devanagari Sign Nukta + (0x0941, 0x0948,), # Devanagari Vowel Sign U ..Devanagari Vowel Sign Ai + (0x094d, 0x094d,), # Devanagari Sign Virama ..Devanagari Sign Virama + (0x0951, 0x0957,), # Devanagari Stress Sign U..Devanagari Vowel Sign Uu + (0x0962, 0x0963,), # Devanagari Vowel Sign Vo..Devanagari Vowel Sign Vo + (0x0981, 0x0981,), # Bengali Sign Candrabindu..Bengali Sign Candrabindu + (0x09bc, 0x09bc,), # Bengali Sign Nukta ..Bengali Sign Nukta + (0x09c1, 0x09c4,), # Bengali Vowel Sign U ..Bengali Vowel Sign Vocal + (0x09cd, 0x09cd,), # Bengali Sign Virama ..Bengali Sign Virama + (0x09e2, 0x09e3,), # Bengali Vowel Sign Vocal..Bengali Vowel Sign Vocal + (0x0a01, 0x0a02,), # Gurmukhi Sign Adak Bindi..Gurmukhi Sign Bindi + (0x0a3c, 0x0a3c,), # Gurmukhi Sign Nukta ..Gurmukhi Sign Nukta + (0x0a41, 0x0a42,), # Gurmukhi Vowel Sign U ..Gurmukhi Vowel Sign Uu + (0x0a47, 0x0a48,), # Gurmukhi Vowel Sign Ee ..Gurmukhi Vowel Sign Ai + (0x0a4b, 0x0a4d,), # Gurmukhi Vowel Sign Oo ..Gurmukhi Sign Virama + (0x0a51, 0x0a51,), # Gurmukhi Sign Udaat ..Gurmukhi Sign Udaat + (0x0a70, 0x0a71,), # Gurmukhi Tippi ..Gurmukhi Addak + (0x0a75, 0x0a75,), # Gurmukhi Sign Yakash ..Gurmukhi Sign Yakash + (0x0a81, 0x0a82,), # Gujarati Sign Candrabind..Gujarati Sign Anusvara + (0x0abc, 0x0abc,), # Gujarati Sign Nukta ..Gujarati Sign Nukta + (0x0ac1, 0x0ac5,), # Gujarati Vowel Sign U ..Gujarati Vowel Sign Cand + (0x0ac7, 0x0ac8,), # Gujarati Vowel Sign E 
..Gujarati Vowel Sign Ai + (0x0acd, 0x0acd,), # Gujarati Sign Virama ..Gujarati Sign Virama + (0x0ae2, 0x0ae3,), # Gujarati Vowel Sign Voca..Gujarati Vowel Sign Voca + (0x0b01, 0x0b01,), # Oriya Sign Candrabindu ..Oriya Sign Candrabindu + (0x0b3c, 0x0b3c,), # Oriya Sign Nukta ..Oriya Sign Nukta + (0x0b3f, 0x0b3f,), # Oriya Vowel Sign I ..Oriya Vowel Sign I + (0x0b41, 0x0b44,), # Oriya Vowel Sign U ..Oriya Vowel Sign Vocalic + (0x0b4d, 0x0b4d,), # Oriya Sign Virama ..Oriya Sign Virama + (0x0b56, 0x0b56,), # Oriya Ai Length Mark ..Oriya Ai Length Mark + (0x0b62, 0x0b63,), # Oriya Vowel Sign Vocalic..Oriya Vowel Sign Vocalic + (0x0b82, 0x0b82,), # Tamil Sign Anusvara ..Tamil Sign Anusvara + (0x0bc0, 0x0bc0,), # Tamil Vowel Sign Ii ..Tamil Vowel Sign Ii + (0x0bcd, 0x0bcd,), # Tamil Sign Virama ..Tamil Sign Virama + (0x0c00, 0x0c00,), # (nil) .. + (0x0c3e, 0x0c40,), # Telugu Vowel Sign Aa ..Telugu Vowel Sign Ii + (0x0c46, 0x0c48,), # Telugu Vowel Sign E ..Telugu Vowel Sign Ai + (0x0c4a, 0x0c4d,), # Telugu Vowel Sign O ..Telugu Sign Virama + (0x0c55, 0x0c56,), # Telugu Length Mark ..Telugu Ai Length Mark + (0x0c62, 0x0c63,), # Telugu Vowel Sign Vocali..Telugu Vowel Sign Vocali + (0x0c81, 0x0c81,), # (nil) .. + (0x0cbc, 0x0cbc,), # Kannada Sign Nukta ..Kannada Sign Nukta + (0x0cbf, 0x0cbf,), # Kannada Vowel Sign I ..Kannada Vowel Sign I + (0x0cc6, 0x0cc6,), # Kannada Vowel Sign E ..Kannada Vowel Sign E + (0x0ccc, 0x0ccd,), # Kannada Vowel Sign Au ..Kannada Sign Virama + (0x0ce2, 0x0ce3,), # Kannada Vowel Sign Vocal..Kannada Vowel Sign Vocal + (0x0d01, 0x0d01,), # (nil) .. 
+ (0x0d41, 0x0d44,), # Malayalam Vowel Sign U ..Malayalam Vowel Sign Voc + (0x0d4d, 0x0d4d,), # Malayalam Sign Virama ..Malayalam Sign Virama + (0x0d62, 0x0d63,), # Malayalam Vowel Sign Voc..Malayalam Vowel Sign Voc + (0x0dca, 0x0dca,), # Sinhala Sign Al-lakuna ..Sinhala Sign Al-lakuna + (0x0dd2, 0x0dd4,), # Sinhala Vowel Sign Ketti..Sinhala Vowel Sign Ketti + (0x0dd6, 0x0dd6,), # Sinhala Vowel Sign Diga ..Sinhala Vowel Sign Diga + (0x0e31, 0x0e31,), # Thai Character Mai Han-a..Thai Character Mai Han-a + (0x0e34, 0x0e3a,), # Thai Character Sara I ..Thai Character Phinthu + (0x0e47, 0x0e4e,), # Thai Character Maitaikhu..Thai Character Yamakkan + (0x0eb1, 0x0eb1,), # Lao Vowel Sign Mai Kan ..Lao Vowel Sign Mai Kan + (0x0eb4, 0x0eb9,), # Lao Vowel Sign I ..Lao Vowel Sign Uu + (0x0ebb, 0x0ebc,), # Lao Vowel Sign Mai Kon ..Lao Semivowel Sign Lo + (0x0ec8, 0x0ecd,), # Lao Tone Mai Ek ..Lao Niggahita + (0x0f18, 0x0f19,), # Tibetan Astrological Sig..Tibetan Astrological Sig + (0x0f35, 0x0f35,), # Tibetan Mark Ngas Bzung ..Tibetan Mark Ngas Bzung + (0x0f37, 0x0f37,), # Tibetan Mark Ngas Bzung ..Tibetan Mark Ngas Bzung + (0x0f39, 0x0f39,), # Tibetan Mark Tsa -phru ..Tibetan Mark Tsa -phru + (0x0f71, 0x0f7e,), # Tibetan Vowel Sign Aa ..Tibetan Sign Rjes Su Nga + (0x0f80, 0x0f84,), # Tibetan Vowel Sign Rever..Tibetan Mark Halanta + (0x0f86, 0x0f87,), # Tibetan Sign Lci Rtags ..Tibetan Sign Yang Rtags + (0x0f8d, 0x0f97,), # Tibetan Subjoined Sign L..Tibetan Subjoined Letter + (0x0f99, 0x0fbc,), # Tibetan Subjoined Letter..Tibetan Subjoined Letter + (0x0fc6, 0x0fc6,), # Tibetan Symbol Padma Gda..Tibetan Symbol Padma Gda + (0x102d, 0x1030,), # Myanmar Vowel Sign I ..Myanmar Vowel Sign Uu + (0x1032, 0x1037,), # Myanmar Vowel Sign Ai ..Myanmar Sign Dot Below + (0x1039, 0x103a,), # Myanmar Sign Virama ..Myanmar Sign Asat + (0x103d, 0x103e,), # Myanmar Consonant Sign M..Myanmar Consonant Sign M + (0x1058, 0x1059,), # Myanmar Vowel Sign Vocal..Myanmar Vowel Sign Vocal + (0x105e, 
0x1060,), # Myanmar Consonant Sign M..Myanmar Consonant Sign M + (0x1071, 0x1074,), # Myanmar Vowel Sign Geba ..Myanmar Vowel Sign Kayah + (0x1082, 0x1082,), # Myanmar Consonant Sign S..Myanmar Consonant Sign S + (0x1085, 0x1086,), # Myanmar Vowel Sign Shan ..Myanmar Vowel Sign Shan + (0x108d, 0x108d,), # Myanmar Sign Shan Counci..Myanmar Sign Shan Counci + (0x109d, 0x109d,), # Myanmar Vowel Sign Aiton..Myanmar Vowel Sign Aiton + (0x135d, 0x135f,), # Ethiopic Combining Gemin..Ethiopic Combining Gemin + (0x1712, 0x1714,), # Tagalog Vowel Sign I ..Tagalog Sign Virama + (0x1732, 0x1734,), # Hanunoo Vowel Sign I ..Hanunoo Sign Pamudpod + (0x1752, 0x1753,), # Buhid Vowel Sign I ..Buhid Vowel Sign U + (0x1772, 0x1773,), # Tagbanwa Vowel Sign I ..Tagbanwa Vowel Sign U + (0x17b4, 0x17b5,), # Khmer Vowel Inherent Aq ..Khmer Vowel Inherent Aa + (0x17b7, 0x17bd,), # Khmer Vowel Sign I ..Khmer Vowel Sign Ua + (0x17c6, 0x17c6,), # Khmer Sign Nikahit ..Khmer Sign Nikahit + (0x17c9, 0x17d3,), # Khmer Sign Muusikatoan ..Khmer Sign Bathamasat + (0x17dd, 0x17dd,), # Khmer Sign Atthacan ..Khmer Sign Atthacan + (0x180b, 0x180d,), # Mongolian Free Variation..Mongolian Free Variation + (0x18a9, 0x18a9,), # Mongolian Letter Ali Gal..Mongolian Letter Ali Gal + (0x1920, 0x1922,), # Limbu Vowel Sign A ..Limbu Vowel Sign U + (0x1927, 0x1928,), # Limbu Vowel Sign E ..Limbu Vowel Sign O + (0x1932, 0x1932,), # Limbu Small Letter Anusv..Limbu Small Letter Anusv + (0x1939, 0x193b,), # Limbu Sign Mukphreng ..Limbu Sign Sa-i + (0x1a17, 0x1a18,), # Buginese Vowel Sign I ..Buginese Vowel Sign U + (0x1a1b, 0x1a1b,), # Buginese Vowel Sign Ae ..Buginese Vowel Sign Ae + (0x1a56, 0x1a56,), # Tai Tham Consonant Sign ..Tai Tham Consonant Sign + (0x1a58, 0x1a5e,), # Tai Tham Sign Mai Kang L..Tai Tham Consonant Sign + (0x1a60, 0x1a60,), # Tai Tham Sign Sakot ..Tai Tham Sign Sakot + (0x1a62, 0x1a62,), # Tai Tham Vowel Sign Mai ..Tai Tham Vowel Sign Mai + (0x1a65, 0x1a6c,), # Tai Tham Vowel Sign I ..Tai Tham 
Vowel Sign Oa B + (0x1a73, 0x1a7c,), # Tai Tham Vowel Sign Oa A..Tai Tham Sign Khuen-lue + (0x1a7f, 0x1a7f,), # Tai Tham Combining Crypt..Tai Tham Combining Crypt + (0x1ab0, 0x1abe,), # (nil) .. + (0x1b00, 0x1b03,), # Balinese Sign Ulu Ricem ..Balinese Sign Surang + (0x1b34, 0x1b34,), # Balinese Sign Rerekan ..Balinese Sign Rerekan + (0x1b36, 0x1b3a,), # Balinese Vowel Sign Ulu ..Balinese Vowel Sign Ra R + (0x1b3c, 0x1b3c,), # Balinese Vowel Sign La L..Balinese Vowel Sign La L + (0x1b42, 0x1b42,), # Balinese Vowel Sign Pepe..Balinese Vowel Sign Pepe + (0x1b6b, 0x1b73,), # Balinese Musical Symbol ..Balinese Musical Symbol + (0x1b80, 0x1b81,), # Sundanese Sign Panyecek ..Sundanese Sign Panglayar + (0x1ba2, 0x1ba5,), # Sundanese Consonant Sign..Sundanese Vowel Sign Pan + (0x1ba8, 0x1ba9,), # Sundanese Vowel Sign Pam..Sundanese Vowel Sign Pan + (0x1bab, 0x1bad,), # Sundanese Sign Virama ..Sundanese Consonant Sign + (0x1be6, 0x1be6,), # Batak Sign Tompi ..Batak Sign Tompi + (0x1be8, 0x1be9,), # Batak Vowel Sign Pakpak ..Batak Vowel Sign Ee + (0x1bed, 0x1bed,), # Batak Vowel Sign Karo O ..Batak Vowel Sign Karo O + (0x1bef, 0x1bf1,), # Batak Vowel Sign U For S..Batak Consonant Sign H + (0x1c2c, 0x1c33,), # Lepcha Vowel Sign E ..Lepcha Consonant Sign T + (0x1c36, 0x1c37,), # Lepcha Sign Ran ..Lepcha Sign Nukta + (0x1cd0, 0x1cd2,), # Vedic Tone Karshana ..Vedic Tone Prenkha + (0x1cd4, 0x1ce0,), # Vedic Sign Yajurvedic Mi..Vedic Tone Rigvedic Kash + (0x1ce2, 0x1ce8,), # Vedic Sign Visarga Svari..Vedic Sign Visarga Anuda + (0x1ced, 0x1ced,), # Vedic Sign Tiryak ..Vedic Sign Tiryak + (0x1cf4, 0x1cf4,), # Vedic Tone Candra Above ..Vedic Tone Candra Above + (0x1cf8, 0x1cf9,), # (nil) .. + (0x1dc0, 0x1df5,), # Combining Dotted Grave A.. 
+ (0x1dfc, 0x1dff,), # Combining Double Inverte..Combining Right Arrowhea + (0x20d0, 0x20f0,), # Combining Left Harpoon A..Combining Asterisk Above + (0x2cef, 0x2cf1,), # Coptic Combining Ni Abov..Coptic Combining Spiritu + (0x2d7f, 0x2d7f,), # Tifinagh Consonant Joine..Tifinagh Consonant Joine + (0x2de0, 0x2dff,), # Combining Cyrillic Lette..Combining Cyrillic Lette + (0x302a, 0x302d,), # Ideographic Level Tone M..Ideographic Entering Ton + (0x3099, 0x309a,), # Combining Katakana-hirag..Combining Katakana-hirag + (0xa66f, 0xa672,), # Combining Cyrillic Vzmet..Combining Cyrillic Thous + (0xa674, 0xa67d,), # Combining Cyrillic Lette..Combining Cyrillic Payer + (0xa69e, 0xa69f,), # (nil) ..Combining Cyrillic Lette + (0xa6f0, 0xa6f1,), # Bamum Combining Mark Koq..Bamum Combining Mark Tuk + (0xa802, 0xa802,), # Syloti Nagri Sign Dvisva..Syloti Nagri Sign Dvisva + (0xa806, 0xa806,), # Syloti Nagri Sign Hasant..Syloti Nagri Sign Hasant + (0xa80b, 0xa80b,), # Syloti Nagri Sign Anusva..Syloti Nagri Sign Anusva + (0xa825, 0xa826,), # Syloti Nagri Vowel Sign ..Syloti Nagri Vowel Sign + (0xa8c4, 0xa8c4,), # Saurashtra Sign Virama ..Saurashtra Sign Virama + (0xa8e0, 0xa8f1,), # Combining Devanagari Dig..Combining Devanagari Sig + (0xa926, 0xa92d,), # Kayah Li Vowel Ue ..Kayah Li Tone Calya Plop + (0xa947, 0xa951,), # Rejang Vowel Sign I ..Rejang Consonant Sign R + (0xa980, 0xa982,), # Javanese Sign Panyangga ..Javanese Sign Layar + (0xa9b3, 0xa9b3,), # Javanese Sign Cecak Telu..Javanese Sign Cecak Telu + (0xa9b6, 0xa9b9,), # Javanese Vowel Sign Wulu..Javanese Vowel Sign Suku + (0xa9bc, 0xa9bc,), # Javanese Vowel Sign Pepe..Javanese Vowel Sign Pepe + (0xa9e5, 0xa9e5,), # (nil) .. 
+ (0xaa29, 0xaa2e,), # Cham Vowel Sign Aa ..Cham Vowel Sign Oe + (0xaa31, 0xaa32,), # Cham Vowel Sign Au ..Cham Vowel Sign Ue + (0xaa35, 0xaa36,), # Cham Consonant Sign La ..Cham Consonant Sign Wa + (0xaa43, 0xaa43,), # Cham Consonant Sign Fina..Cham Consonant Sign Fina + (0xaa4c, 0xaa4c,), # Cham Consonant Sign Fina..Cham Consonant Sign Fina + (0xaa7c, 0xaa7c,), # (nil) .. + (0xaab0, 0xaab0,), # Tai Viet Mai Kang ..Tai Viet Mai Kang + (0xaab2, 0xaab4,), # Tai Viet Vowel I ..Tai Viet Vowel U + (0xaab7, 0xaab8,), # Tai Viet Mai Khit ..Tai Viet Vowel Ia + (0xaabe, 0xaabf,), # Tai Viet Vowel Am ..Tai Viet Tone Mai Ek + (0xaac1, 0xaac1,), # Tai Viet Tone Mai Tho ..Tai Viet Tone Mai Tho + (0xaaec, 0xaaed,), # Meetei Mayek Vowel Sign ..Meetei Mayek Vowel Sign + (0xaaf6, 0xaaf6,), # Meetei Mayek Virama ..Meetei Mayek Virama + (0xabe5, 0xabe5,), # Meetei Mayek Vowel Sign ..Meetei Mayek Vowel Sign + (0xabe8, 0xabe8,), # Meetei Mayek Vowel Sign ..Meetei Mayek Vowel Sign + (0xabed, 0xabed,), # Meetei Mayek Apun Iyek ..Meetei Mayek Apun Iyek + (0xfb1e, 0xfb1e,), # Hebrew Point Judeo-spani..Hebrew Point Judeo-spani + (0xfe00, 0xfe0f,), # Variation Selector-1 ..Variation Selector-16 + (0xfe20, 0xfe2f,), # Combining Ligature Left .. + (0x101fd, 0x101fd,), # Phaistos Disc Sign Combi..Phaistos Disc Sign Combi + (0x102e0, 0x102e0,), # (nil) .. + (0x10376, 0x1037a,), # (nil) .. + (0x10a01, 0x10a03,), # Kharoshthi Vowel Sign I ..Kharoshthi Vowel Sign Vo + (0x10a05, 0x10a06,), # Kharoshthi Vowel Sign E ..Kharoshthi Vowel Sign O + (0x10a0c, 0x10a0f,), # Kharoshthi Vowel Length ..Kharoshthi Sign Visarga + (0x10a38, 0x10a3a,), # Kharoshthi Sign Bar Abov..Kharoshthi Sign Dot Belo + (0x10a3f, 0x10a3f,), # Kharoshthi Virama ..Kharoshthi Virama + (0x10ae5, 0x10ae6,), # (nil) .. 
+ (0x11001, 0x11001,), # Brahmi Sign Anusvara ..Brahmi Sign Anusvara + (0x11038, 0x11046,), # Brahmi Vowel Sign Aa ..Brahmi Virama + (0x1107f, 0x11081,), # (nil) ..Kaithi Sign Anusvara + (0x110b3, 0x110b6,), # Kaithi Vowel Sign U ..Kaithi Vowel Sign Ai + (0x110b9, 0x110ba,), # Kaithi Sign Virama ..Kaithi Sign Nukta + (0x11100, 0x11102,), # Chakma Sign Candrabindu ..Chakma Sign Visarga + (0x11127, 0x1112b,), # Chakma Vowel Sign A ..Chakma Vowel Sign Uu + (0x1112d, 0x11134,), # Chakma Vowel Sign Ai ..Chakma Maayyaa + (0x11173, 0x11173,), # (nil) .. + (0x11180, 0x11181,), # Sharada Sign Candrabindu..Sharada Sign Anusvara + (0x111b6, 0x111be,), # Sharada Vowel Sign U ..Sharada Vowel Sign O + (0x111ca, 0x111cc,), # (nil) .. + (0x1122f, 0x11231,), # (nil) .. + (0x11234, 0x11234,), # (nil) .. + (0x11236, 0x11237,), # (nil) .. + (0x112df, 0x112df,), # (nil) .. + (0x112e3, 0x112ea,), # (nil) .. + (0x11300, 0x11301,), # (nil) .. + (0x1133c, 0x1133c,), # (nil) .. + (0x11340, 0x11340,), # (nil) .. + (0x11366, 0x1136c,), # (nil) .. + (0x11370, 0x11374,), # (nil) .. + (0x114b3, 0x114b8,), # (nil) .. + (0x114ba, 0x114ba,), # (nil) .. + (0x114bf, 0x114c0,), # (nil) .. + (0x114c2, 0x114c3,), # (nil) .. + (0x115b2, 0x115b5,), # (nil) .. + (0x115bc, 0x115bd,), # (nil) .. + (0x115bf, 0x115c0,), # (nil) .. + (0x115dc, 0x115dd,), # (nil) .. + (0x11633, 0x1163a,), # (nil) .. + (0x1163d, 0x1163d,), # (nil) .. + (0x1163f, 0x11640,), # (nil) .. + (0x116ab, 0x116ab,), # Takri Sign Anusvara ..Takri Sign Anusvara + (0x116ad, 0x116ad,), # Takri Vowel Sign Aa ..Takri Vowel Sign Aa + (0x116b0, 0x116b5,), # Takri Vowel Sign U ..Takri Vowel Sign Au + (0x116b7, 0x116b7,), # Takri Sign Nukta ..Takri Sign Nukta + (0x1171d, 0x1171f,), # (nil) .. + (0x11722, 0x11725,), # (nil) .. + (0x11727, 0x1172b,), # (nil) .. + (0x16af0, 0x16af4,), # (nil) .. + (0x16b30, 0x16b36,), # (nil) .. + (0x16f8f, 0x16f92,), # Miao Tone Right ..Miao Tone Below + (0x1bc9d, 0x1bc9e,), # (nil) .. 
+ (0x1d167, 0x1d169,), # Musical Symbol Combining..Musical Symbol Combining + (0x1d17b, 0x1d182,), # Musical Symbol Combining..Musical Symbol Combining + (0x1d185, 0x1d18b,), # Musical Symbol Combining..Musical Symbol Combining + (0x1d1aa, 0x1d1ad,), # Musical Symbol Combining..Musical Symbol Combining + (0x1d242, 0x1d244,), # Combining Greek Musical ..Combining Greek Musical + (0x1da00, 0x1da36,), # (nil) .. + (0x1da3b, 0x1da6c,), # (nil) .. + (0x1da75, 0x1da75,), # (nil) .. + (0x1da84, 0x1da84,), # (nil) .. + (0x1da9b, 0x1da9f,), # (nil) .. + (0x1daa1, 0x1daaf,), # (nil) .. + (0x1e8d0, 0x1e8d6,), # (nil) .. + (0xe0100, 0xe01ef,), # Variation Selector-17 ..Variation Selector-256 +) diff --git a/wcwidth/tests/test_core.py b/wcwidth/tests/test_core.py index 313c031..87a4a95 100755 --- a/wcwidth/tests/test_core.py +++ b/wcwidth/tests/test_core.py @@ -107,3 +107,31 @@ def test_combining_cafe(): # verify, assert length_each == expect_length_each assert length_phrase == expect_length_phrase + +def test_combining_enclosing(): + u"""CYRILLIC CAPITAL LETTER A + COMBINING CYRILLIC HUNDRED THOUSANDS SIGN is А҈ of length 1.""" + phrase = u"\u0410\u0488" + expect_length_each = (1, 0) + expect_length_phrase = 1 + + # exercise, + length_each = tuple(map(wcwidth.wcwidth, phrase)) + length_phrase = wcwidth.wcswidth(phrase, len(phrase)) + + # verify, + assert length_each == expect_length_each + assert length_phrase == expect_length_phrase + +def test_combining_spacing(): + u"""Balinese kapal (ship) is ᬓᬨᬮ᭄ of length 4.""" + phrase = u"\u1B13\u1B28\u1B2E\u1B44" + expect_length_each = (1, 1, 1, 1) + expect_length_phrase = 4 + + # exercise, + length_each = tuple(map(wcwidth.wcwidth, phrase)) + length_phrase = wcwidth.wcswidth(phrase, len(phrase)) + + # verify, + assert length_each == expect_length_each + assert length_phrase == expect_length_phrase diff --git a/wcwidth/wcwidth.py b/wcwidth/wcwidth.py index 3befaa9..73145bc 100644 --- a/wcwidth/wcwidth.py +++ b/wcwidth/wcwidth.py @@ 
-71,7 +71,8 @@ from __future__ import division from .table_wide import WIDE_EASTASIAN -from .table_comb import NONZERO_COMBINING +from .table_comb import COMBINING +from .table_zero import ZERO_WIDTH def _bisearch(ucs, table): @@ -121,8 +122,7 @@ def wcwidth(wc): The following have a column width of 0: - Non-spacing and enclosing combining characters (general - category code Mn or Me in the Unicode database). Generally, - having a non-zero value returned by ``unicodedata.combining()``. + category code Mn or Me in the Unicode database). - NULL (U+0000, 0). @@ -174,8 +174,8 @@ def wcwidth(wc): if ucs < 32 or 0x07F <= ucs < 0x0A0: return -1 - # combining characters have zero width - if _bisearch(ucs, NONZERO_COMBINING): + # combining characters with zero width + if _bisearch(ucs, ZERO_WIDTH): return 0 return 1 + _bisearch(ucs, WIDE_EASTASIAN) From 92b28f84c9487b13134ce1a2c048ee102ae430fd Mon Sep 17 00:00:00 2001 From: Jeff Quast Date: Sun, 13 Sep 2015 18:48:42 -0700 Subject: [PATCH 03/29] Use python3.5 to update unicode tables This only updates their # comment verbal description. Lucky for us, python3.5 has also updated their Unicode data, so we can very accurately describe such things. --- wcwidth/table_wide.py | 2 +- wcwidth/table_zero.py | 104 +++++++++++++++++++++--------------------- 2 files changed, 53 insertions(+), 53 deletions(-) diff --git a/wcwidth/table_wide.py b/wcwidth/table_wide.py index ff0c0ac..c2f1b36 100644 --- a/wcwidth/table_wide.py +++ b/wcwidth/table_wide.py @@ -1,5 +1,5 @@ """Wide_Eastasian table. Created by setup.py.""" -# Generated: 2015-09-02T01:27:58.907716 +# Generated: 2015-09-14T01:48:19.503860 # Source: EastAsianWidth-8.0.0.txt # Date: 2015-02-10, 21:00:00 GMT [KW, LI] WIDE_EASTASIAN = ( diff --git a/wcwidth/table_zero.py b/wcwidth/table_zero.py index 42f9b57..1e4f78d 100644 --- a/wcwidth/table_zero.py +++ b/wcwidth/table_zero.py @@ -1,5 +1,5 @@ """Zero_Width table. 
Created by setup.py.""" -# Generated: 2015-09-02T01:27:58.927544 +# Generated: 2015-09-14T01:48:19.532217 # Source: DerivedGeneralCategory-8.0.0.txt # Date: 2015-02-13, 13:47:11 GMT [MD] ZERO_WIDTH = ( @@ -26,7 +26,7 @@ (0x0825, 0x0827,), # Samaritan Vowel Sign Sho..Samaritan Vowel Sign U (0x0829, 0x082d,), # Samaritan Vowel Sign Lon..Samaritan Mark Nequdaa (0x0859, 0x085b,), # Mandaic Affrication Mark..Mandaic Gemination Mark - (0x08e3, 0x0902,), # (nil) ..Devanagari Sign Anusvara + (0x08e3, 0x0902,), # Arabic Turned Damma Belo..Devanagari Sign Anusvara (0x093a, 0x093a,), # Devanagari Vowel Sign Oe..Devanagari Vowel Sign Oe (0x093c, 0x093c,), # Devanagari Sign Nukta ..Devanagari Sign Nukta (0x0941, 0x0948,), # Devanagari Vowel Sign U ..Devanagari Vowel Sign Ai @@ -62,19 +62,19 @@ (0x0b82, 0x0b82,), # Tamil Sign Anusvara ..Tamil Sign Anusvara (0x0bc0, 0x0bc0,), # Tamil Vowel Sign Ii ..Tamil Vowel Sign Ii (0x0bcd, 0x0bcd,), # Tamil Sign Virama ..Tamil Sign Virama - (0x0c00, 0x0c00,), # (nil) .. + (0x0c00, 0x0c00,), # Telugu Sign Combining Ca..Telugu Sign Combining Ca (0x0c3e, 0x0c40,), # Telugu Vowel Sign Aa ..Telugu Vowel Sign Ii (0x0c46, 0x0c48,), # Telugu Vowel Sign E ..Telugu Vowel Sign Ai (0x0c4a, 0x0c4d,), # Telugu Vowel Sign O ..Telugu Sign Virama (0x0c55, 0x0c56,), # Telugu Length Mark ..Telugu Ai Length Mark (0x0c62, 0x0c63,), # Telugu Vowel Sign Vocali..Telugu Vowel Sign Vocali - (0x0c81, 0x0c81,), # (nil) .. + (0x0c81, 0x0c81,), # Kannada Sign Candrabindu..Kannada Sign Candrabindu (0x0cbc, 0x0cbc,), # Kannada Sign Nukta ..Kannada Sign Nukta (0x0cbf, 0x0cbf,), # Kannada Vowel Sign I ..Kannada Vowel Sign I (0x0cc6, 0x0cc6,), # Kannada Vowel Sign E ..Kannada Vowel Sign E (0x0ccc, 0x0ccd,), # Kannada Vowel Sign Au ..Kannada Sign Virama (0x0ce2, 0x0ce3,), # Kannada Vowel Sign Vocal..Kannada Vowel Sign Vocal - (0x0d01, 0x0d01,), # (nil) .. 
+ (0x0d01, 0x0d01,), # Malayalam Sign Candrabin..Malayalam Sign Candrabin (0x0d41, 0x0d44,), # Malayalam Vowel Sign U ..Malayalam Vowel Sign Voc (0x0d4d, 0x0d4d,), # Malayalam Sign Virama ..Malayalam Sign Virama (0x0d62, 0x0d63,), # Malayalam Vowel Sign Voc..Malayalam Vowel Sign Voc @@ -134,7 +134,7 @@ (0x1a65, 0x1a6c,), # Tai Tham Vowel Sign I ..Tai Tham Vowel Sign Oa B (0x1a73, 0x1a7c,), # Tai Tham Vowel Sign Oa A..Tai Tham Sign Khuen-lue (0x1a7f, 0x1a7f,), # Tai Tham Combining Crypt..Tai Tham Combining Crypt - (0x1ab0, 0x1abe,), # (nil) .. + (0x1ab0, 0x1abe,), # Combining Doubled Circum..Combining Parentheses Ov (0x1b00, 0x1b03,), # Balinese Sign Ulu Ricem ..Balinese Sign Surang (0x1b34, 0x1b34,), # Balinese Sign Rerekan ..Balinese Sign Rerekan (0x1b36, 0x1b3a,), # Balinese Vowel Sign Ulu ..Balinese Vowel Sign Ra R @@ -156,8 +156,8 @@ (0x1ce2, 0x1ce8,), # Vedic Sign Visarga Svari..Vedic Sign Visarga Anuda (0x1ced, 0x1ced,), # Vedic Sign Tiryak ..Vedic Sign Tiryak (0x1cf4, 0x1cf4,), # Vedic Tone Candra Above ..Vedic Tone Candra Above - (0x1cf8, 0x1cf9,), # (nil) .. - (0x1dc0, 0x1df5,), # Combining Dotted Grave A.. 
+ (0x1cf8, 0x1cf9,), # Vedic Tone Ring Above ..Vedic Tone Double Ring A + (0x1dc0, 0x1df5,), # Combining Dotted Grave A..Combining Up Tack Above (0x1dfc, 0x1dff,), # Combining Double Inverte..Combining Right Arrowhea (0x20d0, 0x20f0,), # Combining Left Harpoon A..Combining Asterisk Above (0x2cef, 0x2cf1,), # Coptic Combining Ni Abov..Coptic Combining Spiritu @@ -167,7 +167,7 @@ (0x3099, 0x309a,), # Combining Katakana-hirag..Combining Katakana-hirag (0xa66f, 0xa672,), # Combining Cyrillic Vzmet..Combining Cyrillic Thous (0xa674, 0xa67d,), # Combining Cyrillic Lette..Combining Cyrillic Payer - (0xa69e, 0xa69f,), # (nil) ..Combining Cyrillic Lette + (0xa69e, 0xa69f,), # Combining Cyrillic Lette..Combining Cyrillic Lette (0xa6f0, 0xa6f1,), # Bamum Combining Mark Koq..Bamum Combining Mark Tuk (0xa802, 0xa802,), # Syloti Nagri Sign Dvisva..Syloti Nagri Sign Dvisva (0xa806, 0xa806,), # Syloti Nagri Sign Hasant..Syloti Nagri Sign Hasant @@ -181,13 +181,13 @@ (0xa9b3, 0xa9b3,), # Javanese Sign Cecak Telu..Javanese Sign Cecak Telu (0xa9b6, 0xa9b9,), # Javanese Vowel Sign Wulu..Javanese Vowel Sign Suku (0xa9bc, 0xa9bc,), # Javanese Vowel Sign Pepe..Javanese Vowel Sign Pepe - (0xa9e5, 0xa9e5,), # (nil) .. + (0xa9e5, 0xa9e5,), # Myanmar Sign Shan Saw ..Myanmar Sign Shan Saw (0xaa29, 0xaa2e,), # Cham Vowel Sign Aa ..Cham Vowel Sign Oe (0xaa31, 0xaa32,), # Cham Vowel Sign Au ..Cham Vowel Sign Ue (0xaa35, 0xaa36,), # Cham Consonant Sign La ..Cham Consonant Sign Wa (0xaa43, 0xaa43,), # Cham Consonant Sign Fina..Cham Consonant Sign Fina (0xaa4c, 0xaa4c,), # Cham Consonant Sign Fina..Cham Consonant Sign Fina - (0xaa7c, 0xaa7c,), # (nil) .. 
+ (0xaa7c, 0xaa7c,), # Myanmar Sign Tai Laing T..Myanmar Sign Tai Laing T (0xaab0, 0xaab0,), # Tai Viet Mai Kang ..Tai Viet Mai Kang (0xaab2, 0xaab4,), # Tai Viet Vowel I ..Tai Viet Vowel U (0xaab7, 0xaab8,), # Tai Viet Mai Khit ..Tai Viet Vowel Ia @@ -200,71 +200,71 @@ (0xabed, 0xabed,), # Meetei Mayek Apun Iyek ..Meetei Mayek Apun Iyek (0xfb1e, 0xfb1e,), # Hebrew Point Judeo-spani..Hebrew Point Judeo-spani (0xfe00, 0xfe0f,), # Variation Selector-1 ..Variation Selector-16 - (0xfe20, 0xfe2f,), # Combining Ligature Left .. + (0xfe20, 0xfe2f,), # Combining Ligature Left ..Combining Cyrillic Titlo (0x101fd, 0x101fd,), # Phaistos Disc Sign Combi..Phaistos Disc Sign Combi - (0x102e0, 0x102e0,), # (nil) .. - (0x10376, 0x1037a,), # (nil) .. + (0x102e0, 0x102e0,), # Coptic Epact Thousands M..Coptic Epact Thousands M + (0x10376, 0x1037a,), # Combining Old Permic Let..Combining Old Permic Let (0x10a01, 0x10a03,), # Kharoshthi Vowel Sign I ..Kharoshthi Vowel Sign Vo (0x10a05, 0x10a06,), # Kharoshthi Vowel Sign E ..Kharoshthi Vowel Sign O (0x10a0c, 0x10a0f,), # Kharoshthi Vowel Length ..Kharoshthi Sign Visarga (0x10a38, 0x10a3a,), # Kharoshthi Sign Bar Abov..Kharoshthi Sign Dot Belo (0x10a3f, 0x10a3f,), # Kharoshthi Virama ..Kharoshthi Virama - (0x10ae5, 0x10ae6,), # (nil) .. + (0x10ae5, 0x10ae6,), # Manichaean Abbreviation ..Manichaean Abbreviation (0x11001, 0x11001,), # Brahmi Sign Anusvara ..Brahmi Sign Anusvara (0x11038, 0x11046,), # Brahmi Vowel Sign Aa ..Brahmi Virama - (0x1107f, 0x11081,), # (nil) ..Kaithi Sign Anusvara + (0x1107f, 0x11081,), # Brahmi Number Joiner ..Kaithi Sign Anusvara (0x110b3, 0x110b6,), # Kaithi Vowel Sign U ..Kaithi Vowel Sign Ai (0x110b9, 0x110ba,), # Kaithi Sign Virama ..Kaithi Sign Nukta (0x11100, 0x11102,), # Chakma Sign Candrabindu ..Chakma Sign Visarga (0x11127, 0x1112b,), # Chakma Vowel Sign A ..Chakma Vowel Sign Uu (0x1112d, 0x11134,), # Chakma Vowel Sign Ai ..Chakma Maayyaa - (0x11173, 0x11173,), # (nil) .. 
+ (0x11173, 0x11173,), # Mahajani Sign Nukta ..Mahajani Sign Nukta (0x11180, 0x11181,), # Sharada Sign Candrabindu..Sharada Sign Anusvara (0x111b6, 0x111be,), # Sharada Vowel Sign U ..Sharada Vowel Sign O - (0x111ca, 0x111cc,), # (nil) .. - (0x1122f, 0x11231,), # (nil) .. - (0x11234, 0x11234,), # (nil) .. - (0x11236, 0x11237,), # (nil) .. - (0x112df, 0x112df,), # (nil) .. - (0x112e3, 0x112ea,), # (nil) .. - (0x11300, 0x11301,), # (nil) .. - (0x1133c, 0x1133c,), # (nil) .. - (0x11340, 0x11340,), # (nil) .. - (0x11366, 0x1136c,), # (nil) .. - (0x11370, 0x11374,), # (nil) .. - (0x114b3, 0x114b8,), # (nil) .. - (0x114ba, 0x114ba,), # (nil) .. - (0x114bf, 0x114c0,), # (nil) .. - (0x114c2, 0x114c3,), # (nil) .. - (0x115b2, 0x115b5,), # (nil) .. - (0x115bc, 0x115bd,), # (nil) .. - (0x115bf, 0x115c0,), # (nil) .. - (0x115dc, 0x115dd,), # (nil) .. - (0x11633, 0x1163a,), # (nil) .. - (0x1163d, 0x1163d,), # (nil) .. - (0x1163f, 0x11640,), # (nil) .. + (0x111ca, 0x111cc,), # Sharada Sign Nukta ..Sharada Extra Short Vowe + (0x1122f, 0x11231,), # Khojki Vowel Sign U ..Khojki Vowel Sign Ai + (0x11234, 0x11234,), # Khojki Sign Anusvara ..Khojki Sign Anusvara + (0x11236, 0x11237,), # Khojki Sign Nukta ..Khojki Sign Shadda + (0x112df, 0x112df,), # Khudawadi Sign Anusvara ..Khudawadi Sign Anusvara + (0x112e3, 0x112ea,), # Khudawadi Vowel Sign U ..Khudawadi Sign Virama + (0x11300, 0x11301,), # Grantha Sign Combining A..Grantha Sign Candrabindu + (0x1133c, 0x1133c,), # Grantha Sign Nukta ..Grantha Sign Nukta + (0x11340, 0x11340,), # Grantha Vowel Sign Ii ..Grantha Vowel Sign Ii + (0x11366, 0x1136c,), # Combining Grantha Digit ..Combining Grantha Digit + (0x11370, 0x11374,), # Combining Grantha Letter..Combining Grantha Letter + (0x114b3, 0x114b8,), # Tirhuta Vowel Sign U ..Tirhuta Vowel Sign Vocal + (0x114ba, 0x114ba,), # Tirhuta Vowel Sign Short..Tirhuta Vowel Sign Short + (0x114bf, 0x114c0,), # Tirhuta Sign Candrabindu..Tirhuta Sign Anusvara + (0x114c2, 0x114c3,), # Tirhuta Sign 
Virama ..Tirhuta Sign Nukta + (0x115b2, 0x115b5,), # Siddham Vowel Sign U ..Siddham Vowel Sign Vocal + (0x115bc, 0x115bd,), # Siddham Sign Candrabindu..Siddham Sign Anusvara + (0x115bf, 0x115c0,), # Siddham Sign Virama ..Siddham Sign Nukta + (0x115dc, 0x115dd,), # Siddham Vowel Sign Alter..Siddham Vowel Sign Alter + (0x11633, 0x1163a,), # Modi Vowel Sign U ..Modi Vowel Sign Ai + (0x1163d, 0x1163d,), # Modi Sign Anusvara ..Modi Sign Anusvara + (0x1163f, 0x11640,), # Modi Sign Virama ..Modi Sign Ardhacandra (0x116ab, 0x116ab,), # Takri Sign Anusvara ..Takri Sign Anusvara (0x116ad, 0x116ad,), # Takri Vowel Sign Aa ..Takri Vowel Sign Aa (0x116b0, 0x116b5,), # Takri Vowel Sign U ..Takri Vowel Sign Au (0x116b7, 0x116b7,), # Takri Sign Nukta ..Takri Sign Nukta - (0x1171d, 0x1171f,), # (nil) .. - (0x11722, 0x11725,), # (nil) .. - (0x11727, 0x1172b,), # (nil) .. - (0x16af0, 0x16af4,), # (nil) .. - (0x16b30, 0x16b36,), # (nil) .. + (0x1171d, 0x1171f,), # Ahom Consonant Sign Medi..Ahom Consonant Sign Medi + (0x11722, 0x11725,), # Ahom Vowel Sign I ..Ahom Vowel Sign Uu + (0x11727, 0x1172b,), # Ahom Vowel Sign Aw ..Ahom Sign Killer + (0x16af0, 0x16af4,), # Bassa Vah Combining High..Bassa Vah Combining High + (0x16b30, 0x16b36,), # Pahawh Hmong Mark Cim Tu..Pahawh Hmong Mark Cim Ta (0x16f8f, 0x16f92,), # Miao Tone Right ..Miao Tone Below - (0x1bc9d, 0x1bc9e,), # (nil) .. + (0x1bc9d, 0x1bc9e,), # Duployan Thick Letter Se..Duployan Double Mark (0x1d167, 0x1d169,), # Musical Symbol Combining..Musical Symbol Combining (0x1d17b, 0x1d182,), # Musical Symbol Combining..Musical Symbol Combining (0x1d185, 0x1d18b,), # Musical Symbol Combining..Musical Symbol Combining (0x1d1aa, 0x1d1ad,), # Musical Symbol Combining..Musical Symbol Combining (0x1d242, 0x1d244,), # Combining Greek Musical ..Combining Greek Musical - (0x1da00, 0x1da36,), # (nil) .. - (0x1da3b, 0x1da6c,), # (nil) .. - (0x1da75, 0x1da75,), # (nil) .. - (0x1da84, 0x1da84,), # (nil) .. - (0x1da9b, 0x1da9f,), # (nil) .. 
- (0x1daa1, 0x1daaf,), # (nil) .. - (0x1e8d0, 0x1e8d6,), # (nil) .. + (0x1da00, 0x1da36,), # Signwriting Head Rim ..Signwriting Air Sucking + (0x1da3b, 0x1da6c,), # Signwriting Mouth Closed..Signwriting Excitement + (0x1da75, 0x1da75,), # Signwriting Upper Body T..Signwriting Upper Body T + (0x1da84, 0x1da84,), # Signwriting Location Hea..Signwriting Location Hea + (0x1da9b, 0x1da9f,), # Signwriting Fill Modifie..Signwriting Fill Modifie + (0x1daa1, 0x1daaf,), # Signwriting Rotation Mod..Signwriting Rotation Mod + (0x1e8d0, 0x1e8d6,), # Mende Kikakui Combining ..Mende Kikakui Combining (0xe0100, 0xe01ef,), # Variation Selector-17 ..Variation Selector-256 ) From 4deb5633801afe92e11748d7b44e0e896f0a80e0 Mon Sep 17 00:00:00 2001 From: Jeff Quast Date: Sun, 13 Sep 2015 18:51:11 -0700 Subject: [PATCH 04/29] Remove table_comb.py entirely, if it is unused! --- wcwidth/table_comb.py | 242 ------------------------------------------ wcwidth/wcwidth.py | 1 - 2 files changed, 243 deletions(-) delete mode 100644 wcwidth/table_comb.py diff --git a/wcwidth/table_comb.py b/wcwidth/table_comb.py deleted file mode 100644 index 4203ff2..0000000 --- a/wcwidth/table_comb.py +++ /dev/null @@ -1,242 +0,0 @@ -"""Combining table. 
Created by setup.py.""" -# Generated: 2015-09-02T01:27:58.918349 -# Source: DerivedGeneralCategory-8.0.0.txt -# Date: 2015-02-13, 13:47:11 GMT [MD] -COMBINING = ( - (0x0300, 0x036f,), # Combining Grave Accent ..Combining Latin Small Le - (0x0483, 0x0489,), # Combining Cyrillic Titlo..Combining Cyrillic Milli - (0x0591, 0x05bd,), # Hebrew Accent Etnahta ..Hebrew Point Meteg - (0x05bf, 0x05bf,), # Hebrew Point Rafe ..Hebrew Point Rafe - (0x05c1, 0x05c2,), # Hebrew Point Shin Dot ..Hebrew Point Sin Dot - (0x05c4, 0x05c5,), # Hebrew Mark Upper Dot ..Hebrew Mark Lower Dot - (0x05c7, 0x05c7,), # Hebrew Point Qamats Qata..Hebrew Point Qamats Qata - (0x0610, 0x061a,), # Arabic Sign Sallallahou ..Arabic Small Kasra - (0x064b, 0x065f,), # Arabic Fathatan ..Arabic Wavy Hamza Below - (0x0670, 0x0670,), # Arabic Letter Superscrip..Arabic Letter Superscrip - (0x06d6, 0x06dc,), # Arabic Small High Ligatu..Arabic Small High Seen - (0x06df, 0x06e4,), # Arabic Small High Rounde..Arabic Small High Madda - (0x06e7, 0x06e8,), # Arabic Small High Yeh ..Arabic Small High Noon - (0x06ea, 0x06ed,), # Arabic Empty Centre Low ..Arabic Small Low Meem - (0x0711, 0x0711,), # Syriac Letter Superscrip..Syriac Letter Superscrip - (0x0730, 0x074a,), # Syriac Pthaha Above ..Syriac Barrekh - (0x07a6, 0x07b0,), # Thaana Abafili ..Thaana Sukun - (0x07eb, 0x07f3,), # Nko Combining Short High..Nko Combining Double Dot - (0x0816, 0x0819,), # Samaritan Mark In ..Samaritan Mark Dagesh - (0x081b, 0x0823,), # Samaritan Mark Epentheti..Samaritan Vowel Sign A - (0x0825, 0x0827,), # Samaritan Vowel Sign Sho..Samaritan Vowel Sign U - (0x0829, 0x082d,), # Samaritan Vowel Sign Lon..Samaritan Mark Nequdaa - (0x0859, 0x085b,), # Mandaic Affrication Mark..Mandaic Gemination Mark - (0x08e3, 0x0903,), # (nil) ..Devanagari Sign Visarga - (0x093a, 0x093c,), # Devanagari Vowel Sign Oe..Devanagari Sign Nukta - (0x093e, 0x094f,), # Devanagari Vowel Sign Aa..Devanagari Vowel Sign Aw - (0x0951, 0x0957,), # Devanagari Stress 
Sign U..Devanagari Vowel Sign Uu - (0x0962, 0x0963,), # Devanagari Vowel Sign Vo..Devanagari Vowel Sign Vo - (0x0981, 0x0983,), # Bengali Sign Candrabindu..Bengali Sign Visarga - (0x09bc, 0x09bc,), # Bengali Sign Nukta ..Bengali Sign Nukta - (0x09be, 0x09c4,), # Bengali Vowel Sign Aa ..Bengali Vowel Sign Vocal - (0x09c7, 0x09c8,), # Bengali Vowel Sign E ..Bengali Vowel Sign Ai - (0x09cb, 0x09cd,), # Bengali Vowel Sign O ..Bengali Sign Virama - (0x09d7, 0x09d7,), # Bengali Au Length Mark ..Bengali Au Length Mark - (0x09e2, 0x09e3,), # Bengali Vowel Sign Vocal..Bengali Vowel Sign Vocal - (0x0a01, 0x0a03,), # Gurmukhi Sign Adak Bindi..Gurmukhi Sign Visarga - (0x0a3c, 0x0a3c,), # Gurmukhi Sign Nukta ..Gurmukhi Sign Nukta - (0x0a3e, 0x0a42,), # Gurmukhi Vowel Sign Aa ..Gurmukhi Vowel Sign Uu - (0x0a47, 0x0a48,), # Gurmukhi Vowel Sign Ee ..Gurmukhi Vowel Sign Ai - (0x0a4b, 0x0a4d,), # Gurmukhi Vowel Sign Oo ..Gurmukhi Sign Virama - (0x0a51, 0x0a51,), # Gurmukhi Sign Udaat ..Gurmukhi Sign Udaat - (0x0a70, 0x0a71,), # Gurmukhi Tippi ..Gurmukhi Addak - (0x0a75, 0x0a75,), # Gurmukhi Sign Yakash ..Gurmukhi Sign Yakash - (0x0a81, 0x0a83,), # Gujarati Sign Candrabind..Gujarati Sign Visarga - (0x0abc, 0x0abc,), # Gujarati Sign Nukta ..Gujarati Sign Nukta - (0x0abe, 0x0ac5,), # Gujarati Vowel Sign Aa ..Gujarati Vowel Sign Cand - (0x0ac7, 0x0ac9,), # Gujarati Vowel Sign E ..Gujarati Vowel Sign Cand - (0x0acb, 0x0acd,), # Gujarati Vowel Sign O ..Gujarati Sign Virama - (0x0ae2, 0x0ae3,), # Gujarati Vowel Sign Voca..Gujarati Vowel Sign Voca - (0x0b01, 0x0b03,), # Oriya Sign Candrabindu ..Oriya Sign Visarga - (0x0b3c, 0x0b3c,), # Oriya Sign Nukta ..Oriya Sign Nukta - (0x0b3e, 0x0b44,), # Oriya Vowel Sign Aa ..Oriya Vowel Sign Vocalic - (0x0b47, 0x0b48,), # Oriya Vowel Sign E ..Oriya Vowel Sign Ai - (0x0b4b, 0x0b4d,), # Oriya Vowel Sign O ..Oriya Sign Virama - (0x0b56, 0x0b57,), # Oriya Ai Length Mark ..Oriya Au Length Mark - (0x0b62, 0x0b63,), # Oriya Vowel Sign Vocalic..Oriya Vowel 
Sign Vocalic - (0x0b82, 0x0b82,), # Tamil Sign Anusvara ..Tamil Sign Anusvara - (0x0bbe, 0x0bc2,), # Tamil Vowel Sign Aa ..Tamil Vowel Sign Uu - (0x0bc6, 0x0bc8,), # Tamil Vowel Sign E ..Tamil Vowel Sign Ai - (0x0bca, 0x0bcd,), # Tamil Vowel Sign O ..Tamil Sign Virama - (0x0bd7, 0x0bd7,), # Tamil Au Length Mark ..Tamil Au Length Mark - (0x0c00, 0x0c03,), # (nil) ..Telugu Sign Visarga - (0x0c3e, 0x0c44,), # Telugu Vowel Sign Aa ..Telugu Vowel Sign Vocali - (0x0c46, 0x0c48,), # Telugu Vowel Sign E ..Telugu Vowel Sign Ai - (0x0c4a, 0x0c4d,), # Telugu Vowel Sign O ..Telugu Sign Virama - (0x0c55, 0x0c56,), # Telugu Length Mark ..Telugu Ai Length Mark - (0x0c62, 0x0c63,), # Telugu Vowel Sign Vocali..Telugu Vowel Sign Vocali - (0x0c81, 0x0c83,), # (nil) ..Kannada Sign Visarga - (0x0cbc, 0x0cbc,), # Kannada Sign Nukta ..Kannada Sign Nukta - (0x0cbe, 0x0cc4,), # Kannada Vowel Sign Aa ..Kannada Vowel Sign Vocal - (0x0cc6, 0x0cc8,), # Kannada Vowel Sign E ..Kannada Vowel Sign Ai - (0x0cca, 0x0ccd,), # Kannada Vowel Sign O ..Kannada Sign Virama - (0x0cd5, 0x0cd6,), # Kannada Length Mark ..Kannada Ai Length Mark - (0x0ce2, 0x0ce3,), # Kannada Vowel Sign Vocal..Kannada Vowel Sign Vocal - (0x0d01, 0x0d03,), # (nil) ..Malayalam Sign Visarga - (0x0d3e, 0x0d44,), # Malayalam Vowel Sign Aa ..Malayalam Vowel Sign Voc - (0x0d46, 0x0d48,), # Malayalam Vowel Sign E ..Malayalam Vowel Sign Ai - (0x0d4a, 0x0d4d,), # Malayalam Vowel Sign O ..Malayalam Sign Virama - (0x0d57, 0x0d57,), # Malayalam Au Length Mark..Malayalam Au Length Mark - (0x0d62, 0x0d63,), # Malayalam Vowel Sign Voc..Malayalam Vowel Sign Voc - (0x0d82, 0x0d83,), # Sinhala Sign Anusvaraya ..Sinhala Sign Visargaya - (0x0dca, 0x0dca,), # Sinhala Sign Al-lakuna ..Sinhala Sign Al-lakuna - (0x0dcf, 0x0dd4,), # Sinhala Vowel Sign Aela-..Sinhala Vowel Sign Ketti - (0x0dd6, 0x0dd6,), # Sinhala Vowel Sign Diga ..Sinhala Vowel Sign Diga - (0x0dd8, 0x0ddf,), # Sinhala Vowel Sign Gaett..Sinhala Vowel Sign Gayan - (0x0df2, 0x0df3,), # 
Sinhala Vowel Sign Diga ..Sinhala Vowel Sign Diga - (0x0e31, 0x0e31,), # Thai Character Mai Han-a..Thai Character Mai Han-a - (0x0e34, 0x0e3a,), # Thai Character Sara I ..Thai Character Phinthu - (0x0e47, 0x0e4e,), # Thai Character Maitaikhu..Thai Character Yamakkan - (0x0eb1, 0x0eb1,), # Lao Vowel Sign Mai Kan ..Lao Vowel Sign Mai Kan - (0x0eb4, 0x0eb9,), # Lao Vowel Sign I ..Lao Vowel Sign Uu - (0x0ebb, 0x0ebc,), # Lao Vowel Sign Mai Kon ..Lao Semivowel Sign Lo - (0x0ec8, 0x0ecd,), # Lao Tone Mai Ek ..Lao Niggahita - (0x0f18, 0x0f19,), # Tibetan Astrological Sig..Tibetan Astrological Sig - (0x0f35, 0x0f35,), # Tibetan Mark Ngas Bzung ..Tibetan Mark Ngas Bzung - (0x0f37, 0x0f37,), # Tibetan Mark Ngas Bzung ..Tibetan Mark Ngas Bzung - (0x0f39, 0x0f39,), # Tibetan Mark Tsa -phru ..Tibetan Mark Tsa -phru - (0x0f3e, 0x0f3f,), # Tibetan Sign Yar Tshes ..Tibetan Sign Mar Tshes - (0x0f71, 0x0f84,), # Tibetan Vowel Sign Aa ..Tibetan Mark Halanta - (0x0f86, 0x0f87,), # Tibetan Sign Lci Rtags ..Tibetan Sign Yang Rtags - (0x0f8d, 0x0f97,), # Tibetan Subjoined Sign L..Tibetan Subjoined Letter - (0x0f99, 0x0fbc,), # Tibetan Subjoined Letter..Tibetan Subjoined Letter - (0x0fc6, 0x0fc6,), # Tibetan Symbol Padma Gda..Tibetan Symbol Padma Gda - (0x102b, 0x103e,), # Myanmar Vowel Sign Tall ..Myanmar Consonant Sign M - (0x1056, 0x1059,), # Myanmar Vowel Sign Vocal..Myanmar Vowel Sign Vocal - (0x105e, 0x1060,), # Myanmar Consonant Sign M..Myanmar Consonant Sign M - (0x1062, 0x1064,), # Myanmar Vowel Sign Sgaw ..Myanmar Tone Mark Sgaw K - (0x1067, 0x106d,), # Myanmar Vowel Sign Weste..Myanmar Sign Western Pwo - (0x1071, 0x1074,), # Myanmar Vowel Sign Geba ..Myanmar Vowel Sign Kayah - (0x1082, 0x108d,), # Myanmar Consonant Sign S..Myanmar Sign Shan Counci - (0x108f, 0x108f,), # Myanmar Sign Rumai Palau..Myanmar Sign Rumai Palau - (0x109a, 0x109d,), # Myanmar Sign Khamti Tone..Myanmar Vowel Sign Aiton - (0x135d, 0x135f,), # Ethiopic Combining Gemin..Ethiopic Combining Gemin - (0x1712, 
0x1714,), # Tagalog Vowel Sign I ..Tagalog Sign Virama - (0x1732, 0x1734,), # Hanunoo Vowel Sign I ..Hanunoo Sign Pamudpod - (0x1752, 0x1753,), # Buhid Vowel Sign I ..Buhid Vowel Sign U - (0x1772, 0x1773,), # Tagbanwa Vowel Sign I ..Tagbanwa Vowel Sign U - (0x17b4, 0x17d3,), # Khmer Vowel Inherent Aq ..Khmer Sign Bathamasat - (0x17dd, 0x17dd,), # Khmer Sign Atthacan ..Khmer Sign Atthacan - (0x180b, 0x180d,), # Mongolian Free Variation..Mongolian Free Variation - (0x18a9, 0x18a9,), # Mongolian Letter Ali Gal..Mongolian Letter Ali Gal - (0x1920, 0x192b,), # Limbu Vowel Sign A ..Limbu Subjoined Letter W - (0x1930, 0x193b,), # Limbu Small Letter Ka ..Limbu Sign Sa-i - (0x1a17, 0x1a1b,), # Buginese Vowel Sign I ..Buginese Vowel Sign Ae - (0x1a55, 0x1a5e,), # Tai Tham Consonant Sign ..Tai Tham Consonant Sign - (0x1a60, 0x1a7c,), # Tai Tham Sign Sakot ..Tai Tham Sign Khuen-lue - (0x1a7f, 0x1a7f,), # Tai Tham Combining Crypt..Tai Tham Combining Crypt - (0x1ab0, 0x1abe,), # (nil) .. - (0x1b00, 0x1b04,), # Balinese Sign Ulu Ricem ..Balinese Sign Bisah - (0x1b34, 0x1b44,), # Balinese Sign Rerekan ..Balinese Adeg Adeg - (0x1b6b, 0x1b73,), # Balinese Musical Symbol ..Balinese Musical Symbol - (0x1b80, 0x1b82,), # Sundanese Sign Panyecek ..Sundanese Sign Pangwisad - (0x1ba1, 0x1bad,), # Sundanese Consonant Sign..Sundanese Consonant Sign - (0x1be6, 0x1bf3,), # Batak Sign Tompi ..Batak Panongonan - (0x1c24, 0x1c37,), # Lepcha Subjoined Letter ..Lepcha Sign Nukta - (0x1cd0, 0x1cd2,), # Vedic Tone Karshana ..Vedic Tone Prenkha - (0x1cd4, 0x1ce8,), # Vedic Sign Yajurvedic Mi..Vedic Sign Visarga Anuda - (0x1ced, 0x1ced,), # Vedic Sign Tiryak ..Vedic Sign Tiryak - (0x1cf2, 0x1cf4,), # Vedic Sign Ardhavisarga ..Vedic Tone Candra Above - (0x1cf8, 0x1cf9,), # (nil) .. - (0x1dc0, 0x1df5,), # Combining Dotted Grave A.. 
- (0x1dfc, 0x1dff,), # Combining Double Inverte..Combining Right Arrowhea - (0x20d0, 0x20f0,), # Combining Left Harpoon A..Combining Asterisk Above - (0x2cef, 0x2cf1,), # Coptic Combining Ni Abov..Coptic Combining Spiritu - (0x2d7f, 0x2d7f,), # Tifinagh Consonant Joine..Tifinagh Consonant Joine - (0x2de0, 0x2dff,), # Combining Cyrillic Lette..Combining Cyrillic Lette - (0x302a, 0x302f,), # Ideographic Level Tone M..Hangul Double Dot Tone M - (0x3099, 0x309a,), # Combining Katakana-hirag..Combining Katakana-hirag - (0xa66f, 0xa672,), # Combining Cyrillic Vzmet..Combining Cyrillic Thous - (0xa674, 0xa67d,), # Combining Cyrillic Lette..Combining Cyrillic Payer - (0xa69e, 0xa69f,), # (nil) ..Combining Cyrillic Lette - (0xa6f0, 0xa6f1,), # Bamum Combining Mark Koq..Bamum Combining Mark Tuk - (0xa802, 0xa802,), # Syloti Nagri Sign Dvisva..Syloti Nagri Sign Dvisva - (0xa806, 0xa806,), # Syloti Nagri Sign Hasant..Syloti Nagri Sign Hasant - (0xa80b, 0xa80b,), # Syloti Nagri Sign Anusva..Syloti Nagri Sign Anusva - (0xa823, 0xa827,), # Syloti Nagri Vowel Sign ..Syloti Nagri Vowel Sign - (0xa880, 0xa881,), # Saurashtra Sign Anusvara..Saurashtra Sign Visarga - (0xa8b4, 0xa8c4,), # Saurashtra Consonant Sig..Saurashtra Sign Virama - (0xa8e0, 0xa8f1,), # Combining Devanagari Dig..Combining Devanagari Sig - (0xa926, 0xa92d,), # Kayah Li Vowel Ue ..Kayah Li Tone Calya Plop - (0xa947, 0xa953,), # Rejang Vowel Sign I ..Rejang Virama - (0xa980, 0xa983,), # Javanese Sign Panyangga ..Javanese Sign Wignyan - (0xa9b3, 0xa9c0,), # Javanese Sign Cecak Telu..Javanese Pangkon - (0xa9e5, 0xa9e5,), # (nil) .. - (0xaa29, 0xaa36,), # Cham Vowel Sign Aa ..Cham Consonant Sign Wa - (0xaa43, 0xaa43,), # Cham Consonant Sign Fina..Cham Consonant Sign Fina - (0xaa4c, 0xaa4d,), # Cham Consonant Sign Fina..Cham Consonant Sign Fina - (0xaa7b, 0xaa7d,), # Myanmar Sign Pao Karen T.. 
- (0xaab0, 0xaab0,), # Tai Viet Mai Kang ..Tai Viet Mai Kang - (0xaab2, 0xaab4,), # Tai Viet Vowel I ..Tai Viet Vowel U - (0xaab7, 0xaab8,), # Tai Viet Mai Khit ..Tai Viet Vowel Ia - (0xaabe, 0xaabf,), # Tai Viet Vowel Am ..Tai Viet Tone Mai Ek - (0xaac1, 0xaac1,), # Tai Viet Tone Mai Tho ..Tai Viet Tone Mai Tho - (0xaaeb, 0xaaef,), # Meetei Mayek Vowel Sign ..Meetei Mayek Vowel Sign - (0xaaf5, 0xaaf6,), # Meetei Mayek Vowel Sign ..Meetei Mayek Virama - (0xabe3, 0xabea,), # Meetei Mayek Vowel Sign ..Meetei Mayek Vowel Sign - (0xabec, 0xabed,), # Meetei Mayek Lum Iyek ..Meetei Mayek Apun Iyek - (0xfb1e, 0xfb1e,), # Hebrew Point Judeo-spani..Hebrew Point Judeo-spani - (0xfe00, 0xfe0f,), # Variation Selector-1 ..Variation Selector-16 - (0xfe20, 0xfe2f,), # Combining Ligature Left .. - (0x101fd, 0x101fd,), # Phaistos Disc Sign Combi..Phaistos Disc Sign Combi - (0x102e0, 0x102e0,), # (nil) .. - (0x10376, 0x1037a,), # (nil) .. - (0x10a01, 0x10a03,), # Kharoshthi Vowel Sign I ..Kharoshthi Vowel Sign Vo - (0x10a05, 0x10a06,), # Kharoshthi Vowel Sign E ..Kharoshthi Vowel Sign O - (0x10a0c, 0x10a0f,), # Kharoshthi Vowel Length ..Kharoshthi Sign Visarga - (0x10a38, 0x10a3a,), # Kharoshthi Sign Bar Abov..Kharoshthi Sign Dot Belo - (0x10a3f, 0x10a3f,), # Kharoshthi Virama ..Kharoshthi Virama - (0x10ae5, 0x10ae6,), # (nil) .. - (0x11000, 0x11002,), # Brahmi Sign Candrabindu ..Brahmi Sign Visarga - (0x11038, 0x11046,), # Brahmi Vowel Sign Aa ..Brahmi Virama - (0x1107f, 0x11082,), # (nil) ..Kaithi Sign Visarga - (0x110b0, 0x110ba,), # Kaithi Vowel Sign Aa ..Kaithi Sign Nukta - (0x11100, 0x11102,), # Chakma Sign Candrabindu ..Chakma Sign Visarga - (0x11127, 0x11134,), # Chakma Vowel Sign A ..Chakma Maayyaa - (0x11173, 0x11173,), # (nil) .. - (0x11180, 0x11182,), # Sharada Sign Candrabindu..Sharada Sign Visarga - (0x111b3, 0x111c0,), # Sharada Vowel Sign Aa ..Sharada Sign Virama - (0x111ca, 0x111cc,), # (nil) .. - (0x1122c, 0x11237,), # (nil) .. - (0x112df, 0x112ea,), # (nil) .. 
- (0x11300, 0x11303,), # (nil) .. - (0x1133c, 0x1133c,), # (nil) .. - (0x1133e, 0x11344,), # (nil) .. - (0x11347, 0x11348,), # (nil) .. - (0x1134b, 0x1134d,), # (nil) .. - (0x11357, 0x11357,), # (nil) .. - (0x11362, 0x11363,), # (nil) .. - (0x11366, 0x1136c,), # (nil) .. - (0x11370, 0x11374,), # (nil) .. - (0x114b0, 0x114c3,), # (nil) .. - (0x115af, 0x115b5,), # (nil) .. - (0x115b8, 0x115c0,), # (nil) .. - (0x115dc, 0x115dd,), # (nil) .. - (0x11630, 0x11640,), # (nil) .. - (0x116ab, 0x116b7,), # Takri Sign Anusvara ..Takri Sign Nukta - (0x1171d, 0x1172b,), # (nil) .. - (0x16af0, 0x16af4,), # (nil) .. - (0x16b30, 0x16b36,), # (nil) .. - (0x16f51, 0x16f7e,), # Miao Sign Aspiration ..Miao Vowel Sign Ng - (0x16f8f, 0x16f92,), # Miao Tone Right ..Miao Tone Below - (0x1bc9d, 0x1bc9e,), # (nil) .. - (0x1d165, 0x1d169,), # Musical Symbol Combining..Musical Symbol Combining - (0x1d16d, 0x1d172,), # Musical Symbol Combining..Musical Symbol Combining - (0x1d17b, 0x1d182,), # Musical Symbol Combining..Musical Symbol Combining - (0x1d185, 0x1d18b,), # Musical Symbol Combining..Musical Symbol Combining - (0x1d1aa, 0x1d1ad,), # Musical Symbol Combining..Musical Symbol Combining - (0x1d242, 0x1d244,), # Combining Greek Musical ..Combining Greek Musical - (0x1da00, 0x1da36,), # (nil) .. - (0x1da3b, 0x1da6c,), # (nil) .. - (0x1da75, 0x1da75,), # (nil) .. - (0x1da84, 0x1da84,), # (nil) .. - (0x1da9b, 0x1da9f,), # (nil) .. - (0x1daa1, 0x1daaf,), # (nil) .. - (0x1e8d0, 0x1e8d6,), # (nil) .. 
- (0xe0100, 0xe01ef,), # Variation Selector-17 ..Variation Selector-256 -) diff --git a/wcwidth/wcwidth.py b/wcwidth/wcwidth.py index 73145bc..e4d536a 100644 --- a/wcwidth/wcwidth.py +++ b/wcwidth/wcwidth.py @@ -71,7 +71,6 @@ from __future__ import division from .table_wide import WIDE_EASTASIAN -from .table_comb import COMBINING from .table_zero import ZERO_WIDTH From 800363de878e8bbee4c9c6e21449de75abab9ef3 Mon Sep 17 00:00:00 2001 From: Jeff Quast Date: Sun, 13 Sep 2015 19:01:53 -0700 Subject: [PATCH 05/29] README.rst brevity and clarity --- README.rst | 178 ++++++++++++++++++++++------------------------------- 1 file changed, 75 insertions(+), 103 deletions(-) diff --git a/README.rst b/README.rst index 066c111..ee918b1 100644 --- a/README.rst +++ b/README.rst @@ -26,58 +26,31 @@ Introduction ============ -This API is mainly for Terminal Emulator implementors -- any python program -that attempts to determine the printable width of a string on a Terminal. It -is implemented in python (no C library calls) and has no 3rd-party dependencies. - -It is certainly possible to use your Operating System's ``wcwidth(3)`` and -``wcswidth(3)`` calls if it is POSIX-conforming, but this would not be possible -on non-POSIX platforms, such as Windows, or for alternative Python -implementations, such as jython. It is also commonly many releases older -than the most current Unicode Standard release files, which this project -aims to track. - -The most current release of this API is based from Unicode Standard release -*8.0.0*, dated *2015-02-10, 21:00:00 GMT [KW, LI]* for table generated by -file ``EastAsianWidth-8.0.0.txt`` and *2015-02-13, 13:47:11 GMT [MD]* for -``DerivedGeneralCategory-8.0.0.txt``. +This API is mainly for Terminal Emulator implementors, or those writing +programs that expect to be interpreted by a terminal emulator and wish to +determine the printable width of a string on a Terminal.
-Installation ------------ +Usually, the length of the string is equivalent to the number of cells +it occupies except that there are also some categories of characters +which occupy 2 or even 0 cells. POSIX-conforming systems provide +``wcwidth(3)`` and ``wcswidth(3)`` of which this module's interface mirrors +precisely. -The stable version of this package is maintained on pypi, install using pip:: +This library aims to be forward-looking, portable, and most correct. The most +current release of this API is based from Unicode Standard release files: - pip install wcwidth +``EastAsianWidth-8.0.0.txt`` + *2015-02-10, 21:00:00 GMT [KW, LI]* -Problem -------- - -You may have noticed some characters especially Chinese, Japanese, and -Korean (collectively known as the *CJK Unified Ideographs*) consume more -than 1 terminal cell. If you ask for the length of the string, ``u'コンニチハ'`` -(Japanese: Hello), it is correctly determined to be a length of **5** using -the ``len()`` built-in. - -However, if you were to print this to a Terminal Emulator, such as xterm, -urxvt, Terminal.app, PuTTY, or iTerm2, it would consume **10** *cells* (columns). -This causes problems for many of the text-alignment functions, such as ``rjust()``. -On an 80-wide terminal, the following would wrap along the margin, instead -of displaying it right-aligned as desired:: - - >>> text = u'コンニチハ' - >>> print(text.rjust(80)) - コン - ニチハ +``DerivedGeneralCategory-8.0.0.txt`` + *2015-02-13, 13:47:11 GMT [MD]* -Solution --------- +Installation +------------ -This API allows one to determine the printable length of these strings, -that the length of ``wcwidth(u'コ')`` is reported as ``2``, and -``wcswidth(u'コンニチハ')`` as ``10``. +The stable version of this package is maintained on pypi, install using pip:: -This allows one to determine the printable effects of displaying *CJK* -characters on a terminal emulator.
+ pip install wcwidth wcwidth, wcswidth ----------------- @@ -89,39 +62,45 @@ To Display ``u'コンニチハ'`` right-adjusted on screen of 80 columns:: >>> from wcwidth import wcswidth >>> text = u'コンニチハ' >>> print(u' ' * (80 - wcswidth(text)) + text) - コンニチハ +Return Values +------------- -Values ------- +``-1`` + Indeterminate (not printable). -A general overview of return values: +``0`` + Does not advance the cursor, such as NULL or Combining. -- ``-1``: indeterminate (see Todo_). -- ``0``: do not advance the cursor, such as NULL. -- ``2``: East_Asian_Width property values W and F (Wide and Full-width). -- ``1``: all others. +``2`` + Characters of category East Asian Wide (W) or East Asian + Full-width (F) which are displayed using two terminal cells. + +``1`` + All others. ``wcswidth()`` simply returns the sum of all values along a string, or -``-1`` if it has occurred for any value returned by ``wcwidth()``. A more -exacting list of conditions and return values may be found in the docstring -for ``wcwidth()``. +``-1`` in total if any part of the string results in -1. A more exact +list of conditions and return values may be found in the docstring:: -Discrepacies ------------- + $ pydoc wcwidth + + +Discrepancies +------------- -There may be discrepancies with the determined printable width of of characters -by *wcwidth* and the results of any given terminal emulator -- most commonly, -emulators are using your Operating System's ``wcwidth(3)`` implementation which -is often based on tables much older than the most current Unicode Specification. -Python's determination of non-zero combining_ characters may also be based on an -older specification. +This library does its best to return the most appropriate return value for a +very particular terminal user interface where a monospaced fixed-cell +rendering is expected. 
As the POSIX Terminal programming interfaces do not +provide any means to determine the unicode support level, we can only do our +best to return the *correct* result for the given codepoint, and not what any +particular terminal emulator does. -You may determine an exacting list of these discrepancies using files -`wcwidth-libc-comparator.py`_ and `wcwidth-combining-comparator.py`_ +Python's determination of non-zero combining_ characters may also be based on +an older specification. -.. _`wcwidth-libc-comparator.py`: https://github.com/jquast/wcwidth/tree/master/bin/wcwidth-libc-comparator.py -.. _`wcwidth-combining-comparator.py`: https://github.com/jquast/wcwidth/tree/master/bin/wcwidth-combining-comparator.py +You may determine an exacting list of these discrepancies using the project +files `wcwidth-libc-comparator.py <https://github.com/jquast/wcwidth/tree/master/bin/wcwidth-libc-comparator.py>`_ and `wcwidth-combining-comparator.py <https://github.com/jquast/wcwidth/tree/master/bin/wcwidth-combining-comparator.py>`_. ========== @@ -142,22 +121,18 @@ The command ``python setup.py update`` will fetch the following resources: - http://www.unicode.org/Public/UNIDATA/EastAsianWidth.txt - http://www.unicode.org/Public/UNIDATA/extracted/DerivedGeneralCategory.txt -And generate the table files `wcwidth/table_wide.py`_, -`wcwidth/table_comb.py`_, and `wcwidth/table_zero.py`_. +And generates the table files: -.. _`wcwidth/table_wide.py`: https://github.com/jquast/wcwidth/tree/master/wcwidth/table_wide.py -.. _`wcwidth/table_comb.py`: https://github.com/jquast/wcwidth/tree/master/wcwidth/table_comb.py -.. _`wcwidth/table_zero.py`: https://github.com/jquast/wcwidth/tree/master/wcwidth/table_zero.py +- `wcwidth/table_wide.py <https://github.com/jquast/wcwidth/tree/master/wcwidth/table_wide.py>`_ +- `wcwidth/table_zero.py <https://github.com/jquast/wcwidth/tree/master/wcwidth/table_zero.py>`_ wcwidth.c --------- This code was originally derived directly from C code of the same name, -whose latest version is available at: `wcwidth.c`_ And is authored by -Markus Kuhn -- 2007-05-26 (Unicode 5.0) - -..
_`wcwidth.c`: http://www.cl.cam.ac.uk/~mgk25/ucs/wcwidth.c - +whose latest version is available at: `wcwidth.c +<http://www.cl.cam.ac.uk/~mgk25/ucs/wcwidth.c>`_ And is authored by Markus +Kuhn -- 2007-05-26 (Unicode 5.0) Examples -------- @@ -169,35 +144,18 @@ This library is used in: - `jonathanslenders/python-prompt-toolkit`_, a Library for building powerful interactive command lines in Python. -Additional tools for displaying and testing wcwidth is found in the ``bin/`` -folder of this project (github link: `wcwidth/bin`_). They are not distributed -as a script or part of the module. +Additional tools for displaying and testing wcwidth are found in the `bin/ +<https://github.com/jquast/wcwidth/tree/master/bin>`_ folder of this project. They are not +distributed as a script or part of the module. .. _`jquast/blessed`: https://github.com/jquast/blessed .. _`jonathanslenders/python-prompt-toolkit`: https://github.com/jonathanslenders/python-prompt-toolkit -.. _`wcwidth/bin`: https://github.com/jquast/wcwidth/tree/master/bin - -Todo ----- - -Though some of the most common ("zero-width") `combining`_ characters -are understood by wcswidth, there are still many edge cases that need -to be covered, especially certain kinds of sequences such as those -containing Control-Sequence-Inducer (CSI). License ------- -The original license is as follows:: - - Permission to use, copy, modify, and distribute this software - for any purpose and without fee is hereby granted. The author - disclaims all warranties with regard to this software. - -No specific licensing is specified, and Mr. Kuhn resides in the UK which allows -some protection from Copyrighting. As this derivative is based on US Soil, -an OSI-approved license that appears most-alike has been chosen, the MIT license:: +MIT license:: The MIT License (MIT) @@ -221,10 +179,22 @@ an OSI-approved license that appears most-alike has been chosen, the MIT license OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+The original license is as follows:: + + Permission to use, copy, modify, and distribute this software + for any purpose and without fee is hereby granted. The author + disclaims all warranties with regard to this software. + +Mr. Kuhn resides in the UK which allows some protection from Copyright law. +As a US citizen, I feel compelled to chose an OSI-approved license to ensure +both my safety and yours, therefore the least-restrictive among them, the MIT +license was chosen. + + Changes ------- -0.1.4 +0.1.4 *2014-11-20 Pre-Alpha* * **Feature**: ``wcswidth()`` now determines printable length for (most) combining characters. The developer's tool `bin/wcwidth-browser.py`_ is improved to display combining_ @@ -232,15 +202,15 @@ Changes (`Thomas Ballinger`_ and `Leta Montopoli`_ `PR #5`_). * added static analysis (prospector_) to testing framework. -0.1.3 +0.1.3 *2014-10-29 Pre-Alpha* * **Bugfix**: 2nd parameter of wcswidth was not honored. (`Thomas Ballinger`_, `PR #4`). -0.1.2 +0.1.2 *2014-10-28 Pre-Alpha* * **Updated** tables to Unicode Specification 7.0.0. (`Thomas Ballinger`_, `PR #3`). -0.1.1 +0.1.1 *2014-05-14 Pre-Alpha* * Initial release to pypi, Based on Unicode Specification 6.3.0 .. _`prospector`: https://github.com/landscapeio/prospector @@ -248,6 +218,8 @@ Changes .. _`bin/wcwidth-browser.py`: https://github.com/jquast/wcwidth/tree/master/bin/wcwidth-browser.py .. _`Thomas Ballinger`: https://github.com/thomasballinger .. _`Leta Montopoli`: https://github.com/lmontopo +.. _`Philip Craig`: https://github.com/philipc .. _`PR #3`: https://github.com/jquast/wcwidth/pull/3 .. _`PR #4`: https://github.com/jquast/wcwidth/pull/4 .. _`PR #5`: https://github.com/jquast/wcwidth/pull/5 +.. 
_`PR #11`: https://github.com/jquast/wcwidth/pull/11 From 45dcdbc9ec3c1c8f2debf31a2b97cda8ba0899e7 Mon Sep 17 00:00:00 2001 From: Jeff Quast Date: Sun, 13 Sep 2015 19:03:38 -0700 Subject: [PATCH 06/29] setup.py: remove develop tat, remove table_comb.py --- setup.py | 68 ++++++++++++++++++-------------------------------------- 1 file changed, 22 insertions(+), 46 deletions(-) diff --git a/setup.py b/setup.py index 373e723..dd9ec9c 100755 --- a/setup.py +++ b/setup.py @@ -42,14 +42,15 @@ class SetupUpdate(setuptools.Command): description = "Fetch and update unicode code tables" user_options = [] - EAW_URL = 'http://www.unicode.org/Public/UNIDATA/EastAsianWidth.txt' - EAW_IN = os.path.join(HERE, 'data', 'EastAsianWidth.txt') - EAW_OUT = os.path.join(HERE, 'wcwidth', 'table_wide.py') - + EAW_URL = ('http://www.unicode.org/Public/UNIDATA/' + 'EastAsianWidth.txt') UCD_URL = ('http://www.unicode.org/Public/UNIDATA/extracted/' 'DerivedGeneralCategory.txt') + + EAW_IN = os.path.join(HERE, 'data', 'EastAsianWidth.txt') UCD_IN = os.path.join(HERE, 'data', 'DerivedGeneralCategory.txt') - CMB_OUT = os.path.join(HERE, 'wcwidth', 'table_comb.py') + + EAW_OUT = os.path.join(HERE, 'wcwidth', 'table_wide.py') ZERO_OUT = os.path.join(HERE, 'wcwidth', 'table_zero.py') def initialize_options(self): @@ -61,30 +62,27 @@ def finalize_options(self): pass def run(self): - """Execute command: update east-asian, combining and zero width tables.""" - assert os.getenv('VIRTUAL_ENV'), 'You should be in a virtualenv' - self.do_east_asian_width() - self.do_combining() + """Update east-asian, combining and zero width tables.""" + self.do_east_asian() self.do_zero_width() - def do_east_asian_width(self): + def do_east_asian(self): """Fetch and update east-asian tables.""" self._do_retrieve(self.EAW_URL, self.EAW_IN) - (version, date, values) = self._do_east_asian_width_parse(self.EAW_IN) + (version, date, values) = self._parse_east_asian( + fname=self.EAW_IN, + properties=(u'W', u'F',) + ) table = 
self._make_table(values) self._do_write(self.EAW_OUT, 'WIDE_EASTASIAN', version, date, table) - def do_combining(self): - """Fetch and update combining tables.""" - self._do_retrieve(self.UCD_URL, self.UCD_IN) - (version, date, values) = self._do_category_parse(self.UCD_IN, ('Mc', 'Me', 'Mn',)) - table = self._make_table(values) - self._do_write(self.CMB_OUT, 'COMBINING', version, date, table) - def do_zero_width(self): """Fetch and update zero width tables.""" self._do_retrieve(self.UCD_URL, self.UCD_IN) - (version, date, values) = self._do_category_parse(self.UCD_IN, ('Me', 'Mn',)) + (version, date, values) = self._parse_category( + fname=self.UCD_IN, + categories=('Me', 'Mn',) + ) table = self._make_table(values) self._do_write(self.ZERO_OUT, 'ZERO_WIDTH', version, date, table) @@ -109,11 +107,7 @@ def _make_table(values): @staticmethod def _do_retrieve(url, fname): """Retrieve given url to target filepath fname.""" - try: - import requests - except ImportError: - print("Execute '{} develop' first.".format(__file__)) - exit(1) + import requests folder = os.path.dirname(fname) if not os.path.exists(folder): os.makedirs(folder) @@ -127,8 +121,7 @@ def _do_retrieve(url, fname): return fname @staticmethod - def _do_east_asian_width_parse(fname, - east_asian_width_properties=(u'W', u'F',)): + def _parse_east_asian(fname, properties=(u'W', u'F',)): """Parse unicode east-asian width tables.""" version, date, values = None, None, [] print("parsing {} ..".format(fname)) @@ -144,7 +137,7 @@ def _do_east_asian_width_parse(fname, continue addrs, details = uline.split(';', 1) if any(details.startswith(property) - for property in east_asian_width_properties): + for property in properties): start, stop = addrs, addrs if '..' 
in addrs: start, stop = addrs.split('..') @@ -152,7 +145,7 @@ def _do_east_asian_width_parse(fname, return version, date, sorted(values) @staticmethod - def _do_category_parse(fname, categories): + def _parse_category(fname, categories): """Parse unicode category tables.""" version, date, values = None, None, [] print("parsing {} ..".format(fname)) @@ -169,7 +162,7 @@ def _do_category_parse(fname, categories): addrs, details = uline.split(';', 1) addrs, details = addrs.rstrip(), details.lstrip() if any(details.startswith('{} #'.format(value)) - for value in categories): + for value in categories): start, stop = addrs, addrs if '..' in addrs: start, stop = addrs.split('..') @@ -219,23 +212,6 @@ def _do_write(fname, variable, version, date, table): print("complete.") -class SetupDevelop(setuptools.command.develop.develop): - - """'setup.py develop' is augmented to install development tools.""" - - # pylint: disable=R0904 - # Too many public methods (43/20) - - def run(self): - """Execute command pip for development requirements.""" - # pylint: disable=E1101 - # Instance of 'SetupDevelop' has no 'spawn' member (col 8) - assert os.getenv('VIRTUAL_ENV'), 'You should be in a virtualenv' - setuptools.command.develop.develop.run(self) - self.spawn(('pip', 'install', '-U', - 'blessed', 'requests', 'tox', 'docopt',)) - - class SetupTest(setuptools.command.test.test): """'setup.py test' is an alias to execute tox.""" From 4baf950b88a28a9fe21d132e088b4ffc58e66276 Mon Sep 17 00:00:00 2001 From: Jeff Quast Date: Sun, 13 Sep 2015 19:23:39 -0700 Subject: [PATCH 07/29] move-to LICENSE file to compliment github --- LICENSE | 21 +++++++++++++++++++++ README.rst | 39 --------------------------------------- 2 files changed, 21 insertions(+), 39 deletions(-) create mode 100644 LICENSE diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..f0d3471 --- /dev/null +++ b/LICENSE @@ -0,0 +1,21 @@ +The MIT License (MIT) + +Copyright (c) 2014 Jeff Quast + +Permission is hereby 
granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/README.rst b/README.rst index ee918b1..0ce8cdf 100644 --- a/README.rst +++ b/README.rst @@ -152,45 +152,6 @@ distributed as a script or part of the module. .. _`jonathanslenders/python-prompt-toolkit`: https://github.com/jonathanslenders/python-prompt-toolkit -License -------- - -MIT license:: - - The MIT License (MIT) - - Copyright (c) 2014 - - Permission is hereby granted, free of charge, to any person obtaining a copy - of this software and associated documentation files (the "Software"), to deal - in the Software without restriction, including without limitation the rights - to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - copies of the Software, and to permit persons to whom the Software is - furnished to do so, subject to the following conditions: - - The above copyright notice and this permission notice shall be included in - all copies or substantial portions of the Software. 
- - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - THE SOFTWARE. - -The original license is as follows:: - - Permission to use, copy, modify, and distribute this software - for any purpose and without fee is hereby granted. The author - disclaims all warranties with regard to this software. - -Mr. Kuhn resides in the UK which allows some protection from Copyright law. -As a US citizen, I feel compelled to chose an OSI-approved license to ensure -both my safety and yours, therefore the least-restrictive among them, the MIT -license was chosen. - - Changes ------- From b1e6d0d9bf0b78a6338854ae86c508dbf6deb7e5 Mon Sep 17 00:00:00 2001 From: Jeff Quast Date: Sun, 13 Sep 2015 19:23:56 -0700 Subject: [PATCH 08/29] move developer requirements over to file --- requirements-develop.txt | 2 ++ 1 file changed, 2 insertions(+) create mode 100644 requirements-develop.txt diff --git a/requirements-develop.txt b/requirements-develop.txt new file mode 100644 index 0000000..26d5ea7 --- /dev/null +++ b/requirements-develop.txt @@ -0,0 +1,2 @@ +requests +blessed From f8782eefe4010314ec5efc57b5f774281de4e4c4 Mon Sep 17 00:00:00 2001 From: Jeff Quast Date: Sun, 13 Sep 2015 19:24:06 -0700 Subject: [PATCH 09/29] add python3.5 support to tox. 
--- tox.ini | 1 + 1 file changed, 1 insertion(+) diff --git a/tox.ini b/tox.ini index 357dd42..9f962ad 100644 --- a/tox.ini +++ b/tox.ini @@ -4,6 +4,7 @@ envlist = prospector, py27, py33, py34, + py35, pypy [testenv] From 78cdf059d9cd74fa1885ec5468b34431de891f32 Mon Sep 17 00:00:00 2001 From: Jeff Quast Date: Sun, 13 Sep 2015 19:24:29 -0700 Subject: [PATCH 10/29] ignore .DS_Store --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index ad521f6..4411244 100644 --- a/.gitignore +++ b/.gitignore @@ -12,3 +12,4 @@ docs/_build htmlcov .coveralls.yml data +.DS_Store From 56447247923f262ab5ae2adeb26b5135caf6a79d Mon Sep 17 00:00:00 2001 From: Jeff Quast Date: Sun, 13 Sep 2015 20:29:48 -0700 Subject: [PATCH 11/29] Revise docstrings of wcwidth & wcswidth. --- wcwidth/wcwidth.py | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/wcwidth/wcwidth.py b/wcwidth/wcwidth.py index e4d536a..45b072e 100644 --- a/wcwidth/wcwidth.py +++ b/wcwidth/wcwidth.py @@ -108,9 +108,9 @@ def wcwidth(wc): The wcwidth() function returns 0 if the wc argument has no printable effect on a terminal (such as NUL '\0'), -1 if wc is not printable, or has an - indeterminate effect on the terminal (control or combining). Otherwise, - the number of column positions the character occupies on a graphic terminal - (1 or 2). + indeterminate effect on the terminal, such as a control character. + Otherwise, the number of column positions the character occupies on a + graphic terminal (1 or 2) is returned. The following have a column width of -1: @@ -184,9 +184,11 @@ def wcswidth(pwcs, n=None): """ Given a unicode string, return its printable length on a terminal. - Return the width in character cells of the first ``n`` unicode string pwcs, - or -1 if a non-printable character is encountered. When ``n`` is None - (default), return the length of the entire string. 
+ Return the width, in cells, necessary to display the first ``n`` + characters of the unicode string ``pwcs``. When ``n`` is None (default), + return the length of the entire string. + + Returns ``-1`` if a non-printable character is encountered. """ # pylint: disable=C0103 # Invalid argument name "n" From 07cea7f70f298c7124c5a7838848c392516a676d Mon Sep 17 00:00:00 2001 From: Jeff Quast Date: Sun, 13 Sep 2015 20:30:07 -0700 Subject: [PATCH 12/29] Brevity and use true sphinx format in docstring --- wcwidth/wcwidth.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/wcwidth/wcwidth.py b/wcwidth/wcwidth.py index 45b072e..48c2e70 100644 --- a/wcwidth/wcwidth.py +++ b/wcwidth/wcwidth.py @@ -78,11 +78,10 @@ def _bisearch(ucs, table): """ Auxiliary function for binary search in interval table. - :param ucs: a single unicode character's ordinal value. - :type ucs: int - :param table: a lookup table of tuples in form of ``[(start, end), ...]``. - :type table: list - :rtype int + :arg int ucs: Ordinal value of unicode character. + :arg list table: List of starting and ending ranges of ordinal values, + in form of ``[(start, end), ...]``. + :rtype: int :returns: 1 if ordinal value ucs is found within lookup table, else 0. 
""" lbound = 0 From 38d2e81614b2e901f12ab374c8d6dd928dcff3a9 Mon Sep 17 00:00:00 2001 From: Jeff Quast Date: Sun, 13 Sep 2015 20:59:45 -0700 Subject: [PATCH 13/29] return to usedevelop=false (implicit) --- tox.ini | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tox.ini b/tox.ini index 9f962ad..73f66db 100644 --- a/tox.ini +++ b/tox.ini @@ -20,8 +20,8 @@ commands = {envbindir}/py.test \ {toxinidir}/bin/wcwidth-combining-comparator.py [testenv:prospector] -usedevelop = True deps = prospector[with_everything] + --requirement=requirements-develop.txt commands = prospector \ --profile {toxinidir}/.prospector.yaml \ --no-autodetect @@ -47,9 +47,9 @@ commands = {envbindir}/py.test \ [testenv:py34] # for python34, measure coverage -usedevelop = True deps = pytest pytest-cov + --requirement=requirements-develop.txt commands = {envbindir}/py.test -v \ -x --strict \ From 9f543e7354275a8aa7e5aa2f5951e1200d1b55db Mon Sep 17 00:00:00 2001 From: Jeff Quast Date: Sun, 13 Sep 2015 21:30:39 -0700 Subject: [PATCH 14/29] remove SetupDevelop target (erk!) 
--- setup.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/setup.py b/setup.py index dd9ec9c..708fc8e 100755 --- a/setup.py +++ b/setup.py @@ -258,9 +258,9 @@ def main(): ], keywords=['terminal', 'emulator', 'wcwidth', 'wcswidth', 'cjk', 'combining', 'xterm', 'console', ], - cmdclass={'develop': SetupDevelop, - 'update': SetupUpdate, - 'test': SetupTest}, + cmdclass={ + 'update': SetupUpdate, + 'test': SetupTest}, ) if __name__ == '__main__': From 410864a2afc43072189275609e0f6044e0d346b7 Mon Sep 17 00:00:00 2001 From: Jeff Quast Date: Sun, 13 Sep 2015 21:31:38 -0700 Subject: [PATCH 15/29] use sudo: false for great travis speeds --- .travis.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.travis.yml b/.travis.yml index 3129375..0c85a4b 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,4 +1,5 @@ language: python +sudo: false env: - TOXENV=py26 From 3fdd990ee5a79607682030fd8c7d40718a5842ca Mon Sep 17 00:00:00 2001 From: Jeff Quast Date: Sun, 13 Sep 2015 22:15:39 -0700 Subject: [PATCH 16/29] remove wcwidth-combining-comparator this isn't really that useful now. --- bin/wcwidth-combining-comparator.py | 95 ----------------------------- 1 file changed, 95 deletions(-) delete mode 100755 bin/wcwidth-combining-comparator.py diff --git a/bin/wcwidth-combining-comparator.py b/bin/wcwidth-combining-comparator.py deleted file mode 100755 index c389c38..0000000 --- a/bin/wcwidth-combining-comparator.py +++ /dev/null @@ -1,95 +0,0 @@ -#!/usr/bin/env python -# coding: utf-8 -""" -Manual tests comparing python and wcwidth's combining characters. 
- -https://github.com/jquast/wcwidth - -No assertions raised with python 3.4 -""" -# pylint: disable=C0103 -# Invalid module name "wcwidth-combining-comparator" - -# standard imports -from __future__ import print_function -import unicodedata -import warnings -import locale -import sys - -# local imports -from wcwidth.wcwidth import _bisearch, COMBINING - - -def report_comb_msg(ucs, comb_py, comb_wc): - """ - Return string report of combining character differences. - - :param ucs: unicode point. - :type ucs: unicode - :param comb_py: python's reported combining character length. - :type comb_py: int - :param comb_wc: wcwidth's reported combining character length. - :type comb_wc: int - :rtype: unicode - """ - ucp = (ucs.encode('unicode_escape')[2:] - .decode('ascii') - .upper() - .lstrip('0')) - url = "http://codepoints.net/U+{0}".format(ucp) - # pylint: disable=W0703 - # Catching too general exception Exception (col 11) - try: - name = unicodedata.name(ucs) - except ValueError: - name = u'' - return (u"py,comb_table={0},{1} [--o{2}o--] name={3} val={4} {5}" - " ".format(comb_py, comb_wc, ucs, name, ord(ucs), url)) - -# use chr() for py3.x, -# unichr() for py2.x -try: - _ = unichr(0) -except NameError as err: - if err.args[0] == "name 'unichr' is not defined": - # pylint: disable=W0622 - # Redefining built-in 'unichr' (col 8) - - unichr = chr - else: - raise - -if sys.maxunicode < 1114111: - warnings.warn('narrow Python build, only a small subset of ' - 'characters may be tested.') - - -def _is_equal_combining(ucs): - comb_py = bool(unicodedata.category(ucs) in ['Mc', 'Me', 'Mn']) - comb_wc = bool(_bisearch(ord(ucs), COMBINING)) - assert comb_py == comb_wc, report_comb_msg(ucs, comb_py, comb_wc) - - -def main(using_locale='en_US.UTF-8'): - """ - Program entry point. - - Load the entire Unicode table into memory, for each character deemed - a combining character by either python or wcwidth.table_comb, display - their differences. 
- """ - all_ucs = (ucs for ucs in - [unichr(val) for val in range(sys.maxunicode)]) - - locale.setlocale(locale.LC_ALL, using_locale) - - for ucs in all_ucs: - try: - _is_equal_combining(ucs) - except AssertionError as err: - print(u'{0}'.format(err)) - - -if __name__ == '__main__': - main() From 1349efb70b8adef320e0dca3796e19f9ce4ccffd Mon Sep 17 00:00:00 2001 From: Jeff Quast Date: Sun, 13 Sep 2015 22:15:50 -0700 Subject: [PATCH 17/29] add 'docent' to requirements-develop.txt --- requirements-develop.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/requirements-develop.txt b/requirements-develop.txt index 26d5ea7..1150de3 100644 --- a/requirements-develop.txt +++ b/requirements-develop.txt @@ -1,2 +1,3 @@ requests blessed +docopt From 5337aa315f63459b956a8d2397a72784a9db8661 Mon Sep 17 00:00:00 2001 From: Jeff Quast Date: Sun, 13 Sep 2015 22:16:06 -0700 Subject: [PATCH 18/29] remove custom 'setup.py develop' references --- setup.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/setup.py b/setup.py index 708fc8e..ab68f89 100755 --- a/setup.py +++ b/setup.py @@ -6,7 +6,6 @@ You may execute setup.py with special arguments: -- ``develop``: Ensures virtualenv and installs development tools. - ``update``: Updates unicode reference files of the project to latest. 
- ``test``: Executes test runner (tox) """ @@ -14,7 +13,6 @@ from __future__ import print_function import os import setuptools -import setuptools.command.develop import setuptools.command.test HERE = os.path.dirname(__file__) From 179a10c3329b3cf5337849f6d83e0293c645feef Mon Sep 17 00:00:00 2001 From: Jeff Quast Date: Sun, 13 Sep 2015 22:16:17 -0700 Subject: [PATCH 19/29] remove combining character from tests --- tox.ini | 2 -- 1 file changed, 2 deletions(-) diff --git a/tox.ini b/tox.ini index 73f66db..8011dbc 100644 --- a/tox.ini +++ b/tox.ini @@ -16,8 +16,6 @@ setenv = PYTHONIOENCODING=utf-8 commands = {envbindir}/py.test \ -rs --strict \ wcwidth/tests {posargs} -# for most python, also compare our combining - {toxinidir}/bin/wcwidth-combining-comparator.py [testenv:prospector] deps = prospector[with_everything] From f0ab2b66b3b6aef061a09924a44beba454a0c30b Mon Sep 17 00:00:00 2001 From: Jeff Quast Date: Sun, 13 Sep 2015 22:18:55 -0700 Subject: [PATCH 20/29] Prepare setup.py for 0.1.5 release --- setup.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/setup.py b/setup.py index ab68f89..1a05e72 100755 --- a/setup.py +++ b/setup.py @@ -226,7 +226,7 @@ def main(): import codecs setuptools.setup( name='wcwidth', - version='0.1.4', + version='0.1.5', description=("Measures number of Terminal column cells " "of wide-character codes"), long_description=codecs.open( @@ -242,13 +242,13 @@ def main(): classifiers=[ 'Intended Audience :: Developers', 'Natural Language :: English', - 'Development Status :: 2 - Pre-Alpha', + 'Development Status :: 3 - Alpha', 'Environment :: Console', 'License :: OSI Approved :: MIT License', 'Operating System :: POSIX', 'Programming Language :: Python :: 2.7', - 'Programming Language :: Python :: 3.3', 'Programming Language :: Python :: 3.4', + 'Programming Language :: Python :: 3.5', 'Topic :: Software Development :: Libraries', 'Topic :: Software Development :: Localization', 'Topic :: Software Development :: 
Internationalization', From 88bea80e6acedcb003f60b94bc280fe4657d4c44 Mon Sep 17 00:00:00 2001 From: Jeff Quast Date: Sun, 13 Sep 2015 22:32:57 -0700 Subject: [PATCH 21/29] do static analysis via travis-ci py3.4 --- .travis.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.travis.yml b/.travis.yml index 0c85a4b..df93bbd 100644 --- a/.travis.yml +++ b/.travis.yml @@ -20,6 +20,9 @@ install: script: - tox -e $TOXENV + - if [[ $TOXENV == "py34" ]]; then + tox -esa + fi after_success: - if [[ $TOXENV == "py34" ]]; then From 1ecbbf8571940d47e3ef97e92551c0882dad34fd Mon Sep 17 00:00:00 2001 From: Jeff Quast Date: Sun, 13 Sep 2015 22:33:08 -0700 Subject: [PATCH 22/29] bugfix duplicate wcwidth.c target in .rst --- README.rst | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/README.rst b/README.rst index 0ce8cdf..a744380 100644 --- a/README.rst +++ b/README.rst @@ -130,9 +130,9 @@ wcwidth.c --------- This code was originally derived directly from C code of the same name, -whose latest version is available at: `wcwidth.c -`_ And is authored by Markus -Kuhn -- 2007-05-26 (Unicode 5.0) +whose latest version is available at +http://www.cl.cam.ac.uk/~mgk25/ucs/wcwidth.c And is authored by Markus Kuhn, +2007-05-26 (Unicode 5.0). 
Examples -------- From ebba4d0f13ba33079bc95e452a18b04c0352c340 Mon Sep 17 00:00:00 2001 From: Jeff Quast Date: Sun, 13 Sep 2015 22:33:20 -0700 Subject: [PATCH 23/29] rename prospector->sa (static analysis) --- tox.ini | 25 +++++++++++++------------ 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/tox.ini b/tox.ini index 8011dbc..45afe59 100644 --- a/tox.ini +++ b/tox.ini @@ -1,5 +1,5 @@ [tox] -envlist = prospector, +envlist = sa, py26, py27, py33, @@ -17,19 +17,20 @@ commands = {envbindir}/py.test \ -rs --strict \ wcwidth/tests {posargs} -[testenv:prospector] -deps = prospector[with_everything] - --requirement=requirements-develop.txt -commands = prospector \ - --profile {toxinidir}/.prospector.yaml \ - --no-autodetect +[testenv:sa] +deps = --requirement=requirements-develop.txt -[testenv:vulture] -# execute only 'vulture', it has (known) false-positives. -deps = prospector[with_vulture] -commands = prospector -t vulture \ +# - prospector is configured using .prospector.yaml, and wraps several +# static analysis/linting and style-checker tools. +# - rst-lint ensures that README.rst will present correctly on pypi. 
+commands = {envbindir}/prospector \ --profile {toxinidir}/.prospector.yaml \ - --no-autodetect + --no-autodetect \ + --die-on-tool-error \ + --doc-warnings \ + {toxinidir} + {envbindir}/rst-lint README.rst + [testenv:py26] # python2.6 doesn't support/match all of our combining table From 8e28f44ea5dfa869e33d0aeee8606e801be8a37c Mon Sep 17 00:00:00 2001 From: Jeff Quast Date: Sun, 13 Sep 2015 22:33:30 -0700 Subject: [PATCH 24/29] add static analysis to requirements-develop.txt --- requirements-develop.txt | 2 ++ 1 file changed, 2 insertions(+) diff --git a/requirements-develop.txt b/requirements-develop.txt index 1150de3..fc36f6a 100644 --- a/requirements-develop.txt +++ b/requirements-develop.txt @@ -1,3 +1,5 @@ requests blessed docopt +prospector[with_frosted,with_pyroma] +restructuredtext_lint From c03620f28923ee2884b8180c099641ea82acd096 Mon Sep 17 00:00:00 2001 From: Jeff Quast Date: Sun, 13 Sep 2015 22:33:40 -0700 Subject: [PATCH 25/29] static analysis: 2 newlines between funcs --- wcwidth/tests/__init__.py | 1 + wcwidth/tests/test_core.py | 2 ++ 2 files changed, 3 insertions(+) diff --git a/wcwidth/tests/__init__.py b/wcwidth/tests/__init__.py index e69de29..3813003 100644 --- a/wcwidth/tests/__init__.py +++ b/wcwidth/tests/__init__.py @@ -0,0 +1 @@ +"""This file intentionally left blank.""" diff --git a/wcwidth/tests/test_core.py b/wcwidth/tests/test_core.py index 87a4a95..b05a788 100755 --- a/wcwidth/tests/test_core.py +++ b/wcwidth/tests/test_core.py @@ -108,6 +108,7 @@ def test_combining_cafe(): assert length_each == expect_length_each assert length_phrase == expect_length_phrase + def test_combining_enclosing(): u"""CYRILLIC CAPITAL LETTER A + COMBINING CYRILLIC HUNDRED THOUSANDS SIGN is А҈ of length 1.""" phrase = u"\u0410\u0488" @@ -122,6 +123,7 @@ def test_combining_enclosing(): assert length_each == expect_length_each assert length_phrase == expect_length_phrase + def test_combining_spacing(): u"""Balinese kapal (ship) is ᬓᬨᬮ᭄ of length 4.""" phrase 
= u"\u1B13\u1B28\u1B2E\u1B44" From 67df6fa61a26c4acbff04a1c1d15dc622e928aaa Mon Sep 17 00:00:00 2001 From: Jeff Quast Date: Sun, 13 Sep 2015 22:44:41 -0700 Subject: [PATCH 26/29] prepare 0.1.5 changelog and docfix rst by linter --- README.rst | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/README.rst b/README.rst index a744380..3d1f7d8 100644 --- a/README.rst +++ b/README.rst @@ -155,6 +155,12 @@ distributed as a script or part of the module. Changes ------- +0.1.5 *2015-09-13 Alpha* + * **Bugfix**: + Resolution of "combining character width", most especially + those that previously returned -1 now often (correctly) return 0. + resolved by `Philip Craig`_ via `PR #11`_. + 0.1.4 *2014-11-20 Pre-Alpha* * **Feature**: ``wcswidth()`` now determines printable length for (most) combining characters. The developer's tool @@ -165,11 +171,11 @@ Changes 0.1.3 *2014-10-29 Pre-Alpha* * **Bugfix**: 2nd parameter of wcswidth was not honored. - (`Thomas Ballinger`_, `PR #4`). + (`Thomas Ballinger`_, `PR #4`_). 0.1.2 *2014-10-28 Pre-Alpha* * **Updated** tables to Unicode Specification 7.0.0. - (`Thomas Ballinger`_, `PR #3`). + (`Thomas Ballinger`_, `PR #3`_). 
0.1.1 *2014-05-14 Pre-Alpha* * Initial release to pypi, Based on Unicode Specification 6.3.0 From f2eb4b92639a3ad57efdc677eee5f89380ca4093 Mon Sep 17 00:00:00 2001 From: Jeff Quast Date: Sun, 13 Sep 2015 22:57:51 -0700 Subject: [PATCH 27/29] travis-ci faux-shell semicolon fix --- .travis.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index df93bbd..b0482f5 100644 --- a/.travis.yml +++ b/.travis.yml @@ -21,7 +21,7 @@ install: script: - tox -e $TOXENV - if [[ $TOXENV == "py34" ]]; then - tox -esa + tox -esa; fi after_success: From d72c944e1ca7c5626634416dba13e0698db8778c Mon Sep 17 00:00:00 2001 From: Jeff Quast Date: Sun, 13 Sep 2015 23:02:24 -0700 Subject: [PATCH 28/29] freeze develop dependencies --- requirements-develop.txt | 38 +++++++++++++++++++++++++++++++++----- 1 file changed, 33 insertions(+), 5 deletions(-) diff --git a/requirements-develop.txt b/requirements-develop.txt index fc36f6a..7461be4 100644 --- a/requirements-develop.txt +++ b/requirements-develop.txt @@ -1,5 +1,33 @@ -requests -blessed -docopt -prospector[with_frosted,with_pyroma] -restructuredtext_lint +# direct dependencies +requests==2.7.0 +blessed==1.9.5 +docopt==0.6.2 +prospector==0.10.2[with_frosted,with_pyroma] +restructuredtext-lint==0.12.2 + +# transient dependencies +astroid==1.3.8 +docutils==0.12 +dodgy==0.1.7 +enum34==1.0.4 +frosted==1.4.1 +ipaddress==1.0.14 +logilab-common==1.0.2 +mccabe==0.3.1 +pep257==0.6.0 +pep8==1.6.2 +pep8-naming==0.3.3 +pies==2.6.7 +pies2overrides==2.6.7 +pyflakes==0.9.2 +pylint==1.4.4 +pylint-celery==0.3 +pylint-common==0.2.1 +pylint-django==0.6.1 +pylint-plugin-utils==0.2.3 +pyroma==1.8.2 +PyYAML==3.11 +requirements-detector==0.4 +setoptconf==0.2.0 +six==1.9.0 +wheel==0.24.0 From 426d7481a8034d41a41f2483a0ae91d83953d7e3 Mon Sep 17 00:00:00 2001 From: Jeff Quast Date: Sun, 13 Sep 2015 23:04:34 -0700 Subject: [PATCH 29/29] resolve static analysis and 'usedevelop' tests --- requirements-develop.txt | 2 +- 
tox.ini | 7 ++++++- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/requirements-develop.txt b/requirements-develop.txt index 7461be4..3b1dd23 100644 --- a/requirements-develop.txt +++ b/requirements-develop.txt @@ -2,7 +2,7 @@ requests==2.7.0 blessed==1.9.5 docopt==0.6.2 -prospector==0.10.2[with_frosted,with_pyroma] +prospector[with_frosted,with_pyroma]==0.10.2 restructuredtext-lint==0.12.2 # transient dependencies diff --git a/tox.ini b/tox.ini index 45afe59..4406841 100644 --- a/tox.ini +++ b/tox.ini @@ -10,7 +10,6 @@ envlist = sa, [testenv] # for any python, run simple pytest (only) deps = pytest - setenv = PYTHONIOENCODING=utf-8 commands = {envbindir}/py.test \ @@ -19,6 +18,11 @@ commands = {envbindir}/py.test \ [testenv:sa] deps = --requirement=requirements-develop.txt +# anytime we use requirements-develop.txt, we must also +# demand that we use 'python setup.py develop', to satisfy +# an otherwise circular dependency on downstream 'blessed' +# library. +usedevelop = true # - prospector is configured using .prospector.yaml, and wraps several # static analysis/linting and style-checker tools. @@ -49,6 +53,7 @@ commands = {envbindir}/py.test \ deps = pytest pytest-cov --requirement=requirements-develop.txt +usedevelop = true commands = {envbindir}/py.test -v \ -x --strict \