diff --git a/pygls/workspace/document.py b/pygls/workspace/document.py
index b2e99fce..c98f9711 100644
--- a/pygls/workspace/document.py
+++ b/pygls/workspace/document.py
@@ -169,7 +169,7 @@ def offset_at_position(self, client_position: types.Position) -> int:
             lines, client_position
         )
         row, col = server_position.line, server_position.character
-        return col + sum(self.position.utf16_num_units(line) for line in lines[:row])
+        return col + sum(self.position.client_num_units(line) for line in lines[:row])
 
     @property
     def source(self) -> str:
diff --git a/pygls/workspace/position.py b/pygls/workspace/position.py
index 4b7b7cbe..f54071a4 100644
--- a/pygls/workspace/position.py
+++ b/pygls/workspace/position.py
@@ -47,14 +47,22 @@ def utf16_unit_offset(self, chars: str):
         """
         return sum(self.is_char_beyond_multilingual_plane(ch) for ch in chars)
 
-    def utf16_num_units(self, chars: str):
+    def client_num_units(self, chars: str):
         """
-        Calculate the length of `str` in utf-16 code units.
+        Calculate the length of `str` in code units of the client's encoding.
 
         Arguments:
-            chars (str): The string to return the length in utf-16 code units for.
+            chars (str): The string to measure in `self.encoding` code units.
         """
-        return len(chars) + self.utf16_unit_offset(chars)
+        utf32_units = len(chars)
+        if self.encoding == types.PositionEncodingKind.Utf32:
+            return utf32_units
+
+        if self.encoding == types.PositionEncodingKind.Utf8:
+            # UTF-8 code units are bytes; "surrogatepass" keeps lone surrogates from raising.
+            return len(chars.encode("utf-8", "surrogatepass"))
+
+        return utf32_units + self.utf16_unit_offset(chars)
 
     def position_from_client_units(
         self, lines: List[str], position: types.Position
@@ -88,24 +96,24 @@ def position_from_client_units(
         if len(lines) == 0:
             return types.Position(0, 0)
         if position.line >= len(lines):
-            return types.Position(len(lines) - 1, self.utf16_num_units(lines[-1]))
+            return types.Position(len(lines) - 1, self.client_num_units(lines[-1]))
 
         _line = lines[position.line]
         _line = _line.replace("\r\n", "\n")  # TODO: it's a bit of a hack
-        _utf16_len = self.utf16_num_units(_line)
+        _client_len = self.client_num_units(_line)
         _utf32_len = len(_line)
 
-        if _utf16_len == 0:
+        if _client_len == 0:
             return types.Position(position.line, 0)
 
-        _utf16_end_of_line = self.utf16_num_units(_line)
-        if position.character > _utf16_end_of_line:
-            position.character = _utf16_end_of_line - 1
+        _client_end_of_line = self.client_num_units(_line)
+        if position.character > _client_end_of_line:
+            position.character = _client_end_of_line - 1
 
-        _utf16_index = 0
+        _client_index = 0
         utf32_index = 0
         while True:
-            _is_searching_queried_position = _utf16_index < position.character
+            _is_searching_queried_position = _client_index < position.character
             _is_before_end_of_line = utf32_index < _utf32_len
             _is_searching_for_position = (
                 _is_searching_queried_position and _is_before_end_of_line
@@ -116,9 +124,6 @@ def position_from_client_units(
             _current_char = _line[utf32_index]
-            _is_double_width = Position.is_char_beyond_multilingual_plane(_current_char)
-            if _is_double_width:
-                _utf16_index += 2
-            else:
-                _utf16_index += 1
+            # Advance by this character's width in the client's encoding.
+            _client_index += self.client_num_units(_current_char)
             utf32_index += 1
 
         position = types.Position(line=position.line, character=utf32_index)
@@ -141,10 +146,12 @@ def position_to_client_unit(
             The position with `character` being converted to UTF-[32|16|8] code units.
         """
         try:
+            character = self.client_num_units(
+                lines[position.line][: position.character]
+            )
             return types.Position(
                 line=position.line,
-                character=position.character
-                + self.utf16_unit_offset(lines[position.line][: position.character]),
+                character=character,
             )
         except IndexError:
             return types.Position(line=len(lines), character=0)
diff --git a/tests/conftest.py b/tests/conftest.py
index 6e3eada5..1e8d88ae 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -111,11 +111,6 @@ def server_dir():
 json_server_client = create_client_for_server("json_server.py")
 
 
-@pytest.fixture
-def doc():
-    return TextDocument(DOC_URI, DOC)
-
-
 @pytest.fixture
 def feature_manager():
     """Return a feature manager"""
diff --git a/tests/test_document.py b/tests/test_document.py
index b83e0e2b..14903d4f 100644
--- a/tests/test_document.py
+++ b/tests/test_document.py
@@ -105,7 +105,8 @@ def test_document_line_edit():
     assert doc.source == "itsgoodbyeworld"
 
 
-def test_document_lines(doc):
+def test_document_lines():
+    doc = Document(DOC_URI, DOC)
     assert len(doc.lines) == 4
     assert doc.lines[0] == "document\n"
 
@@ -160,7 +161,9 @@ def test_document_no_edit():
     assert doc.lines == old
 
 
-def test_document_props(doc):
+def test_document_props():
+    doc = Document(DOC_URI, DOC)
+
     assert doc.uri == DOC_URI
     assert doc.source == DOC
 
@@ -180,9 +183,31 @@ def test_position_from_utf16():
         ['x="😋"'], types.Position(line=0, character=5)
     ) == types.Position(line=0, character=4)
 
-    actual = types.Position(line=0, character=5)
-    position.position_from_client_units(['x="😋"'], actual)
-    assert actual == types.Position(line=0, character=5)
+
+def test_position_from_utf32():
+    position = Position(encoding=types.PositionEncodingKind.Utf32)
+    assert position.position_from_client_units(
+        ['x="😋"'], types.Position(line=0, character=3)
+    ) == types.Position(line=0, character=3)
+    assert position.position_from_client_units(
+        ['x="😋"'], types.Position(line=0, character=4)
+    ) == types.Position(line=0, character=4)
+    assert position.position_from_client_units(
+        ['x="😋"'], types.Position(line=0, character=5)
+    ) == types.Position(line=0, character=5)
+
+
+def test_position_from_utf8():
+    position = Position(encoding=types.PositionEncodingKind.Utf8)
+    assert position.position_from_client_units(
+        ['x="😋"'], types.Position(line=0, character=3)
+    ) == types.Position(line=0, character=3)
+    assert position.position_from_client_units(
+        ['x="😋"'], types.Position(line=0, character=7)
+    ) == types.Position(line=0, character=4)
+    assert position.position_from_client_units(
+        ['x="😋"'], types.Position(line=0, character=8)
+    ) == types.Position(line=0, character=5)
 
 
 def test_position_to_utf16():
@@ -195,9 +220,27 @@ def test_position_to_utf16():
         ['x="😋"'], types.Position(line=0, character=4)
     ) == types.Position(line=0, character=5)
 
-    actual = types.Position(line=0, character=4)
-    position.position_to_client_unit(['x="😋"'], actual)
-    assert actual == types.Position(line=0, character=4)
+
+def test_position_to_utf32():
+    position = Position(encoding=types.PositionEncodingKind.Utf32)
+    assert position.position_to_client_unit(
+        ['x="😋"'], types.Position(line=0, character=3)
+    ) == types.Position(line=0, character=3)
+
+    assert position.position_to_client_unit(
+        ['x="😋"'], types.Position(line=0, character=4)
+    ) == types.Position(line=0, character=4)
+
+
+def test_position_to_utf8():
+    position = Position(encoding=types.PositionEncodingKind.Utf8)
+    assert position.position_to_client_unit(
+        ['x="😋"'], types.Position(line=0, character=3)
+    ) == types.Position(line=0, character=3)
+
+    assert position.position_to_client_unit(
+        ['x="😋"'], types.Position(line=0, character=4)
+    ) == types.Position(line=0, character=7)
 
 
 def test_range_from_utf16():
@@ -250,7 +293,8 @@ def test_range_to_utf16():
     assert actual == expected
 
 
-def test_offset_at_position(doc):
+def test_offset_at_position_utf16():
+    doc = Document(DOC_URI, DOC)
     assert doc.offset_at_position(types.Position(line=0, character=8)) == 8
     assert doc.offset_at_position(types.Position(line=1, character=5)) == 12
     assert doc.offset_at_position(types.Position(line=2, character=0)) == 13
@@ -262,6 +306,19 @@ def test_offset_at_position(doc):
     assert doc.offset_at_position(types.Position(line=5, character=0)) == 40
 
 
+def test_offset_at_position_utf32():
+    doc = Document(DOC_URI, DOC, position_encoding=types.PositionEncodingKind.Utf32)
+    assert doc.offset_at_position(types.Position(line=0, character=8)) == 8
+    assert doc.offset_at_position(types.Position(line=5, character=0)) == 39
+
+
+def test_offset_at_position_utf8():
+    doc = Document(DOC_URI, DOC, position_encoding=types.PositionEncodingKind.Utf8)
+    assert doc.offset_at_position(types.Position(line=0, character=8)) == 8
+    # 39 code points + 3 extra bytes, assuming the emoji is DOC's only non-ASCII char.
+    assert doc.offset_at_position(types.Position(line=5, character=0)) == 42
+
+
 def test_utf16_to_utf32_position_cast():
     position = Position(encoding=types.PositionEncodingKind.Utf16)
     lines = ["", "😋😋", ""]
@@ -305,10 +362,12 @@ def test_position_for_line_endings():
     ) == types.Position(line=1, character=1)
 
 
-def test_word_at_position(doc):
+def test_word_at_position():
     """
     Return word under the cursor (or last in line if past the end)
     """
+    doc = Document(DOC_URI, DOC)
+
     assert doc.word_at_position(types.Position(line=0, character=8)) == "document"
     assert doc.word_at_position(types.Position(line=0, character=1000)) == "document"
     assert doc.word_at_position(types.Position(line=1, character=5)) == "for"