Skip to content

Commit

Permalink
Merge pull request #1829 from ksss/lex
Browse files Browse the repository at this point in the history
Implement token list API
  • Loading branch information
soutaro committed May 29, 2024
2 parents 1d42c1c + a274107 commit 0831489
Show file tree
Hide file tree
Showing 8 changed files with 135 additions and 1 deletion.
20 changes: 20 additions & 0 deletions ext/rbs_extension/parser.c
Original file line number Diff line number Diff line change
Expand Up @@ -2885,9 +2885,29 @@ rbsparser_parse_signature(VALUE self, VALUE buffer, VALUE end_pos)
return rb_ensure(parse_signature_try, (VALUE)parser, ensure_free_parser, (VALUE)parser);
}

static VALUE
rbsparser_lex(VALUE self, VALUE buffer, VALUE end_pos) {
lexstate *lexer = alloc_lexer(buffer, 0, FIX2INT(end_pos));
VALUE results = rb_ary_new();

token token = NullToken;
while (token.type != pEOF) {
token = rbsparser_next_token(lexer);
VALUE type = ID2SYM(rb_intern(token_type_str(token.type)));
VALUE location = rbs_new_location(buffer, token.range);
VALUE pair = rb_ary_new3(2, type, location);
rb_ary_push(results, pair);
}

free(lexer);

return results;
}

/**
 * Defines the `Parser` class under `RBS` (stored in RBS_Parser) and registers
 * its C-implemented singleton methods. The trailing integer of each
 * registration is the number of Ruby arguments the method takes.
 */
void rbs__init_parser(void) {
RBS_Parser = rb_define_class_under(RBS, "Parser", rb_cObject);
rb_define_singleton_method(RBS_Parser, "_parse_type", rbsparser_parse_type, 5);
rb_define_singleton_method(RBS_Parser, "_parse_method_type", rbsparser_parse_method_type, 5);
rb_define_singleton_method(RBS_Parser, "_parse_signature", rbsparser_parse_signature, 2);
/* Token list API: _lex(buffer, end_pos) */
rb_define_singleton_method(RBS_Parser, "_lex", rbsparser_lex, 2);
}
7 changes: 6 additions & 1 deletion ext/rbs_extension/parserstate.c
Original file line number Diff line number Diff line change
Expand Up @@ -272,7 +272,7 @@ VALUE comment_to_ruby(comment *com, VALUE buffer) {
);
}

parserstate *alloc_parser(VALUE buffer, int start_pos, int end_pos, VALUE variables) {
lexstate *alloc_lexer(VALUE buffer, int start_pos, int end_pos) {
VALUE string = rb_funcall(buffer, rb_intern("content"), 0);

StringValue(string);
Expand All @@ -290,6 +290,11 @@ parserstate *alloc_parser(VALUE buffer, int start_pos, int end_pos, VALUE variab
lexer->start = lexer->current;
lexer->first_token_of_line = lexer->current.column == 0;

return lexer;
}

parserstate *alloc_parser(VALUE buffer, int start_pos, int end_pos, VALUE variables) {
lexstate *lexer = alloc_lexer(buffer, start_pos, end_pos);
parserstate *parser = calloc(1, sizeof(parserstate));
parser->lexstate = lexer;
parser->buffer = buffer;
Expand Down
9 changes: 9 additions & 0 deletions ext/rbs_extension/parserstate.h
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,15 @@ void parser_insert_typevar(parserstate *state, ID id);
* */
bool parser_typevar_member(parserstate *state, ID id);

/**
 * Allocate a new lexstate object that reads the content of `buffer`.
 *
 * `start_pos` and `end_pos` presumably delimit the region of the content to
 * be lexed -- NOTE(review): confirm against the definition in parserstate.c.
 *
 * A lexstate used on its own (without a parserstate) is expected to be
 * released by the caller with `free` once lexing is done.
 *
 * ```
 * alloc_lexer(buffer, 0, 31) // New lexstate with buffer
 * ```
 * */
lexstate *alloc_lexer(VALUE buffer, int start_pos, int end_pos);

/**
* Allocate new parserstate object.
*
Expand Down
15 changes: 15 additions & 0 deletions lib/rbs/parser/lex_result.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
# frozen_string_literal: true

module RBS
  class Parser
    # Value object returned by `RBS::Parser.lex`.
    #
    # It bundles the buffer that was lexed with the tokens read from it.
    class LexResult
      # `buffer` is the lexed buffer; `value` is the list of tokens.
      attr_reader :buffer, :value

      # @param buffer the buffer the tokens were read from
      # @param value the tokens produced by the lexer
      def initialize(buffer:, value:)
        @buffer, @value = buffer, value
      end
    end
  end
end
23 changes: 23 additions & 0 deletions lib/rbs/parser/token.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
# frozen_string_literal: true

module RBS
  class Parser
    # A single token produced by `RBS::Parser.lex`.
    #
    # `type` is the token type (for example `:kCLASS` or `:pEOF`) and
    # `location` is the location object the token covers.
    class Token
      attr_reader :type, :location

      # @param type the token type
      # @param location the location of the token; must respond to `source`
      def initialize(type:, location:)
        @type, @location = type, location
      end

      # Source text covered by this token's location.
      def value
        @location.source
      end

      # True when the token is a comment (`:tCOMMENT` or `:tLINECOMMENT`).
      def comment?
        return true if @type == :tCOMMENT

        @type == :tLINECOMMENT
      end
    end
  end
end
12 changes: 12 additions & 0 deletions lib/rbs/parser_aux.rb
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
# frozen_string_literal: true

require_relative "parser/lex_result"
require_relative "parser/token"

module RBS
class Parser
def self.parse_type(source, range: 0..., variables: [], require_eof: false)
Expand All @@ -19,6 +22,15 @@ def self.parse_signature(source)
[buf, dirs, decls]
end

# Tokenizes +source+ and returns the tokens together with the buffer.
#
# +source+ is converted with +buffer+, lexed up to the buffer's
# +last_position+, and each [type, location] pair from +_lex+ is wrapped
# in a Token.
#
# @return [LexResult]
def self.lex(source)
  buf = buffer(source)
  tokens = _lex(buf, buf.last_position).map { |type, location| Token.new(type: type, location: location) }

  LexResult.new(buffer: buf, value: tokens)
end

def self.buffer(source)
case source
when String
Expand Down
28 changes: 28 additions & 0 deletions sig/parser.rbs
Original file line number Diff line number Diff line change
@@ -1,5 +1,23 @@
module RBS
class Parser
# Result of `Parser.lex`
#
# Holds the lexed buffer together with the tokens read from it.
class LexResult
# The buffer that was lexed
attr_reader buffer: Buffer
# The tokens, in the order the lexer returned them
attr_reader value: Array[Token]

def initialize: (buffer: Buffer, value: Array[Token]) -> void
end

# Represents a single token in the result of `Parser.lex`.
class Token
# Token type, such as `:kCLASS`, `:tUIDENT`, or `:pEOF`
attr_reader type: Symbol
# Location of the token
attr_reader location: Location[untyped, untyped]

def initialize: (type: Symbol, location: Location[untyped, untyped]) -> void
# Returns the source text covered by `location`
def value: () -> String
# Returns `true` if the type is `:tCOMMENT` or `:tLINECOMMENT`
def comment?: () -> bool
end

# Parse a method type and return it
#
# When `range` keyword is specified, it starts parsing from the `begin` to the `end` of the range.
Expand Down Expand Up @@ -50,6 +68,14 @@ module RBS
#
def self.parse_signature: (Buffer | String) -> [Buffer, Array[AST::Directives::t], Array[AST::Declarations::t]]

# Tokenize the whole RBS source and return the result.
#
# The result holds the buffer and the list of tokens. The last token is `:pEOF`.
#
# ```ruby
# RBS::Parser.lex("# Comment\nmodule A\nend\n").value.map(&:type)
# # => [:tLINECOMMENT, :kMODULE, :tUIDENT, :kEND, :pEOF]
# ```
def self.lex: (Buffer | String) -> LexResult

KEYWORDS: Hash[String, bot]

private
Expand All @@ -62,6 +88,8 @@ module RBS

def self._parse_signature: (Buffer, Integer end_pos) -> [Array[AST::Directives::t], Array[AST::Declarations::t]]

# Lexes the buffer up to `end_pos` and returns pairs of token type and location, ending with `:pEOF`.
def self._lex: (Buffer, Integer end_pos) -> Array[[Symbol, Location[untyped, untyped]]]

class LocatedValue
end
end
Expand Down
22 changes: 22 additions & 0 deletions test/rbs/parser_test.rb
Original file line number Diff line number Diff line change
Expand Up @@ -767,4 +767,26 @@ def test_proc__untyped_function_parse_error
RBS::Parser.parse_type("^(?) { (?) -> void } -> Integer")
end
end

# Checks the type, source text and range of every token `_lex` returns
# for a small class definition with a line comment and a trailing comment.
def test__lex
  content = <<~RBS
    # LineComment
    class Foo[T < Integer] < Bar # Comment
    end
  RBS

  # One [type, source, range] triple per token, in lexing order.
  expected = [
    [:tLINECOMMENT, '# LineComment', 0...13],
    [:kCLASS, 'class', 14...19],
    [:tUIDENT, 'Foo', 20...23],
    [:pLBRACKET, '[', 23...24],
    [:tUIDENT, 'T', 24...25],
    [:pLT, '<', 26...27],
    [:tUIDENT, 'Integer', 28...35],
    [:pRBRACKET, ']', 35...36],
    [:pLT, '<', 37...38],
    [:tUIDENT, 'Bar', 39...42],
    [:tCOMMENT, '# Comment', 43...52],
    [:kEND, 'end', 53...56],
    [:pEOF, '', 57...58]
  ]

  tokens = RBS::Parser._lex(buffer(content), content.length)

  expected.each_with_index do |triple, index|
    type, location = tokens[index]
    assert_equal triple, [type, location.source, location.range]
  end
end
end

0 comments on commit 0831489

Please sign in to comment.