Skip to content

Commit

Permalink
* tokenizing / syntax coloring support multi-line strings and comments
Browse files Browse the repository at this point in the history
* trace screen updated for multiline constructs
* ed.ex updated for multiline constructs
* fixes ticket 710
  • Loading branch information
matthewwalkerlewis committed Mar 1, 2013
1 parent 66bb4be commit 0f6d98d
Show file tree
Hide file tree
Showing 11 changed files with 185 additions and 31 deletions.
56 changes: 49 additions & 7 deletions bin/ed.ex
Original file line number Diff line number Diff line change
Expand Up @@ -276,6 +276,8 @@ sequence buffer -- In-memory buffer where the file is manipulated.
-- This is a sequence where each element is a sequence
-- containing one line of text. Each line of text ends with '\n'

sequence buffer_multi -- remember if the line ended with an open multiline token

positive_int screen_length -- number of lines on physical screen
positive_int screen_width

Expand Down Expand Up @@ -344,6 +346,7 @@ boolean stop -- indicates when to stop processing current buffer
sequence kill_buffer -- kill buffer of deleted lines or characters
kill_buffer = {}


boolean adding_to_kill -- TRUE if still accumulating deleted lines/chars

boolean multi_color -- use colors for keywords etc.
Expand Down Expand Up @@ -504,6 +507,32 @@ procedure set_absolute_position(natural window_line, positive_int column)
position(window_base + window_line, column)
end procedure

-- Return the multiline-token state recorded for the end of buffer line
-- ##bline##, recomputing it if the cached value is stale.
--
-- Parameters:
-- # ##bline## index of a line in ##buffer## / ##buffer_multi##
--
-- Returns:
-- The cached or recomputed entry of ##buffer_multi## for ##bline##, or 0
-- when ##bline## is outside 1..length( buffer_multi ).
--
-- Comments:
-- Entries of ##buffer_multi## that do not satisfy ##multiline_token## (the
-- editor stores -1 for new or edited lines) are treated as "unknown". The
-- routine walks back to the nearest line with a known state (or to the top
-- of the buffer) and re-colors forward from there, storing each line's
-- state as it goes.
function get_multiline( integer bline )
	-- "<=" so that the last line of the buffer is a valid index as well
	if bline > 0 and bline <= length( buffer_multi ) then
		integer multi = buffer_multi[bline]
		if not multiline_token( multi ) then
			-- have to back up to the closest line whose state is known...
			integer prev = bline - 1
			while prev and not multiline_token( buffer_multi[prev] ) do
				prev -= 1
			end while
			-- ...then re-tokenize each line in turn; the result of SyntaxColor
			-- is discarded, only the trailing multiline state is wanted
			for re_line = prev + 1 to bline do
				SyntaxColor( buffer[re_line], , get_multiline( re_line - 1 ) )
				buffer_multi[re_line] = last_multiline_token()
			end for
			multi = buffer_multi[bline]
		end if
		return multi
	end if
	return 0
end function

-- Record the multiline-token state that buffer line ##bline## ends with.
--
-- Parameters:
-- # ##bline## index of a line in ##buffer_multi##
-- # ##multi## the state to store (0 or one of the multiline token forms)
--
-- Comments:
-- Out-of-range line numbers are ignored.
procedure set_multiline( integer bline, multiline_token multi )
	-- "<=" so that the state of the last buffer line is stored too
	if bline > 0 and bline <= length( buffer_multi ) then
		buffer_multi[bline] = multi
	end if
end procedure

procedure DisplayLine(buffer_line bline, window_line sline, boolean all_clear)
-- display a buffer line on a given line on the screen
-- if all_clear is TRUE then the screen area has already been cleared before getting here.
Expand All @@ -515,7 +544,8 @@ procedure DisplayLine(buffer_line bline, window_line sline, boolean all_clear)
set_absolute_position(sline, 1)
if multi_color then
-- color display
color_line = SyntaxColor(this_line)
color_line = SyntaxColor(this_line, ,get_multiline( bline - 1 ))
set_multiline( bline, last_multiline_token() )
last_pos = 0

for i = 1 to length(color_line) do
Expand Down Expand Up @@ -648,6 +678,7 @@ function add_line(file_number file_no)

line = convert_tabs(STANDARD_TAB_WIDTH, edit_tab_width, clean(line))
buffer = append(buffer, line)
buffer_multi &= -1
return TRUE
end function

Expand All @@ -656,6 +687,7 @@ procedure new_buffer()
buffer_list &= 0 -- place holder for new buffer
buffer_number = length(buffer_list)
buffer = {}
buffer_multi = {}
end procedure

procedure read_file(file_number file_no)
Expand Down Expand Up @@ -947,14 +979,20 @@ constant W_BUFFER_NUMBER = 1,
W_WINDOW_LENGTH = 4,
W_B_LINE = 11

-- Indexes into one saved entry of buffer_list, in the order the elements
-- are stored by save_state(): {buffer, modified, buffer_version, buffer_multi}
enum
B_BUFFER, -- the buffer itself
B_MODIFIED, -- the saved value of modified
B_VERSION, -- the saved value of buffer_version
B_MULTILINE, -- the saved value of buffer_multi
$
procedure save_state()
-- save current state variables for a window, and the state of the
-- buffer it is showing
	window_list[window_number] = {buffer_number, buffer_version, window_base,
		window_length, auto_complete, multi_color,
		dot_e, control_chars, cr_removed, file_name,
		b_line, b_col, s_line, s_col, s_shift,
		edit_tab_width}
	-- element order must match the B_BUFFER .. B_MULTILINE enum; the entry
	-- is stored once, including buffer_multi
	buffer_list[buffer_number] = {buffer, modified, buffer_version, buffer_multi}
end procedure

procedure restore_state(window_id w)
Expand All @@ -967,9 +1005,10 @@ procedure restore_state(window_id w)
window_number = w
buffer_number = state[W_BUFFER_NUMBER]
buffer_info = buffer_list[buffer_number]
buffer = buffer_info[1]
modified = buffer_info[2]
buffer_version = buffer_info[3]
buffer = buffer_info[B_BUFFER]
modified = buffer_info[B_MODIFIED]
buffer_version = buffer_info[B_VERSION]
buffer_multi = buffer_info[B_MULTILINE]
buffer_list[buffer_number] = 0 -- save space

-- restore other variables
Expand Down Expand Up @@ -1075,7 +1114,7 @@ function delete_window()
-- delete the current window
boolean buff_in_use

buffer_list[buffer_number] = {buffer, modified, buffer_version}
buffer_list[buffer_number] = {buffer, modified, buffer_version, buffer_multi}
window_list = window_list[1..window_number-1] &
window_list[window_number+1..length(window_list)]
buff_in_use = FALSE
Expand Down Expand Up @@ -1952,6 +1991,7 @@ procedure insert(char key)
-- truncate this line and create a new line using tail
buffer[b_line] = head( buffer[b_line], b_col-1) & '\n'
buffer = eu:insert( buffer, tail, b_line + 1 )
buffer_multi = eu:insert( buffer_multi, -1, b_line + 1 )

if s_line = window_length then
arrow_down()
Expand Down Expand Up @@ -1993,6 +2033,7 @@ procedure insert_string(sequence text)
insert(text[i])
else
buffer[b_line] = splice( buffer[b_line], text[i], b_col )
buffer_multi[b_line] = splice( buffer_multi[b_line], -1, b_col )
b_col += 1
if i = length(text) then
DisplayLine(b_line, s_line, FALSE)
Expand Down Expand Up @@ -2077,7 +2118,8 @@ procedure insert_kill_buffer()
insert_string(kill_buffer)
else
-- inserting a sequence of lines
buffer = splice( buffer, kill_buffer, b_line )
buffer = splice( buffer, kill_buffer, b_line )
buffer_multi = splice( buffer_multi, repeat( -1, length( kill_buffer ) ), b_line )
DisplayWindow(b_line, s_line)
b_col = 1
s_col = 1
Expand Down
2 changes: 2 additions & 0 deletions docs/release/4.1.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,8 @@
* Updated demo/news.ex with up-to-date URLs for some news web sites.
* Fix std/net/http.e so it can handle cases where the Content-Length header is not present
* Fix std/sequence.e so store() will correctly handle the one-element index case - it was duplicating the entire sequence before.
* [[ticket:710]] Updated tokenizer and syntax coloring to be able to preserve state between lines.
The euphoria trace screen and ed.ex now properly colorize multiline strings and comments.

== Enhancements

Expand Down
1 change: 1 addition & 0 deletions include/euphoria/symstruct.e
Original file line number Diff line number Diff line change
Expand Up @@ -47,4 +47,5 @@ public constant
SL_LINE = offset( C_SHORT ),
SL_FILE_NO = offset( C_CHAR ),
SL_OPTIONS = offset( C_CHAR ),
SL_MULTILINE = offset( C_INT ),
SL_SIZE = next_offset + remainder( next_offset, sizeof( C_POINTER ) ) -- padding
23 changes: 19 additions & 4 deletions include/euphoria/syncolor.e
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
--****
-- == Syntax Coloring
--
-- <<LEVELTOC level=2 depth=4>>
--
-- Syntax Color
-- Break Euphoria statements into words with multiple colors.
-- The editor and pretty printer (eprint.ex) both use this file.
Expand All @@ -22,7 +24,7 @@ namespace syncolor
include std/text.e
include std/eumem.e

include tokenize.e
public include tokenize.e

integer NORMAL_COLOR,
COMMENT_COLOR,
Expand Down Expand Up @@ -126,7 +128,7 @@ public function new()
return state
end function

--**
--
-- Reset the state to begin parsing a new file
--
-- See Also:
Expand All @@ -149,9 +151,18 @@ public procedure keep_newlines(integer val = 1, atom state = g_state)
eumem:ram_space[state][S_KEEP_NEWLINES] = val
end procedure





--**
-- Parse Euphoria code into tokens of like colors.
--
-- Parameters:
-- # ##pline## the source code to color
-- # ##state## (default g_state) the tokenizer to use
-- # ##multi## the multiline token from the previous line
--
-- Break up a new-line terminated line into colored text segments identifying the
-- various parts of the Euphoria language. They are broken into separate tokens.
--
Expand All @@ -161,8 +172,12 @@ end procedure
-- {{color1, "text1"}, {color2, "text2"}, ... }
-- </eucode>
--
-- Comments:
-- In order to properly color multiline syntax (strings and comments), you should pass
-- a value for ##multi##. This value can be obtained by calling ##[[:last_multiline_token]]##
-- after coloring the previous line.

public function SyntaxColor(sequence pline, atom state=g_state)
public function SyntaxColor(sequence pline, atom state=g_state, multiline_token multi = 0)
integer class, last, i
sequence word, c
atom token = eumem:ram_space[state][S_TOKENIZER]
Expand All @@ -176,7 +191,7 @@ public function SyntaxColor(sequence pline, atom state=g_state)
tokenize:return_literal_string(,token)
tokenize:string_strip_quotes(0,token)

line = tokenize:tokenize_string(pline, token, 0)
line = tokenize:tokenize_string(pline, token, 0, multi)
-- TODO error checking?
line = line[1]
current_color = DONT_CARE
Expand Down
87 changes: 78 additions & 9 deletions include/euphoria/tokenize.e
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,8 @@ public enum
-- this list of delimiters must match the order of the corresponding T_ codes above
constant Delimiters = "+-*/<>!&" & "=(){}[]?,.:$" -- double & single ops

--****
-- === Token accessors
public enum
TTYPE,
TDATA,
Expand Down Expand Up @@ -417,13 +419,16 @@ function scankeep_white(atom state = g_state)
return FALSE
end function

function scan_multicomment(atom state = g_state)
function scan_multicomment(atom state = g_state, multiline_token multi = 0)
Token[TTYPE] = T_COMMENT
Token[TDATA] = "/"
if not multi then
Token[TDATA] = "/"
end if
Token[TFORM] = TF_COMMENT_MULTIPLE

while 1 do
if (Look = io:EOF) or (Look = EOL) then
last_multi = TF_COMMENT_MULTIPLE
-- report_error(ERR_EOF)
return TRUE
end if
Expand Down Expand Up @@ -503,7 +508,37 @@ function lookahead_for( object needle, integer look_at = 1 )
return TRUE
end function

function raw_string( sequence delimiter, atom state )
--**
-- Type check for the "open multiline construct" marker.
--
-- Returns:
-- 1 when ##mlt## is an atom equal to 0, TF_STRING_BACKTICK, TF_STRING_TRIPLE
-- or TF_COMMENT_MULTIPLE; 0 for any other atom and for every sequence.
public type multiline_token( object mlt )
	-- a sequence can never be a marker
	if sequence( mlt ) then
		return 0
	end if
	-- outside of if/while conditions the comparisons and "or" yield 1 or 0
	return mlt = 0
		or mlt = TF_STRING_BACKTICK
		or mlt = TF_STRING_TRIPLE
		or mlt = TF_COMMENT_MULTIPLE
end type

multiline_token last_multi = 0

--**
-- Report the multiline construct, if any, left open by the last tokenized text.
--
-- Returns:
-- One of 0, TF_COMMENT_MULTIPLE, TF_STRING_BACKTICK, TF_STRING_TRIPLE.
--
-- Comments:
-- ##[[:tokenize_string]]## resets the value to 0 each time it is called. The
-- value is then set to the form of a multiline comment or raw string when the
-- text ends while that construct is still open. Pass the result as the
-- ##multi## argument when tokenizing the following line, so that source code
-- can be tokenized one line at a time.

public function last_multiline_token()
	multiline_token pending = last_multi
	return pending
end function

function raw_string( sequence delimiter, atom state, multiline_token multi = 0 )
Token[TTYPE] = T_STRING
Token[TDATA] = ""

Expand All @@ -526,9 +561,16 @@ function raw_string( sequence delimiter, atom state )

if eumem:ram_space[state][STRING_KEEP_QUOTES] then
if Look = io:EOF then
Token[TDATA] = delimiter & Token[TDATA]
if not multi then
Token[TDATA] = delimiter & Token[TDATA]
end if
last_multi = Token[TFORM]
else
Token[TDATA] = delimiter & Token[TDATA] & delimiter
if multi then
Token[TDATA] = Token[TDATA] & delimiter
else
Token[TDATA] = delimiter & Token[TDATA] & delimiter
end if
end if
end if

Expand Down Expand Up @@ -1098,13 +1140,26 @@ end procedure
--****
-- === Routines

public function tokenize_string(sequence code, atom state = g_state, integer stop_on_error = TRUE)
--**
-- Tokenize Euphoria source code
--
-- Parameters:
-- # ##code## The code to be tokenized
-- # ##state## (default g_state) the tokenizer returned by ##[[:new]]##
-- # ##stop_on_error## (default TRUE)
-- # ##multi## one of 0, TF_COMMENT_MULTIPLE, TF_STRING_BACKTICK, TF_STRING_TRIPLE
--
-- Returns:
-- Sequence of tokens

public function tokenize_string(sequence code, atom state = g_state, integer stop_on_error = TRUE, multiline_token multi = 0)
sequence tokens

ERR = FALSE
ERR_LNUM = 0
ERR_LPOS = 0

last_multi = 0

tokens = {}

source_text = code
Expand All @@ -1121,7 +1176,21 @@ public function tokenize_string(sequence code, atom state = g_state, integer sto
Token[TLNUM] = 1
Token[TLPOS] = 1

if (Look = '#') and (lookahead(1) = '!') then
if multi then
sti = 0
switch multi do
case TF_STRING_BACKTICK then
raw_string( "`", state, multi )

case TF_STRING_TRIPLE then
raw_string( `"""`, state, multi )
case TF_COMMENT_MULTIPLE then
scan_multicomment( state, multi )
case else
-- error?
end switch
tokens &= { Token }
elsif (Look = '#') and (lookahead(1) = '!') then
sti += 1
scan_char(state)
if eumem:ram_space[state][DELETE_WHITE] then
Expand Down Expand Up @@ -1176,7 +1245,7 @@ public constant token_names = {
}

public constant token_forms = {
"TF_HEX", "TF_INT", "TF_ATOM", "TF_STRING_SINGLE", "TF_STRING_TRIPPLE",
"TF_HEX", "TF_INT", "TF_ATOM", "TF_STRING_SINGLE", "TF_STRING_TRIPLE",
"TF_STRING_BACKTICK", "TF_STRING_HEX", "TF_COMMENT_SINGLE", "TF_COMMENT_MULTIPLE"
}

Expand Down
7 changes: 7 additions & 0 deletions source/backend.e
Original file line number Diff line number Diff line change
Expand Up @@ -211,6 +211,7 @@ procedure BackEnd(integer il_file)
end if

short = length(eentry) < 4
sequence started_file = repeat( 0, length( known_files ) )
for j = 1 to repcount do
poke2(addr + SL_LINE, eentry[LINE-short]) -- hits 4,5,6,7
-- 7 should be 0 unless 16 million
Expand All @@ -222,6 +223,12 @@ procedure BackEnd(integer il_file)
end if
poke(addr + SL_OPTIONS, eentry[OPTIONS]) -- else leave it 0
end if
if started_file[eentry[LOCAL_FILE_NO-short]] then
poke4( addr + SL_MULTILINE, -1 )
else
poke4( addr + SL_MULTILINE, 0 )
started_file[eentry[LOCAL_FILE_NO-short]] = 1
end if
addr += SL_SIZE
eentry[LINE-short] += 1
end for
Expand Down
Loading

0 comments on commit 0f6d98d

Please sign in to comment.