Skip to content

Commit

Permalink
Merge pull request #956 from andrew-johnson-4/regex-syntax
Browse files Browse the repository at this point in the history
Regex syntax
  • Loading branch information
andrew-johnson-4 authored Nov 20, 2024
2 parents 43eacc3 + 1d19832 commit a07d52f
Showing 14 changed files with 20,204 additions and 19,868 deletions.
39,895 changes: 20,042 additions & 19,853 deletions BOOTSTRAP/cli.c

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "lambda_mountain"
version = "1.19.13"
version = "1.19.14"
authors = ["Andrew <andrew@subarctic.org>"]
license = "MIT"
description = "Typed Macro Assembler (backed by Coq proofs-of-correctness)"
2 changes: 1 addition & 1 deletion EXAMPLES/tokenizer.lsts
Original file line number Diff line number Diff line change
@@ -4,8 +4,8 @@ import $"LIB/default.lm";
let c-tokenize-new(text: String): List<String> = (
let tokens = [] :: List<String>;
while non-zero(text) {match text {
(m=/^de/)..rest => (print("D.."); print(m); text = "";);
"a"..rest => (print("A.."); print(rest); text="";);
rest.."f" => (print("..F"); print(rest); text="";);
"abc" => (print("ABC"); text = "";);
rst => (print("Default: "); print(rst); text = "";);
# "\s"..rst => text = rst;
2 changes: 1 addition & 1 deletion EXAMPLES/tokenizer.lsts.out
Original file line number Diff line number Diff line change
@@ -1 +1 @@
llA..bc..Fde
llA..bcD..de
18 changes: 18 additions & 0 deletions PLATFORM/C/LIB/common-macros.lm
Original file line number Diff line number Diff line change
@@ -91,6 +91,24 @@ macro ('match-pats-arm( term ('macro::lhs-prefix-or-suffix( rest (:Literal: l) )
))
);

# match-pats-arm, suffix-bind form: the pattern is a prefix-or-suffix node whose
# SECOND element is a bind of variable `b` to literal `l`, and whose first
# element is an arbitrary sub-pattern `rest`.
# Expansion:
#   1. bind `term` to the variable named by `(uuid v)` (presumably a fresh,
#      unique name so that `term` is evaluated once — confirm against `uuid`);
#   2. if `.has-suffix` of that value and `l` holds, bind `b` to the result of
#      `.remove-suffix` and continue matching `rest` against `b`;
#   3. otherwise expand to `(branchfalse())`.
# NOTE(review): `b` receives the value returned by `.remove-suffix`, and that
# same value is what `rest` is matched against — confirm this is intended.
macro ('match-pats-arm( term ('macro::lhs-prefix-or-suffix( rest ('macro::lhs-bind( (:Variable: b) (:Literal: l) )) )) )) (
( (let (uuid v) term) (
(if (.has-suffix( (uuid v) l )) (
(let b (.remove-suffix( (uuid v) l )))
(match-pats-arm( b rest ))
) (branchfalse()))
))
);

# match-pats-arm, prefix-bind form: the pattern is a prefix-or-suffix node whose
# FIRST element is a bind of variable `b` to literal `l`, followed by an
# arbitrary sub-pattern `rest`.
# Expansion:
#   1. bind `term` to the variable named by `(uuid v)` (presumably a fresh,
#      unique name so that `term` is evaluated once — confirm against `uuid`);
#   2. if `.has-prefix` of that value and `l` holds, bind `b` to the result of
#      `.remove-prefix` and continue matching `rest` against `b`;
#   3. otherwise expand to `(branchfalse())`.
# NOTE(review): `b` receives the value returned by `.remove-prefix`, and that
# same value is what `rest` is matched against — confirm this is intended.
macro ('match-pats-arm( term ('macro::lhs-prefix-or-suffix( ('macro::lhs-bind( (:Variable: b) (:Literal: l) )) rest )) )) (
( (let (uuid v) term) (
(if (.has-prefix( (uuid v) l )) (
(let b (.remove-prefix( (uuid v) l )))
(match-pats-arm( b rest ))
) (branchfalse()))
))
);

macro ('match-pats-arm( term ((:Tag: l lt) ( x1 )) )) (
(
(let (uuid v) term)
1 change: 1 addition & 0 deletions PLATFORM/C/LIB/default.lm
Original file line number Diff line number Diff line change
@@ -23,3 +23,4 @@ import PLATFORM/C/LIB/list.lm;
import PLATFORM/C/LIB/hashtable.lm;
import PLATFORM/C/LIB/array.lm;
import PLATFORM/C/LIB/io.lm;
import PLATFORM/C/LIB/regex.lm;
39 changes: 39 additions & 0 deletions PLATFORM/C/LIB/regex.lm
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@

import regex.h;

# Associates the atom suffix `_rgx` with the type `Regex`.
# Presumably this makes any literal whose text ends in `_rgx` type as Regex,
# in the same way `_s` / `_u64` suffixed atoms are used elsewhere — confirm.
atom suffix Regex _rgx;

# Field accessor for `rm_so` on a C `regmatch_t` value.
# Reuses the frame of `t` and emits an inline expression that wraps the
# expression of `t` followed by `.rm_so` (the `\[` / `\]` atoms presumably
# render as C parentheses — confirm against the C backend escapes).
# The result is typed U64.
# NOTE(review): POSIX declares `rm_so` as the signed type `regoff_t`; a -1
# "no match" value would appear here as a very large U64 — confirm callers.
.rm_so := λ: Blob(: t C_regmatch__t_). (: (
(:frame( (:frame t) ))
(:expression( '\[_l (:expression t) '.rm_so\]_l ))
) U64);
# Field accessor for `rm_eo` on a C `regmatch_t` value.
# Reuses the frame of `t` and emits an inline expression that wraps the
# expression of `t` followed by `.rm_eo` (the `\[` / `\]` atoms presumably
# render as C parentheses — confirm against the C backend escapes).
# The result is typed U64.
# NOTE(review): POSIX declares `rm_eo` as the signed type `regoff_t`; a -1
# "no match" value would appear here as a very large U64 — confirm callers.
.rm_eo := λ: Blob(: t C_regmatch__t_). (: (
(:frame( (:frame t) ))
(:expression( '\[_l (:expression t) '.rm_eo\]_l ))
) U64);

# Regex overload of `.has-prefix`: runs the compiled regex `rgx` against the
# C string obtained from `(.start text)` and returns whether `regexec`
# reported success (status equal to 0), typed U64.
#
# The call passes nmatch = 0, a null match array and eflags = 0, so no match
# offsets are requested or inspected here.
#
# NOTE(review): because offsets are not checked, this returns true for a match
# at ANY position unless the pattern itself is anchored with `^`. The Regex
# overload of `.remove-prefix` fails when the match does not start at offset
# 0, so an unanchored pattern can pass this test and then abort there —
# confirm whether anchoring should be enforced here.
.has-prefix := λ(: text SmartString)(: rgx Regex). (: (
(let status (regexec(
(as (& rgx) C_regex__t_*_)
(as (.start text) C_char_*)
(as 0_u64 C_size__t_)
(as 0_u64 C_regmatch__t_*)
(as 0_u64 C_int)
)))
(==( (as status U64) 0_u64 ))
) U64);

# Regex overload of `.remove-prefix`.
#
# Runs the compiled regex `rgx` against the C string obtained from
# `(.start text)`, requesting exactly one match slot, and returns the slice
# `([:]( text 0 rm_eo ))` built from offset 0 and the end offset of the match.
#
# Fails (via `fail`) when:
#   * `regexec` reports a non-zero status (no match or error) — in that case
#     the match slot is not filled in, so it must not be read; or
#   * the match does not begin at offset 0, i.e. it is not a prefix.
#
# NOTE(review): the slice arguments are (0, rm_eo), which looks like the
# matched text itself rather than the remainder after it — confirm against
# the semantics of `[:]` and the intended meaning of "remove".
# NOTE(review): `matches` is obtained from `malloc` and is not released in
# this function.
.remove-prefix := λ(: text SmartString)(: rgx Regex). (: (
   (let matches (as (malloc(sizeof C_regmatch__t_)) C_regmatch__t_[]))
   (let status (regexec(
      (as (& rgx) C_regex__t_*_)
      (as (.start text) C_char_*)
      (as 1_u64 C_size__t_)
      (as matches C_regmatch__t_*)
      (as 0_u64 C_int)
   )))
   # A non-zero status means the match slot was not populated; stop before
   # reading rm_so / rm_eo from it.
   (if (!=( (as status U64) 0_u64 )) (
      (fail 'Remove\sSmartString\sPrefix\sBy\sRegex\sNot\sA\sPrefix_s)
   ) ())
   # The match must start at the very beginning of `text`.
   (if (!=( (.rm_so([]( matches 0_u64 ))) 0_u64 )) (
      (fail 'Remove\sSmartString\sPrefix\sBy\sRegex\sNot\sA\sPrefix_s)
   ) ())
   ([:]( text 0_i64 (as (.rm_eo([]( matches 0_u64 ))) I64) ))
) SmartString);
16 changes: 15 additions & 1 deletion PLUGINS/BACKEND/C/compile-expr-direct.lm
Original file line number Diff line number Diff line change
@@ -16,7 +16,7 @@ compile-expr-direct := λ(: ctx FContext)(: term AST)(: stack-offset I64)(: used
))
( (Lit( val _ )) (
(let ltype (typeof term))
(if (||( (is-t( ltype 'String_s )) (is-t( ltype 'SmartString_s )) )) (
(if (||( (||( (is-t( ltype 'String_s )) (is-t( ltype 'SmartString_s )) )) (is-t( ltype 'Regex_s )) )) (
(if (is-t( ltype 'String_s )) (
(set e (compile-declare-cstring( val )))
) ())
@@ -25,6 +25,20 @@ compile-expr-direct := λ(: ctx FContext)(: term AST)(: stack-offset I64)(: used
(ascript-normal( lit (t1 'String_s) ))
(set e (compile-stack-calls( ctx 'intern_s TAny (typeof term) lit stack-offset Used )))
) ())
(if (is-t( ltype 'Regex_s )) (
(let rgx-id (uuid()))
(set assemble-header-section (+(
assemble-header-section
(+( (+( (SAtom 'regex_t\s_s) (SAtom rgx-id) )) (SAtom '\:\n_s) ))
)))
(set assemble-global-initializer-section (+(
assemble-global-initializer-section
(+( (+( (+( (SAtom '{int\sreturn_code\s=\sregcomp\[&_s) (SAtom rgx-id) ))
(+( (SAtom ',\s"_s) (SAtom val) )) ))
(SAtom '",\s0\]\:if\[return_code\]{fprintf\[stderr,"Could\snot\scompile\sregex."\]\:exit\[1\]\:}}\:\n_s) ))
)))
(set e (fragment::expression rgx-id))
) ())
) (
(let isa-fragment False_u8)
(if (==( isa-fragment True_u8 )) (
6 changes: 5 additions & 1 deletion PLUGINS/BACKEND/C/mangle-c-type.lm
Original file line number Diff line number Diff line change
@@ -29,10 +29,14 @@ mangle-c-type-internal := λ(: tt Type). (: (
( (TGround( 'String_s _ )) (set r (SAtom 'char*_s)) )
( (TGround( 'File_s _ )) (set r (SAtom 'FILE_s)) )
( (TGround( 'PID_s _ )) (set r (SAtom 'pid_t_s)) )
( (TGround( 'Regex_s _ )) (set r (SAtom 'regex_t_s)) )
( (TGround( 'Array_s (LCons( _ (LCons( array-base _ )) )) )) (set r (+( (mangle-c-type array-base) (SAtom '*_s) ))) )
( (TGround( tag _ )) (
(if (.has-prefix( tag 'C__s )) (
(set r (SAtom (.replace( (.remove-prefix( tag 'C__s )) '__s '\s_s )) ))
(set tag (.remove-prefix( tag 'C__s )))
(set tag (.replace( tag '__s '\s_s )))
(set tag (.replace( tag '\s\s_s '__s )))
(set r (SAtom tag))
) (
(let c-type-name (.lookup( index-c-type-ordinal tt 'UnknownCTypename_s )))
(if (==( c-type-name 'UnknownCTypename_s )) () (
26 changes: 24 additions & 2 deletions PLUGINS/FRONTEND/C/c-parse.lm
Original file line number Diff line number Diff line change
@@ -178,7 +178,20 @@ c-print-token-sequence := λ(: from List<String>)(: to List<String>). (: (
) Nil);

# Mangle a C type, given as a list of tokens, into a single type-name string.
#
# The tokens are prefixed with "C", every "_" inside a token is doubled to
# "__", and the pieces are joined with a single "_". Doubling keeps
# underscores that belong to a token distinguishable from the separators.
# If any token contained an underscore, one extra "_" is appended to the
# result (e.g. the token list [regex_t, *] yields "C_regex__t_*_").
#
# An empty token list yields just "C".
c-mangle-typename := λ(: tokens List<String>). (: (
   # Escaped tokens are accumulated in reverse order and flipped before joining.
   (let rs (: LEOF List<String>))
   # Becomes non-zero once any token contains an underscore.
   (let fancy 0_u64)
   (for-each (t in (cons( 'C_s tokens ))) (
      (set fancy (||( fancy (.contains( t '__s )) )))
      (set rs (cons(
         (.replace( t '__s '___s ))
         rs
      )))
   ))
   (let r (.join( (reverse rs) '__s )))
   (if fancy (
      (set r (+( r '__s )))
   ) ())
   r
) String);

c-parse-declaration := λ(: tokens List<String>). (: (
@@ -218,7 +231,7 @@ c-parse-declaration := λ(: tokens List<String>). (: (
(let arg-type (c-mangle-typename(.first at)))
(set tokens (.second at))
(let arg-name '_s)
(if (==( arg-type 'C_s )) (
(if (==( (.length arg-type) 0_u64 )) (
(if (non-zero tokens) (set tokens (tail tokens)) ())
(print 'Unrecognized\sType\sName\sIn\sFunction\sArgument\s_s)
(print return-type)(print '\s_s)(print name)(print '\s_s)
@@ -228,6 +241,15 @@ c-parse-declaration := λ(: tokens List<String>). (: (
(while (&&( (non-zero tokens) (==( (head tokens) '\[_s )) )) (
(set tokens (c-dump-parens tokens))
))
(while (&&( (non-zero tokens) (==( (head tokens) '[_s )) )) (
(set tokens (tail tokens)) # [
(while (&&( (non-zero tokens) (!=( (head tokens) ']_s )) )) (
(set tokens (tail tokens))
))
(set tokens (tail tokens)) # ]
(if (.has-suffix( arg-type '__s )) (set arg-type (.remove-suffix( arg-type '__s ))) ())
(set arg-type (+( arg-type '_*_s )))
))
(while (&&( (non-zero tokens) (==( (head tokens) '__attribute___s )) )) (
(set tokens (tail tokens))
(if (&&( (non-zero tokens) (==( (head tokens) '\[_s )) )) (set tokens (c-dump-parens tokens)) ())
58 changes: 51 additions & 7 deletions PLUGINS/FRONTEND/LSTS/lsts-parse.lm
Original file line number Diff line number Diff line change
@@ -373,10 +373,40 @@ lsts-parse-expression := λ(: tokens List<Token>). (: (
(Tuple( base tokens ))
) Tuple<AST,List<Token>>);

lsts-parse-lhs := λ(: tokens List<Token>). (: (
# Parse one left-hand-side pattern, optionally followed by `= <pattern>`.
#
# First parses a single pattern with `lsts-parse-lhs-one`. If the next token
# is `=`, that token is consumed, the right-hand side is parsed with
# `lsts-parse-lhs`, and the result becomes the application
#   (macro::lhs-bind (base val))
# where the `macro::lhs-bind` variable carries the source location of the `=`
# token.
#
# Returns a tuple of the resulting AST and the remaining tokens.
lsts-parse-lhs-big := λ(: tokens List<Token>). (: (
(let base-rest (lsts-parse-lhs-one tokens))
(let base (.first base-rest))
(set tokens (.second base-rest))
(if (==( (lsts-parse-head tokens) '=_s )) (scope(
# Remember where the `=` was so the synthesized node can be blamed on it.
(let loc (.location(head tokens)))
(lsts-parse-expect( '=_s tokens ))(set tokens (tail tokens))
(let val-rest (lsts-parse-lhs( tokens )))
(let val (.first val-rest))
(set tokens (.second val-rest))
(set base (App(
(close(Var( 'macro::lhs-bind_s (with-location( (token::new 'macro::lhs-bind_s) loc )) )))
(close(App(
(close base)
(close val)
)))
)))
)) ())
(Tuple( base tokens ))
) Tuple<AST,List<Token>>);

lsts-parse-lhs := λ(: tokens List<Token>). (: (
(let base ASTEOF)
(if (==( (lsts-parse-head tokens) '\[_s )) (scope(
(lsts-parse-expect( '\[_s tokens ))(set tokens (tail tokens))
(let base-rest (lsts-parse-lhs-big( tokens )))
(set base (.first base-rest))
(set tokens (.second base-rest))
(lsts-parse-expect( '\]_s tokens ))(set tokens (tail tokens))
)) (
(let base-rest (lsts-parse-lhs-one tokens))
(set base (.first base-rest))
(set tokens (.second base-rest))
))
(while (&&( (non-zero tokens) (==( (lsts-parse-head tokens) '._s )) )) (
(let loc (.location(head tokens)))
(lsts-parse-expect( '._s tokens ))(set tokens (tail tokens))
@@ -413,15 +443,29 @@ lsts-is-lit := λ(: s String). (: (

lsts-parse-lhs-one := λ(: tokens List<Token>). (: (
(let base ASTNil)
(if (lsts-is-ident-head(lsts-parse-head tokens)) (
(set base (lsts-make-maybe-var( (head tokens) )))
(set tokens (tail tokens))
(if (==( (lsts-parse-head tokens) '/_s )) (
(let loc (.location(head tokens)))
(lsts-parse-expect( '/_s tokens ))(set tokens (tail tokens))
(let pat SNil)
(while (&&( (non-zero tokens) (!=( (lsts-parse-head tokens) '/_s )) )) (
(set pat (+( pat (SAtom(lsts-parse-head tokens)) )))
(set tokens (tail tokens))
))
(lsts-parse-expect( '/_s tokens ))(set tokens (tail tokens))
(set pat (+( pat (SAtom '_rgx_s) )))
(let pats (clone-rope pat))
(set base (Lit( pats (with-location( (token::new pats) loc )) )))
) (
(if (lsts-is-lit(lsts-parse-head tokens)) (
(set base (Lit( (.key(head tokens)) (head tokens) )))
(if (lsts-is-ident-head(lsts-parse-head tokens)) (
(set base (lsts-make-maybe-var( (head tokens) )))
(set tokens (tail tokens))
) (
(lsts-parse-expect( 'LHS_s 0_u64 tokens ))
(if (lsts-is-lit(lsts-parse-head tokens)) (
(set base (Lit( (.key(head tokens)) (head tokens) )))
(set tokens (tail tokens))
) (
(lsts-parse-expect( 'LHS_s 0_u64 tokens ))
))
))
))
(Tuple( base tokens ))
1 change: 1 addition & 0 deletions PLUGINS/FRONTEND/LSTS/lsts-tokenize.lm
Original file line number Diff line number Diff line change
@@ -64,6 +64,7 @@ lsts-tokenize := λ(: fp String). (: (
(lsts-try-tokenize-keep '>_s)
(lsts-try-tokenize-keep '!_s)
(lsts-try-tokenize-keep '=_s)
(lsts-try-tokenize-keep '^_s)
(lsts-try-tokenize-discard '\s_s)
(lsts-try-tokenize-discard '\t_s)
(lsts-try-tokenize-discard '\n_s)
2 changes: 1 addition & 1 deletion SRC/infer-expr.lm
Original file line number Diff line number Diff line change
@@ -147,7 +147,7 @@ infer-expr-one := λ(: tctx TContext)(: term AST)(: scoped IsScoped)(: hint Type
(maybe-specialize( 'push-deref_s (typeof-var-raw( term tctx 'push-deref_s )) deref-type term ))
(set deref-type (and( array-base (t1 'StackVariable_s) )))
))
( _ (maybe-specialize( 'open_s (typeof-var-raw( term tctx 'open_s )) deref-type term )) )
( _ (do-specialize( 'open_s (typeof-var-raw( term tctx 'open_s )) deref-type term )) )
))
(ascript-normal( term deref-type ))
))
4 changes: 4 additions & 0 deletions SRC/maybe-specialize.lm
Original file line number Diff line number Diff line change
@@ -2,3 +2,7 @@
# Forwards to `apply` with `function-name`, `ft`, `pt` and `blame`, passing
# the constant 1_u64 as the fourth argument and 0_u64 as the final argument.
# The result of `apply` is discarded; the function returns Nil.
# NOTE(review): the final 0_u64 presumably makes specialization optional
# (compare `do-specialize`, which passes 1_u64) — confirm against `apply`.
maybe-specialize := λ(: function-name String)(: ft Type)(: pt Type)(: blame AST). (: (
(apply( function-name ft pt 1_u64 blame 0_u64 )) ()
) Nil);

# Forwards to `apply` with `function-name`, `ft`, `pt` and `blame`, passing
# the constant 1_u64 as both the fourth and the final argument.
# The result of `apply` is discarded; the function returns Nil.
# NOTE(review): the final 1_u64 presumably forces specialization, where
# `maybe-specialize` passes 0_u64 — confirm against `apply`.
do-specialize := λ(: function-name String)(: ft Type)(: pt Type)(: blame AST). (: (
(apply( function-name ft pt 1_u64 blame 1_u64 )) ()
) Nil);

0 comments on commit a07d52f

Please sign in to comment.