diff --git a/src/Juvix/Compiler/Asm/Keywords.hs b/src/Juvix/Compiler/Asm/Keywords.hs new file mode 100644 index 0000000000..3e24779dae --- /dev/null +++ b/src/Juvix/Compiler/Asm/Keywords.hs @@ -0,0 +1,41 @@ +module Juvix.Compiler.Asm.Keywords + ( module Juvix.Compiler.Asm.Keywords, + module Juvix.Data.Keyword, + module Juvix.Data.Keyword.All, + ) +where + +import Juvix.Data.Keyword +import Juvix.Data.Keyword.All + ( kwArg, + kwColon, + kwDollar, + kwFalse, + kwFun, + kwInductive, + kwRightArrow, + kwSemicolon, + kwStar, + kwTmp, + kwTrue, + kwUnit, + kwVoid, + ) +import Juvix.Prelude + +allKeywordStrings :: HashSet Text +allKeywordStrings = keywordsStrings allKeywords + +allKeywords :: [Keyword] +allKeywords = + [ kwFun, + kwInductive, + kwColon, + kwSemicolon, + kwStar, + kwRightArrow, + kwTrue, + kwFalse, + kwArg, + kwTmp + ] diff --git a/src/Juvix/Compiler/Asm/Translation/FromSource.hs b/src/Juvix/Compiler/Asm/Translation/FromSource.hs index f26366c215..8bf2fbd67e 100644 --- a/src/Juvix/Compiler/Asm/Translation/FromSource.hs +++ b/src/Juvix/Compiler/Asm/Translation/FromSource.hs @@ -95,7 +95,7 @@ statementFunction :: Members '[Reader ParserParams, InfoTableBuilder] r => ParsecS r () statementFunction = do - kwFun + kw kwFun off <- P.getOffset (txt, i) <- identifierL idt <- lift $ getIdent txt @@ -119,7 +119,7 @@ statementFunction = do _functionType = mkTypeFun argtys (fromMaybe TyDynamic mrty) } lift $ registerFunction fi0 - mcode <- (kwSemicolon $> Nothing) <|> optional (braces parseCode) + mcode <- (kw kwSemicolon $> Nothing) <|> optional (braces parseCode) let fi = fi0 {_functionCode = fromMaybe [] mcode} case idt of Just (IdentFwd _) -> do @@ -141,7 +141,7 @@ statementInductive :: Members '[Reader ParserParams, InfoTableBuilder] r => ParsecS r () statementInductive = do - kwInductive + kw kwInductive off <- P.getOffset (txt, i) <- identifierL idt <- lift $ getIdent txt @@ -157,7 +157,7 @@ statementInductive = do _inductiveConstructors = [] } lift $ registerInductive ii - ctrs <- braces $ P.sepEndBy (constrDecl sym) kwSemicolon + ctrs <- braces $ P.sepEndBy (constrDecl sym) (kw kwSemicolon) lift $ registerInductive ii {_inductiveConstructors = ctrs} functionArguments :: @@ -199,7 +199,7 @@ typeAnnotation :: Members '[Reader ParserParams, InfoTableBuilder] r => ParsecS r Type typeAnnotation = do - kwColon + kw kwColon parseType parseType :: @@ -219,7 +219,7 @@ typeFun' :: NonEmpty Type -> ParsecS r Type typeFun' tyargs = do - kwArrow + kw kwRightArrow TyFun . TypeFun tyargs <$> parseType typeArguments :: @@ -230,8 +230,8 @@ typeArguments = do <|> (typeDynamic <&> NonEmpty.singleton) <|> (typeNamed <&> NonEmpty.singleton) -typeDynamic :: ParsecS r Type -typeDynamic = kwStar $> TyDynamic +typeDynamic :: Members '[Reader ParserParams] r => ParsecS r Type +typeDynamic = kw kwStar $> TyDynamic typeNamed :: Members '[Reader ParserParams, InfoTableBuilder] r => @@ -253,7 +253,7 @@ typeNamed = do parseCode :: Members '[Reader ParserParams, InfoTableBuilder] r => ParsecS r Code -parseCode = P.sepEndBy command kwSemicolon +parseCode = P.sepEndBy command (kw kwSemicolon) command :: Members '[Reader ParserParams, InfoTableBuilder] r => @@ -337,10 +337,10 @@ integerValue = do (i, _) <- integer return $ ConstInt i -boolValue :: ParsecS r Value +boolValue :: Members '[Reader ParserParams] r => ParsecS r Value boolValue = - (kwTrue $> ConstBool True) - <|> (kwFalse $> ConstBool False) + (kw kwTrue $> ConstBool True) + <|> (kw kwFalse $> ConstBool False) stringValue :: Members '[Reader ParserParams, InfoTableBuilder] r => @@ -349,11 +349,11 @@ stringValue = do (s, _) <- string return $ ConstString s -unitValue :: ParsecS r Value -unitValue = kwUnit $> ConstUnit +unitValue :: Members '[Reader ParserParams] r => ParsecS r Value +unitValue = kw kwUnit $> ConstUnit -voidValue :: ParsecS r Value -voidValue = kwVoid $> ConstVoid +voidValue :: Members '[Reader ParserParams] r => ParsecS r Value +voidValue = kw kwVoid $> ConstVoid memValue :: Members '[Reader ParserParams, InfoTableBuilder] r => @@ -367,14 +367,14 @@ directRef :: ParsecS r DirectRef directRef = stackRef <|> argRef <|> tempRef -stackRef :: ParsecS r DirectRef -stackRef = kwDollar $> StackRef +stackRef :: Members '[Reader ParserParams] r => ParsecS r DirectRef +stackRef = kw kwDollar $> StackRef argRef :: Members '[Reader ParserParams, InfoTableBuilder] r => ParsecS r DirectRef argRef = do - kwArg + kw kwArg (off, _) <- brackets integer return $ ArgRef (fromInteger off) @@ -382,7 +382,7 @@ tempRef :: Members '[Reader ParserParams, InfoTableBuilder] r => ParsecS r DirectRef tempRef = do - kwTmp + kw kwTmp (off, _) <- brackets integer return $ TempRef (fromInteger off) @@ -462,7 +462,7 @@ instrCall = do parseCallType :: Members '[Reader ParserParams, InfoTableBuilder] r => ParsecS r CallType -parseCallType = (kwDollar $> CallClosure) <|> (CallFun <$> funSymbol) +parseCallType = (kw kwDollar $> CallClosure) <|> (CallFun <$> funSymbol) instrCallClosures :: Members '[Reader ParserParams, InfoTableBuilder] r => @@ -495,7 +495,7 @@ caseBranch :: ParsecS r CaseBranch caseBranch = do tag <- P.try constrTag - kwColon + kw kwColon CaseBranch tag <$> branchCode defaultBranch :: diff --git a/src/Juvix/Compiler/Asm/Translation/FromSource/Lexer.hs b/src/Juvix/Compiler/Asm/Translation/FromSource/Lexer.hs index 7b19c3bcbf..c0fbe673ad 100644 --- a/src/Juvix/Compiler/Asm/Translation/FromSource/Lexer.hs +++ b/src/Juvix/Compiler/Asm/Translation/FromSource/Lexer.hs @@ -1,10 +1,11 @@ module Juvix.Compiler.Asm.Translation.FromSource.Lexer ( module Juvix.Compiler.Asm.Translation.FromSource.Lexer, module Juvix.Parser.Lexer, + module Juvix.Compiler.Asm.Keywords, ) where -import Juvix.Extra.Strings qualified as Str +import Juvix.Compiler.Asm.Keywords import Juvix.Parser.Lexer import Juvix.Prelude import Text.Megaparsec as P hiding (sepBy1, sepEndBy1, some) @@ -34,11 +35,8 @@ number = number' integer string :: Member (Reader ParserParams) r => ParsecS r (Text, Interval) string = lexemeInterval string' -keyword :: Text -> ParsecS r () -keyword = keyword' space - -keywordSymbol :: Text -> ParsecS r () -keywordSymbol = keywordSymbol' space +kw :: Members '[Reader ParserParams] r => Keyword -> ParsecS r () +kw k = void $ lexeme $ kw' k identifier :: ParsecS r Text identifier = lexeme bareIdentifier @@ -47,25 +45,11 @@ identifierL :: Member (Reader ParserParams) r => ParsecS r (Text, Interval) identifierL = lexemeInterval bareIdentifier bareIdentifier :: ParsecS r Text -bareIdentifier = rawIdentifier' (`elem` specialSymbols) allKeywords +bareIdentifier = rawIdentifier' (`elem` specialSymbols) allKeywordStrings specialSymbols :: [Char] specialSymbols = ":" -allKeywords :: [ParsecS r ()] -allKeywords = - [ kwFun, - kwInductive, - kwColon, - kwSemicolon, - kwStar, - kwArrow, - kwTrue, - kwFalse, - kwArg, - kwTmp - ] - dot :: ParsecS r () dot = symbol "." @@ -98,42 +82,3 @@ braces = between (symbol "{") (symbol "}") brackets :: ParsecS r a -> ParsecS r a brackets = between (symbol "[") (symbol "]") - -kwFun :: ParsecS r () -kwFun = keyword Str.fun_ - -kwInductive :: ParsecS r () -kwInductive = keyword Str.inductive - -kwColon :: ParsecS r () -kwColon = keyword Str.colon - -kwSemicolon :: ParsecS r () -kwSemicolon = keyword Str.semicolon - -kwStar :: ParsecS r () -kwStar = keyword Str.mul - -kwDollar :: ParsecS r () -kwDollar = keyword Str.dollar - -kwArrow :: ParsecS r () -kwArrow = keyword Str.toAscii <|> keyword Str.toUnicode - -kwTrue :: ParsecS r () -kwTrue = keyword Str.true_ - -kwFalse :: ParsecS r () -kwFalse = keyword Str.false_ - -kwUnit :: ParsecS r () -kwUnit = keyword Str.unit - -kwVoid :: ParsecS r () -kwVoid = keyword Str.void - -kwArg :: ParsecS r () -kwArg = keyword Str.arg_ - -kwTmp :: ParsecS r () -kwTmp = keyword Str.tmp_ diff --git a/src/Juvix/Compiler/Concrete/Keywords.hs b/src/Juvix/Compiler/Concrete/Keywords.hs new file mode 100644 index 0000000000..be308c6960 --- /dev/null +++ b/src/Juvix/Compiler/Concrete/Keywords.hs @@ -0,0 +1,85 @@ +module Juvix.Compiler.Concrete.Keywords + ( module Juvix.Compiler.Concrete.Keywords, + module Juvix.Data.Keyword, + module Juvix.Data.Keyword.All, + ) +where + +import Juvix.Data.Keyword +import Juvix.Data.Keyword.All + ( -- reserved + + -- extra + cBackend, + ghc, + kwAssign, + kwAxiom, + kwBuiltin, + kwColon, + kwColonOmega, + kwColonOne, + kwColonZero, + kwCompile, + kwEnd, + kwForeign, + kwHiding, + kwHole, + kwImport, + kwIn, + kwInductive, + kwInfix, + kwInfixl, + kwInfixr, + kwLambda, + kwLet, + kwMapsTo, + kwModule, + kwOpen, + kwPositive, + kwPostfix, + kwPublic, + kwRightArrow, + kwSemicolon, + kwTerminating, + kwType, + kwUsing, + kwWhere, + kwWildcard, + ) +import Juvix.Prelude + +allKeywordStrings :: HashSet Text +allKeywordStrings = keywordsStrings allKeywords + +allKeywords :: [Keyword] +allKeywords = + [ kwAssign, + kwAxiom, + kwColon, + kwColonOmega, + kwColonOne, + kwColonZero, + kwCompile, + kwEnd, + kwForeign, + kwHiding, + kwHole, + kwImport, + kwIn, + kwInductive, + kwInfix, + kwInfixl, + kwInfixr, + kwLambda, + kwLet, + kwModule, + kwOpen, + kwPostfix, + kwPublic, + kwRightArrow, + kwSemicolon, + kwType, + kwUsing, + kwWhere, + kwWildcard + ] diff --git a/src/Juvix/Compiler/Concrete/Translation/FromSource.hs b/src/Juvix/Compiler/Concrete/Translation/FromSource.hs index aad96ce28b..b70b5585ce 100644 --- a/src/Juvix/Compiler/Concrete/Translation/FromSource.hs +++ b/src/Juvix/Compiler/Concrete/Translation/FromSource.hs @@ -72,7 +72,7 @@ top :: Members '[Reader ParserParams, InfoTableBuilder] r => ParsecS r a -> ParsecS r a -top p = space >> p <* (optional kwSemicolon >> P.eof) +top p = space >> p <* (optional (kw kwSemicolon) >> P.eof) topModuleDef :: Members '[Reader ParserParams, InfoTableBuilder, JudocStash, NameIdGen] r => @@ -102,7 +102,7 @@ mkTopModulePath :: NonEmpty Symbol -> TopModulePath mkTopModulePath l = TopModulePath (NonEmpty.init l) (NonEmpty.last l) symbolList :: Members '[Reader ParserParams, InfoTableBuilder, JudocStash, NameIdGen] r => ParsecS r (NonEmpty Symbol) -symbolList = braces (P.sepBy1 symbol kwSemicolon) +symbolList = braces (P.sepBy1 symbol (kw kwSemicolon)) topModulePath :: Members '[Reader ParserParams, InfoTableBuilder, JudocStash, NameIdGen] r => ParsecS r TopModulePath topModulePath = mkTopModulePath <$> dottedSymbol @@ -151,7 +151,7 @@ stashJudoc = do P.try (judocStart >> judocExampleStart) uid <- P.lift freshNameId e <- parseExpressionAtoms - kwSemicolon + kw kwSemicolon space return (JudocExample (Example uid e)) @@ -193,7 +193,7 @@ builtinHelper :: ParsecS r a builtinHelper = P.choice - [ keyword (prettyText a) $> a + [ kw (asciiKw (prettyText a)) $> a | a <- allElements ] @@ -216,7 +216,7 @@ builtinTypeSig b = do builtinStatement :: Members '[Reader ParserParams, InfoTableBuilder, JudocStash, NameIdGen] r => ParsecS r (Statement 'Parsed) builtinStatement = do - kwBuiltin + kw kwBuiltin (builtinInductive >>= fmap StatementInductive . builtinInductiveDef) <|> (builtinFunction >>= fmap StatementTypeSignature . builtinTypeSig) <|> (builtinAxiom >>= fmap StatementAxiom . builtinAxiomDef) @@ -227,16 +227,16 @@ builtinStatement = do compileBlock :: forall r. Members '[Reader ParserParams, InfoTableBuilder, JudocStash, NameIdGen] r => ParsecS r (Compile 'Parsed) compileBlock = do - kwCompile + kw kwCompile _compileName <- symbol _compileBackendItems <- backends return Compile {..} where - backends = toList <$> braces (P.sepEndBy1 backendItem kwSemicolon) + backends = toList <$> braces (P.sepEndBy1 backendItem (kw kwSemicolon)) backendItem :: ParsecS r BackendItem backendItem = do _backendItemBackend <- backend - kwMapsTo + kw kwMapsTo _backendItemCode <- fst <$> string return BackendItem {..} @@ -245,11 +245,11 @@ compileBlock = do -------------------------------------------------------------------------------- backend :: Members '[Reader ParserParams, InfoTableBuilder, JudocStash, NameIdGen] r => ParsecS r Backend -backend = ghc $> BackendGhc <|> cBackend $> BackendC +backend = kw ghc $> BackendGhc <|> kw cBackend $> BackendC foreignBlock :: Members '[Reader ParserParams, InfoTableBuilder, JudocStash, NameIdGen] r => ParsecS r ForeignBlock foreignBlock = do - kwForeign + kw kwForeign _foreignBackend <- backend _foreignCode <- bracedString return ForeignBlock {..} @@ -271,10 +271,10 @@ operatorSyntaxDef = do where arity :: ParsecS r OperatorArity arity = - Binary AssocRight <$ kwInfixr - <|> Binary AssocLeft <$ kwInfixl - <|> Binary AssocNone <$ kwInfix - <|> Unary AssocPostfix <$ kwPostfix + Binary AssocRight <$ kw kwInfixr + <|> Binary AssocLeft <$ kw kwInfixl + <|> Binary AssocNone <$ kw kwInfix + <|> Unary AssocPostfix <$ kw kwPostfix -------------------------------------------------------------------------------- -- Import statement @@ -282,7 +282,7 @@ operatorSyntaxDef = do import_ :: Members '[Reader ParserParams, InfoTableBuilder, JudocStash, NameIdGen] r => ParsecS r (Import 'Parsed) import_ = do - kwImport + kw kwImport _importModule <- topModulePath return Import {..} @@ -299,7 +299,7 @@ expressionAtom = <|> (AtomLambda <$> lambda) <|> (AtomFunction <$> function) <|> (AtomLetBlock <$> letBlock) - <|> (AtomFunArrow <$ kwRightArrow) + <|> (AtomFunArrow <$ kw kwRightArrow) <|> (AtomHole <$> hole) <|> parens (AtomParens <$> parseExpressionAtoms) <|> braces (AtomBraces <$> withLoc parseExpressionAtoms) @@ -316,7 +316,7 @@ parseExpressionAtoms = do -------------------------------------------------------------------------------- hole :: Members '[Reader ParserParams, InfoTableBuilder, JudocStash, NameIdGen] r => ParsecS r (HoleType 'Parsed) -hole = snd <$> interval kwHole +hole = snd <$> interval (kw kwHole) -------------------------------------------------------------------------------- -- Literals @@ -344,9 +344,9 @@ letClause = either LetTypeSig LetFunClause <$> auxTypeSigFunClause letBlock :: Members '[Reader ParserParams, InfoTableBuilder, JudocStash, NameIdGen] r => ParsecS r (LetBlock 'Parsed) letBlock = do - kwLet - _letClauses <- braces (P.sepEndBy letClause kwSemicolon) - kwIn + kw kwLet + _letClauses <- braces (P.sepEndBy letClause (kw kwSemicolon)) + kw kwIn _letExpression <- parseExpressionAtoms return LetBlock {..} @@ -356,7 +356,7 @@ letBlock = do universe :: Members '[Reader ParserParams, InfoTableBuilder, JudocStash, NameIdGen] r => ParsecS r Universe universe = do - i <- snd <$> interval kwType + i <- snd <$> interval (kw kwType) uni <- optional decimal return ( case uni of @@ -377,7 +377,7 @@ typeSignature :: Maybe BuiltinFunction -> ParsecS r (TypeSignature 'Parsed) typeSignature _sigTerminating _sigName _sigBuiltin = do - kwColon + kw kwColon _sigType <- parseExpressionAtoms _sigDoc <- getJudoc return TypeSignature {..} @@ -387,7 +387,7 @@ auxTypeSigFunClause :: Members '[Reader ParserParams, InfoTableBuilder, JudocStash, NameIdGen] r => ParsecS r (Either (TypeSignature 'Parsed) (FunctionClause 'Parsed)) auxTypeSigFunClause = do - terminating <- isJust <$> optional kwTerminating + terminating <- isJust <$> optional (kw kwTerminating) sym <- symbol (Left <$> typeSignature terminating sym Nothing) <|> (Right <$> functionClause sym) @@ -397,10 +397,10 @@ axiomDef :: Maybe BuiltinAxiom -> ParsecS r (AxiomDef 'Parsed) axiomDef _axiomBuiltin = do - kwAxiom + kw kwAxiom _axiomDoc <- getJudoc _axiomName <- symbol - kwColon + kw kwColon _axiomType <- parseExpressionAtoms return AxiomDef {..} @@ -432,18 +432,18 @@ functionParam = do pName :: ParsecS r (Maybe Symbol) pName = (Just <$> symbol) - <|> (Nothing <$ kwWildcard) + <|> (Nothing <$ kw kwWildcard) pUsage :: ParsecS r (Maybe Usage) pUsage = - (Just UsageNone <$ kwColonZero) - <|> (Just UsageOnce <$ kwColonOne) - <|> (Just UsageOmega <$ kwColonOmega) - <|> (Nothing <$ kwColon) + (Just UsageNone <$ kw kwColonZero) + <|> (Just UsageOnce <$ kw kwColonOne) + <|> (Just UsageOmega <$ kw kwColonOmega) + <|> (Nothing <$ kw kwColon) function :: Members '[Reader ParserParams, InfoTableBuilder, JudocStash, NameIdGen] r => ParsecS r (Function 'Parsed) function = do _funParameter <- functionParam - kwRightArrow + kw kwRightArrow _funReturn <- parseExpressionAtoms return Function {..} @@ -453,8 +453,8 @@ function = do whereBlock :: Members '[Reader ParserParams, InfoTableBuilder, JudocStash, NameIdGen] r => ParsecS r (WhereBlock 'Parsed) whereBlock = do - kwWhere - WhereBlock <$> braces (P.sepEndBy1 whereClause kwSemicolon) + kw kwWhere + WhereBlock <$> braces (P.sepEndBy1 whereClause (kw kwSemicolon)) whereClause :: forall r. Members '[Reader ParserParams, InfoTableBuilder, JudocStash, NameIdGen] r => ParsecS r (WhereClause 'Parsed) whereClause = @@ -471,14 +471,14 @@ whereClause = lambdaClause :: Members '[Reader ParserParams, InfoTableBuilder, JudocStash, NameIdGen] r => ParsecS r (LambdaClause 'Parsed) lambdaClause = do _lambdaParameters <- P.some patternAtom - kwAssign + kw kwAssign _lambdaBody <- parseExpressionAtoms return LambdaClause {..} lambda :: Members '[Reader ParserParams, InfoTableBuilder, JudocStash, NameIdGen] r => ParsecS r (Lambda 'Parsed) lambda = do - kwLambda - _lambdaClauses <- braces (P.sepEndBy lambdaClause kwSemicolon) + kw kwLambda + _lambdaClauses <- braces (P.sepEndBy lambdaClause (kw kwSemicolon)) return Lambda {..} ------------------------------------------------------------------------------- @@ -487,19 +487,19 @@ lambda = do inductiveDef :: Members '[Reader ParserParams, InfoTableBuilder, JudocStash, NameIdGen] r => Maybe BuiltinInductive -> ParsecS r (InductiveDef 'Parsed) inductiveDef _inductiveBuiltin = do - _inductivePositive <- isJust <$> optional kwPositive - kwInductive + _inductivePositive <- isJust <$> optional (kw kwPositive) + kw kwInductive _inductiveDoc <- getJudoc _inductiveName <- symbol _inductiveParameters <- P.many inductiveParam - _inductiveType <- optional (kwColon >> parseExpressionAtoms) - _inductiveConstructors <- braces $ P.sepEndBy constructorDef kwSemicolon + _inductiveType <- optional (kw kwColon >> parseExpressionAtoms) + _inductiveConstructors <- braces $ P.sepEndBy constructorDef (kw kwSemicolon) return InductiveDef {..} inductiveParam :: Members '[Reader ParserParams, InfoTableBuilder, JudocStash, NameIdGen] r => ParsecS r (InductiveParameter 'Parsed) inductiveParam = parens $ do _inductiveParameterName <- symbol - kwColon + kw kwColon _inductiveParameterType <- parseExpressionAtoms return InductiveParameter {..} @@ -507,12 +507,12 @@ constructorDef :: Members '[Reader ParserParams, InfoTableBuilder, JudocStash, N constructorDef = do _constructorDoc <- optional stashJudoc >> getJudoc _constructorName <- symbol - kwColon + kw kwColon _constructorType <- parseExpressionAtoms return InductiveConstructorDef {..} wildcard :: Members '[Reader ParserParams, InfoTableBuilder, JudocStash, NameIdGen] r => ParsecS r Wildcard -wildcard = Wildcard . snd <$> interval kwWildcard +wildcard = Wildcard . snd <$> interval (kw kwWildcard) -------------------------------------------------------------------------------- -- Pattern section @@ -538,7 +538,7 @@ parsePatternAtoms = do functionClause :: Members '[Reader ParserParams, InfoTableBuilder, JudocStash, NameIdGen] r => Symbol -> ParsecS r (FunctionClause 'Parsed) functionClause _clauseOwnerFunction = do _clausePatterns <- P.many patternAtom - kwAssign + kw kwAssign _clauseBody <- parseExpressionAtoms _clauseWhere <- optional whereBlock return FunctionClause {..} @@ -554,13 +554,13 @@ pmodulePath = case sing :: SModuleIsTop t of moduleDef :: (SingI t, Members '[Reader ParserParams, InfoTableBuilder, JudocStash, NameIdGen] r) => ParsecS r (Module 'Parsed t) moduleDef = P.label "" $ do - kwModule + kw kwModule _moduleDoc <- getJudoc _modulePath <- pmodulePath _moduleParameters <- many inductiveParam - kwSemicolon - _moduleBody <- P.sepEndBy statement kwSemicolon - kwEnd + kw kwSemicolon + _moduleBody <- P.sepEndBy statement (kw kwSemicolon) + kw kwEnd return Module {..} -- | An ExpressionAtom which is a valid expression on its own. @@ -574,15 +574,15 @@ atomicExpression = do openModule :: forall r. Members '[Reader ParserParams, InfoTableBuilder, JudocStash, NameIdGen] r => ParsecS r (OpenModule 'Parsed) openModule = do - kwOpen - _openModuleImport <- isJust <$> optional kwImport + kw kwOpen + _openModuleImport <- isJust <$> optional (kw kwImport) _openModuleName <- name _openParameters <- many atomicExpression _openUsingHiding <- optional usingOrHiding - _openPublic <- maybe NoPublic (const Public) <$> optional kwPublic + _openPublic <- maybe NoPublic (const Public) <$> optional (kw kwPublic) return OpenModule {..} where usingOrHiding :: ParsecS r UsingHiding usingOrHiding = - (kwUsing >> (Using <$> symbolList)) - <|> (kwHiding >> (Hiding <$> symbolList)) + (kw kwUsing >> (Using <$> symbolList)) + <|> (kw kwHiding >> (Hiding <$> symbolList)) diff --git a/src/Juvix/Compiler/Concrete/Translation/FromSource/Lexer.hs b/src/Juvix/Compiler/Concrete/Translation/FromSource/Lexer.hs index e3a9bd3846..ffbccc483b 100644 --- a/src/Juvix/Compiler/Concrete/Translation/FromSource/Lexer.hs +++ b/src/Juvix/Compiler/Concrete/Translation/FromSource/Lexer.hs @@ -1,6 +1,8 @@ module Juvix.Compiler.Concrete.Translation.FromSource.Lexer ( module Juvix.Compiler.Concrete.Translation.FromSource.Lexer, module Juvix.Parser.Lexer, + module Juvix.Data.Keyword, + module Juvix.Compiler.Concrete.Keywords, ) where @@ -9,6 +11,8 @@ import GHC.Unicode import Juvix.Compiler.Concrete.Data.ParsedInfoTableBuilder import Juvix.Compiler.Concrete.Extra hiding (Pos, space, string') import Juvix.Compiler.Concrete.Extra qualified as P +import Juvix.Compiler.Concrete.Keywords +import Juvix.Data.Keyword import Juvix.Extra.Strings qualified as Str import Juvix.Parser.Lexer import Juvix.Prelude @@ -76,18 +80,12 @@ judocStart = P.chunk Str.judocStart >> hspace judocEmptyLine :: Members '[Reader ParserParams, InfoTableBuilder] r => ParsecS r () judocEmptyLine = lexeme (void (P.try (judocStart >> P.newline))) --- | We use the ascii version in the label so it shows in the error messages. -keywordUnicode :: Members '[Reader ParserParams, InfoTableBuilder] r => Text -> Text -> ParsecS r () -keywordUnicode ascii unic = P.label (unpack ascii) (keyword ascii <|> keyword unic) - -keyword :: Members '[Reader ParserParams, InfoTableBuilder] r => Text -> ParsecS r () -keyword kw = do - l <- keywordL' space kw - lift (registerKeyword l) +kw :: Members '[Reader ParserParams, InfoTableBuilder] r => Keyword -> ParsecS r () +kw k = lexeme $ kw' k >>= P.lift . registerKeyword -- | Same as @identifier@ but does not consume space after it. bareIdentifier :: Members '[Reader ParserParams, InfoTableBuilder] r => ParsecS r (Text, Interval) -bareIdentifier = interval $ rawIdentifier allKeywords +bareIdentifier = interval (rawIdentifier allKeywordStrings) dot :: forall e m. MonadParsec e Text m => m Char dot = P.char '.' @@ -95,39 +93,6 @@ dot = P.char '.' dottedIdentifier :: Members '[Reader ParserParams, InfoTableBuilder] r => ParsecS r (NonEmpty (Text, Interval)) dottedIdentifier = lexeme $ P.sepBy1 bareIdentifier dot -allKeywords :: Members '[Reader ParserParams, InfoTableBuilder] r => [ParsecS r ()] -allKeywords = - [ kwAssign, - kwAxiom, - kwColon, - kwColonOmega, - kwColonOne, - kwColonZero, - kwCompile, - kwEnd, - kwForeign, - kwHiding, - kwHole, - kwImport, - kwIn, - kwInductive, - kwInfix, - kwInfixl, - kwInfixr, - kwLambda, - kwLet, - kwModule, - kwOpen, - kwPostfix, - kwPublic, - kwRightArrow, - kwSemicolon, - kwType, - kwUsing, - kwWhere, - kwWildcard - ] - lbrace :: Members '[Reader ParserParams, InfoTableBuilder] r => ParsecS r () lbrace = symbol "{" @@ -145,108 +110,3 @@ parens = between lparen rparen braces :: Members '[Reader ParserParams, InfoTableBuilder] r => ParsecS r a -> ParsecS r a braces = between (symbol "{") (symbol "}") - -kwBuiltin :: Members '[Reader ParserParams, InfoTableBuilder] r => ParsecS r () -kwBuiltin = keyword Str.builtin - -kwAssign :: Members '[Reader ParserParams, InfoTableBuilder] r => ParsecS r () -kwAssign = keyword Str.assignAscii - -kwAxiom :: Members '[Reader ParserParams, InfoTableBuilder] r => ParsecS r () -kwAxiom = keyword Str.axiom - -kwColon :: Members '[Reader ParserParams, InfoTableBuilder] r => ParsecS r () -kwColon = keyword Str.colon - -kwColonOmega :: Members '[Reader ParserParams, InfoTableBuilder] r => ParsecS r () -kwColonOmega = keywordUnicode Str.colonOmegaAscii Str.colonOmegaUnicode - -kwColonOne :: Members '[Reader ParserParams, InfoTableBuilder] r => ParsecS r () -kwColonOne = keyword Str.colonOne - -kwColonZero :: Members '[Reader ParserParams, InfoTableBuilder] r => ParsecS r () -kwColonZero = keyword Str.colonZero - -kwCompile :: Members '[Reader ParserParams, InfoTableBuilder] r => ParsecS r () -kwCompile = keyword Str.compile - -kwEnd :: Members '[Reader ParserParams, InfoTableBuilder] r => ParsecS r () -kwEnd = keyword Str.end - -kwHiding :: Members '[Reader ParserParams, InfoTableBuilder] r => ParsecS r () -kwHiding = keyword Str.hiding - -kwImport :: Members '[Reader ParserParams, InfoTableBuilder] r => ParsecS r () -kwImport = keyword Str.import_ - -kwForeign :: Members '[Reader ParserParams, InfoTableBuilder] r => ParsecS r () -kwForeign = keyword Str.foreign_ - -kwIn :: Members '[Reader ParserParams, InfoTableBuilder] r => ParsecS r () -kwIn = keyword Str.in_ - -kwInductive :: Members '[Reader ParserParams, InfoTableBuilder] r => ParsecS r () -kwInductive = keyword Str.inductive - -kwInfix :: Members '[Reader ParserParams, InfoTableBuilder] r => ParsecS r () -kwInfix = keyword Str.infix_ - -kwInfixl :: Members '[Reader ParserParams, InfoTableBuilder] r => ParsecS r () -kwInfixl = keyword Str.infixl_ - -kwInfixr :: Members '[Reader ParserParams, InfoTableBuilder] r => ParsecS r () -kwInfixr = keyword Str.infixr_ - -kwLambda :: Members '[Reader ParserParams, InfoTableBuilder] r => ParsecS r () -kwLambda = keywordUnicode Str.lambdaAscii Str.lambdaUnicode - -kwLet :: Members '[Reader ParserParams, InfoTableBuilder] r => ParsecS r () -kwLet = keyword Str.let_ - -kwMapsTo :: Members '[Reader ParserParams, InfoTableBuilder] r => ParsecS r () -kwMapsTo = keywordUnicode Str.mapstoAscii Str.mapstoUnicode - -kwModule :: Members '[Reader ParserParams, InfoTableBuilder] r => ParsecS r () -kwModule = keyword Str.module_ - -kwOpen :: Members '[Reader ParserParams, InfoTableBuilder] r => ParsecS r () -kwOpen = keyword Str.open - -kwPostfix :: Members '[Reader ParserParams, InfoTableBuilder] r => ParsecS r () -kwPostfix = keyword Str.postfix - -kwPublic :: Members '[Reader ParserParams, InfoTableBuilder] r => ParsecS r () -kwPublic = keyword Str.public - -kwRightArrow :: Members '[Reader ParserParams, InfoTableBuilder] r => ParsecS r () -kwRightArrow = keywordUnicode Str.toAscii Str.toUnicode - -kwSemicolon :: Members '[Reader ParserParams, InfoTableBuilder] r => ParsecS r () -kwSemicolon = keyword Str.semicolon - -kwType :: Members '[Reader ParserParams, InfoTableBuilder] r => ParsecS r () -kwType = keyword Str.type_ - -kwTerminating :: Members '[Reader ParserParams, InfoTableBuilder] r => ParsecS r () -kwTerminating = keyword Str.terminating - -kwPositive :: Members '[Reader ParserParams, InfoTableBuilder] r => ParsecS r () -kwPositive = keyword Str.positive - -kwUsing :: Members '[Reader ParserParams, InfoTableBuilder] r => ParsecS r () -kwUsing = keyword Str.using - -kwWhere :: Members '[Reader ParserParams, InfoTableBuilder] r => ParsecS r () -kwWhere = keyword Str.where_ - -kwHole :: Members '[Reader ParserParams, InfoTableBuilder] r => ParsecS r () -kwHole = keyword Str.underscore - -kwWildcard :: Members '[Reader ParserParams, InfoTableBuilder] r => ParsecS r () -kwWildcard = keyword Str.underscore - -ghc :: Members '[Reader ParserParams, InfoTableBuilder] r => ParsecS r () -ghc = keyword Str.ghc - -cBackend :: Members '[Reader ParserParams, InfoTableBuilder] r => ParsecS r () -cBackend = keyword Str.cBackend diff --git a/src/Juvix/Compiler/Core/Keywords.hs b/src/Juvix/Compiler/Core/Keywords.hs new file mode 100644 index 0000000000..d1ab1c4fb2 --- /dev/null +++ b/src/Juvix/Compiler/Core/Keywords.hs @@ -0,0 +1,80 @@ +module Juvix.Compiler.Core.Keywords + ( module Juvix.Compiler.Core.Keywords, + module Juvix.Data.Keyword, + module Juvix.Data.Keyword.All, + ) +where + +import Juvix.Data.Keyword +import Juvix.Data.Keyword.All + ( kwAssign, + kwBind, + kwCase, + kwComma, + kwConstr, + kwDef, + kwDiv, + kwElse, + kwEq, + kwFail, + kwGe, + kwGt, + kwIf, + kwIn, + kwLe, + kwLet, + kwLetRec, + kwLt, + kwMatch, + kwMinus, + kwMod, + kwMul, + kwOf, + kwPlus, + kwRightArrow, + kwSemicolon, + kwSeq, + kwThen, + kwTrace, + kwWildcard, + kwWith, + ) +import Juvix.Prelude + +allKeywordStrings :: HashSet Text +allKeywordStrings = keywordsStrings allKeywords + +allKeywords :: [Keyword] +allKeywords = + [ kwAssign, + kwLet, + kwLetRec, + kwIn, + kwConstr, + kwCase, + kwOf, + kwMatch, + kwWith, + kwIf, + kwThen, + kwElse, + kwDef, + kwRightArrow, + kwSemicolon, + kwComma, + kwWildcard, + kwPlus, + kwMinus, + kwMul, + kwDiv, + kwMod, + kwEq, + kwLt, + kwLe, + kwGt, + kwGe, + kwBind, + kwSeq, + kwTrace, + kwFail + ] diff --git a/src/Juvix/Compiler/Core/Translation/FromSource.hs b/src/Juvix/Compiler/Core/Translation/FromSource.hs index 59fefa9899..b34a8cd95f 100644 --- a/src/Juvix/Compiler/Core/Translation/FromSource.hs +++ b/src/Juvix/Compiler/Core/Translation/FromSource.hs @@ -109,7 +109,7 @@ parseToplevel :: parseToplevel = do declareBuiltins space - P.endBy statement kwSemicolon + P.endBy statement (kw kwSemicolon) r <- optional expression P.eof return r @@ -123,7 +123,7 @@ statementDef :: Members '[Reader ParserParams, InfoTableBuilder, NameIdGen] r => ParsecS r () statementDef = do - kwDef + kw kwDef off <- P.getOffset (txt, i) <- identifierL r <- lift (getIdent txt) @@ -155,7 +155,7 @@ parseDefinition :: Symbol -> ParsecS r () parseDefinition sym = do - kwAssign + kw kwAssign node <- expression lift $ registerIdentNode sym node let (is, _) = unfoldLambdas node @@ -175,7 +175,7 @@ statementConstr :: Members '[Reader ParserParams, InfoTableBuilder, NameIdGen] r => ParsecS r () statementConstr = do - kwConstr + kw kwConstr off <- P.getOffset (txt, i) <- identifierL (argsNum, _) <- number 0 128 @@ -242,7 +242,7 @@ bindExpr' :: Node -> ParsecS r Node bindExpr' varsNum vars node = do - kwBind + kw kwBind node' <- cmpExpr varsNum vars ioExpr' varsNum vars (mkConstr Info.empty (BuiltinTag TagBind) [node, node']) @@ -253,7 +253,7 @@ seqExpr' :: Node -> ParsecS r Node seqExpr' varsNum vars node = do - ((), i) <- interval kwSeq + ((), i) <- interval (kw kwSeq) node' <- cmpExpr (varsNum + 1) vars name <- lift $ freshName KNameLocal "_" i ioExpr' varsNum vars $ @@ -290,7 +290,7 @@ eqExpr' :: Node -> ParsecS r Node eqExpr' varsNum vars node = do - kwEq + kw kwEq node' <- arithExpr varsNum vars return $ mkBuiltinApp' OpEq [node, node'] @@ -301,7 +301,7 @@ ltExpr' :: Node -> ParsecS r Node ltExpr' varsNum vars node = do - kwLt + kw kwLt node' <- arithExpr varsNum vars return $ mkBuiltinApp' OpIntLt [node, node'] @@ -312,7 +312,7 @@ leExpr' :: Node -> ParsecS r Node leExpr' varsNum vars node = do - kwLe + kw kwLe node' <- arithExpr varsNum vars return $ mkBuiltinApp' OpIntLe [node, node'] @@ -323,7 +323,7 @@ gtExpr' :: Node -> ParsecS r Node gtExpr' varsNum vars node = do - kwGt + kw kwGt node' <- arithExpr varsNum vars return $ mkBuiltinApp' OpIntLt [node', node] @@ -334,7 +334,7 @@ geExpr' :: Node -> ParsecS r Node geExpr' varsNum vars node = do - kwGe + kw kwGe node' <- arithExpr varsNum vars return $ mkBuiltinApp' OpIntLe [node', node] @@ -363,7 +363,7 @@ plusExpr' :: Node -> ParsecS r Node plusExpr' varsNum vars node = do - kwPlus + kw kwPlus node' <- factorExpr varsNum vars arithExpr' varsNum vars (mkBuiltinApp' OpIntAdd [node, node']) @@ -374,7 +374,7 @@ minusExpr' :: Node -> ParsecS r Node minusExpr' varsNum vars node = do - kwMinus + kw kwMinus node' <- factorExpr varsNum vars arithExpr' varsNum vars (mkBuiltinApp' OpIntSub [node, node']) @@ -404,7 +404,7 @@ mulExpr' :: Node -> ParsecS r Node mulExpr' varsNum vars node = do - kwMul + kw kwMul node' <- appExpr varsNum vars factorExpr' varsNum vars (mkBuiltinApp' OpIntMul [node, node']) @@ -415,7 +415,7 @@ divExpr' :: Node -> ParsecS r Node divExpr' varsNum vars node = do - kwDiv + kw kwDiv node' <- appExpr varsNum vars factorExpr' varsNum vars (mkBuiltinApp' OpIntDiv [node, node']) @@ -426,7 +426,7 @@ modExpr' :: Node -> ParsecS r Node modExpr' varsNum vars node = do - kwMod + kw kwMod node' <- appExpr varsNum vars factorExpr' varsNum vars (mkBuiltinApp' OpIntMod [node, node']) @@ -444,15 +444,15 @@ builtinAppExpr :: ParsecS r Node builtinAppExpr varsNum vars = do op <- - (kwEq >> return OpEq) - <|> (kwLt >> return OpIntLt) - <|> (kwLe >> return OpIntLe) - <|> (kwPlus >> return OpIntAdd) - <|> (kwMinus >> return OpIntSub) - <|> (kwDiv >> return OpIntDiv) - <|> (kwMul >> return OpIntMul) - <|> (kwTrace >> return OpTrace) - <|> (kwFail >> return OpFail) + (kw kwEq >> return OpEq) + <|> (kw kwLt >> return OpIntLt) + <|> (kw kwLe >> return OpIntLe) + <|> (kw kwPlus >> return OpIntAdd) + <|> (kw kwMinus >> return OpIntSub) + <|> (kw kwDiv >> return OpIntDiv) + <|> (kw kwMul >> return OpIntMul) + <|> (kw kwTrace >> return OpTrace) + <|> (kw kwFail >> return OpFail) args <- P.many (atom varsNum vars) return $ mkBuiltinApp' op args @@ -530,7 +530,7 @@ parseLocalName = parseWildcardName <|> parseIdentName where parseWildcardName :: ParsecS r Name parseWildcardName = do - ((), i) <- interval kwWildcard + ((), i) <- interval (kw kwWildcard) lift $ freshName KNameLocal "_" i parseIdentName :: ParsecS r Name @@ -556,12 +556,12 @@ exprLetrecOne :: HashMap Text Index -> ParsecS r Node exprLetrecOne varsNum vars = do - kwLetRec + kw kwLetRec name <- parseLocalName - kwAssign + kw kwAssign let vars' = HashMap.insert (name ^. nameText) varsNum vars value <- expr (varsNum + 1) vars' - kwIn + kw kwIn body <- expr (varsNum + 1) vars' return $ mkLetRec (Info.singleton (BindersInfo [Info.singleton (NameInfo name)])) (fromList [value]) body @@ -572,7 +572,7 @@ exprLetrecMany :: ParsecS r Node exprLetrecMany varsNum vars = do off <- P.getOffset - defNames <- P.try (kwLetRec >> letrecNames) + defNames <- P.try (kw kwLetRec >> letrecNames) when (null defNames) $ parseFailure off "expected at least one identifier name in letrec signature" let (vars', varsNum') = foldl' (\(vs, k) txt -> (HashMap.insert txt k vs, k + 1)) (vars, varsNum) defNames @@ -599,11 +599,11 @@ letrecDefs names varsNum vars = case names of when (n /= txt) $ parseFailure off "identifier name doesn't match letrec signature" name <- lift $ freshName KNameLocal txt i - kwAssign + kw kwAssign v <- expr varsNum vars if - | null names' -> optional kwSemicolon >> kwIn - | otherwise -> kwSemicolon + | null names' -> optional (kw kwSemicolon) >> kw kwIn + | otherwise -> kw kwSemicolon rest <- letrecDefs names' varsNum vars return $ (name, v) : rest @@ -615,7 +615,7 @@ letrecDef :: letrecDef varsNum vars = do (txt, i) <- identifierL name <- lift $ freshName KNameLocal txt i - kwAssign + kw kwAssign v <- expr varsNum vars return (name, v) @@ -625,11 +625,11 @@ exprLet :: HashMap Text Index -> ParsecS r Node exprLet varsNum vars = do - kwLet + kw kwLet name <- parseLocalName - kwAssign + kw kwAssign value <- expr varsNum vars - kwIn + kw kwIn let vars' = HashMap.insert (name ^. nameText) varsNum vars body <- expr (varsNum + 1) vars' return $ mkLet (binderNameInfo name) value body @@ -641,9 +641,9 @@ exprCase :: ParsecS r Node exprCase varsNum vars = do off <- P.getOffset - kwCase + kw kwCase value <- expr varsNum vars - kwOf + kw kwOf braces (exprCase' off value varsNum vars) <|> exprCase' off value varsNum vars @@ -655,7 +655,7 @@ exprCase' :: HashMap Text Index -> ParsecS r Node exprCase' off value varsNum vars = do - bs <- P.sepEndBy (caseBranchP varsNum vars) kwSemicolon + bs <- P.sepEndBy (caseBranchP varsNum vars) (kw kwSemicolon) let bss = map fromLeft' $ filter isLeft bs let def' = map fromRight' $ filter isRight bs case def' of @@ -681,8 +681,8 @@ caseDefaultBranch :: HashMap Text Index -> ParsecS r Node caseDefaultBranch varsNum vars = do - kwWildcard - kwAssign + kw kwWildcard + kw kwAssign expr varsNum vars caseMatchingBranch :: @@ -704,7 +704,7 @@ caseMatchingBranch varsNum vars = do when (ci ^. constructorArgsNum /= bindersNum) (parseFailure off "wrong number of constructor arguments") - kwAssign + kw kwAssign let vars' = fst $ foldl' @@ -725,11 +725,11 @@ exprIf :: HashMap Text Index -> ParsecS r Node exprIf varsNum vars = do - kwIf + kw kwIf value <- expr varsNum vars - kwThen + kw kwThen br1 <- expr varsNum vars - kwElse + kw kwElse br2 <- expr varsNum vars return $ mkIf Info.empty value br1 br2 @@ -739,9 +739,9 @@ exprMatch :: HashMap Text Index -> ParsecS r Node exprMatch varsNum vars = do - kwMatch - values <- P.sepBy (expr varsNum vars) kwComma - kwWith + kw kwMatch + values <- P.sepBy (expr varsNum vars) (kw kwComma) + kw kwWith braces (exprMatch' values varsNum vars) <|> exprMatch' values varsNum vars @@ -752,7 +752,7 @@ exprMatch' :: HashMap Text Index -> ParsecS r Node exprMatch' values varsNum vars = do - bs <- P.sepEndBy (matchBranch (length values) varsNum vars) kwSemicolon + bs <- P.sepEndBy (matchBranch (length values) varsNum vars) (kw kwSemicolon) return $ mkMatch' (fromList values) bs matchBranch :: @@ -763,8 +763,8 @@ matchBranch :: ParsecS r MatchBranch matchBranch patsNum varsNum vars = do off <- P.getOffset - pats <- P.sepBy branchPattern kwComma - kwAssign + pats <- P.sepBy branchPattern (kw kwComma) + kw kwAssign unless (length pats == patsNum) $ parseFailure off "wrong number of patterns" let pis = concatMap (reverse . getBinderPatternInfos) pats @@ -786,9 +786,9 @@ branchPattern = <|> binderOrConstrPattern True <|> parens branchPattern -wildcardPattern :: ParsecS r Pattern +wildcardPattern :: Members '[Reader ParserParams] r => ParsecS r Pattern wildcardPattern = do - kwWildcard + kw kwWildcard return $ PatWildcard (PatternWildcard Info.empty) binderOrConstrPattern :: diff --git a/src/Juvix/Compiler/Core/Translation/FromSource/Lexer.hs b/src/Juvix/Compiler/Core/Translation/FromSource/Lexer.hs index b53c9dff06..5faa7d3503 100644 --- a/src/Juvix/Compiler/Core/Translation/FromSource/Lexer.hs +++ b/src/Juvix/Compiler/Core/Translation/FromSource/Lexer.hs @@ -1,9 +1,11 @@ module Juvix.Compiler.Core.Translation.FromSource.Lexer ( module Juvix.Compiler.Core.Translation.FromSource.Lexer, module Juvix.Parser.Lexer, + module Juvix.Compiler.Core.Keywords, ) where +import Juvix.Compiler.Core.Keywords import Juvix.Extra.Strings qualified as Str import Juvix.Parser.Lexer import Juvix.Prelude @@ -22,6 +24,9 @@ lexemeInterval = lexeme . interval symbol :: Text -> ParsecS r () symbol = void . L.symbol space +kw :: Members '[Reader ParserParams] r => Keyword -> ParsecS r () +kw = void . lexeme . kw' + decimal :: (Member (Reader ParserParams) r, Num n) => ParsecS r (n, Interval) decimal = lexemeInterval L.decimal @@ -34,12 +39,6 @@ number = number' integer string :: Member (Reader ParserParams) r => ParsecS r (Text, Interval) string = lexemeInterval string' -keyword :: Text -> ParsecS r () -keyword = keyword' space - -keywordSymbol :: Text -> ParsecS r () -keywordSymbol = keywordSymbol' space - identifier :: ParsecS r Text identifier = lexeme bareIdentifier @@ -48,42 +47,7 @@ identifierL = lexemeInterval bareIdentifier -- | Same as @identifier@ but does not consume space after it. bareIdentifier :: ParsecS r Text -bareIdentifier = rawIdentifier allKeywords - -allKeywords :: [ParsecS r ()] -allKeywords = - [ kwAssign, - kwLet, - kwLetRec, - kwIn, - kwConstr, - kwCase, - kwOf, - kwMatch, - kwWith, - kwIf, - kwThen, - kwElse, - kwDef, - kwRightArrow, - kwSemicolon, - kwComma, - kwWildcard, - kwPlus, - kwMinus, - kwMul, - kwDiv, - kwMod, - kwEq, - kwLt, - kwLe, - kwGt, - kwGe, - kwBind, - kwSeq, - kwTrace, - kwFail - ] +bareIdentifier = rawIdentifier allKeywordStrings symbolAt :: ParsecS r () symbolAt = symbol Str.at_ @@ -108,96 +72,3 @@ parens = between lparen rparen braces :: ParsecS r a -> ParsecS r a braces = between (symbol "{") (symbol "}") - -kwAssign :: ParsecS r () -kwAssign = keyword Str.assignAscii - -kwLet :: ParsecS r () -kwLet = keyword Str.let_ - -kwLetRec :: ParsecS r () -kwLetRec = keyword Str.letrec_ - -kwIn :: ParsecS r () -kwIn = keyword Str.in_ - -kwConstr :: ParsecS r () -kwConstr = keyword Str.constr - -kwCase :: ParsecS r () -kwCase = keyword Str.case_ - -kwOf :: ParsecS r () -kwOf = keyword Str.of_ - -kwMatch :: ParsecS r () -kwMatch = keyword Str.match_ - -kwWith :: ParsecS r () -kwWith = keyword Str.with_ - -kwIf :: ParsecS r () -kwIf = keyword Str.if_ - -kwThen :: ParsecS r () -kwThen = keyword Str.then_ - -kwElse :: ParsecS r () -kwElse = keyword Str.else_ - -kwDef :: ParsecS r () -kwDef = keyword Str.def - -kwRightArrow :: ParsecS r () -kwRightArrow = keyword Str.toUnicode <|> keyword Str.toAscii - -kwSemicolon :: ParsecS r () -kwSemicolon = keyword Str.semicolon - -kwComma :: ParsecS r () -kwComma = keywordSymbol Str.comma - -kwWildcard :: ParsecS r () -kwWildcard = keyword Str.underscore - -kwPlus :: ParsecS r () -kwPlus = keyword Str.plus - -kwMinus :: ParsecS r () -kwMinus = keyword Str.minus - -kwMul :: ParsecS r () -kwMul = keyword Str.mul - -kwDiv :: ParsecS r () -kwDiv = keyword Str.div - -kwMod :: ParsecS r () -kwMod = keyword Str.mod - -kwEq :: ParsecS r () -kwEq = keyword Str.equal - -kwLt :: ParsecS r () -kwLt = keyword Str.less - -kwLe :: ParsecS r () -kwLe = keyword Str.lessEqual - -kwGt :: ParsecS r () -kwGt = keyword Str.greater - -kwGe :: ParsecS r () -kwGe = keyword Str.greaterEqual - -kwBind :: ParsecS r () -kwBind = keyword Str.bind - -kwSeq :: ParsecS r () -kwSeq = keyword Str.seq_ - -kwTrace :: ParsecS r () -kwTrace = keyword Str.trace_ - -kwFail :: ParsecS r () -kwFail = keyword Str.fail_ diff --git a/src/Juvix/Data/Keyword.hs b/src/Juvix/Data/Keyword.hs new file mode 100644 index 0000000000..9d8b1bd9e1 --- /dev/null +++ b/src/Juvix/Data/Keyword.hs @@ -0,0 +1,46 @@ +module Juvix.Data.Keyword where + +import Data.HashSet qualified as HashSet +import Juvix.Prelude +import Juvix.Prelude.Pretty + +data Keyword = Keyword + { _keywordAscii :: Text, + _keywordUnicode :: Maybe Text, + -- | true if _keywordAscii has a reserved character (the unicode is assumed to not have any) + _keywordHasReserved :: Bool + } + +makeLenses ''Keyword + +-- | Unicode has preference +instance Pretty Keyword where + pretty k = maybe (pretty (k ^. keywordAscii)) pretty (k ^. keywordUnicode) + +keywordsStrings :: [Keyword] -> HashSet Text +keywordsStrings = HashSet.fromList . concatMap keywordStrings + +keywordStrings :: Keyword -> [Text] +keywordStrings Keyword {..} = maybe id (:) _keywordUnicode [_keywordAscii] + +mkKw :: Text -> Maybe Text -> Keyword +mkKw _keywordAscii _keywordUnicode = + Keyword + { _keywordHasReserved = hasReservedChar _keywordAscii, + .. + } + +asciiKw :: Text -> Keyword +asciiKw ascii = mkKw ascii Nothing + +unicodeKw :: Text -> Text -> Keyword +unicodeKw ascii unicode = mkKw ascii (Just unicode) + +isReservedChar :: Char -> Bool +isReservedChar = (`elem` reservedSymbols) + +hasReservedChar :: Text -> Bool +hasReservedChar = any isReservedChar . unpack + +reservedSymbols :: [Char] +reservedSymbols = ";(){}.@\"[]" diff --git a/src/Juvix/Data/Keyword/All.hs b/src/Juvix/Data/Keyword/All.hs new file mode 100644 index 0000000000..cc44d92899 --- /dev/null +++ b/src/Juvix/Data/Keyword/All.hs @@ -0,0 +1,215 @@ +module Juvix.Data.Keyword.All + ( module Juvix.Data.Keyword, + module Juvix.Data.Keyword.All, + ) +where + +import Juvix.Data.Keyword +import Juvix.Extra.Strings qualified as Str + +kwBuiltin :: Keyword +kwBuiltin = asciiKw Str.builtin + +kwAssign :: Keyword +kwAssign = asciiKw Str.assignAscii + +kwAxiom :: Keyword +kwAxiom = asciiKw Str.axiom + +kwColon :: Keyword +kwColon = asciiKw Str.colon + +kwColonOmega :: Keyword +kwColonOmega = unicodeKw Str.colonOmegaAscii Str.colonOmegaUnicode + +kwColonOne :: Keyword +kwColonOne = asciiKw Str.colonOne + +kwColonZero :: Keyword +kwColonZero = asciiKw Str.colonZero + +kwCompile :: Keyword +kwCompile = asciiKw Str.compile + +kwEnd :: Keyword +kwEnd = asciiKw Str.end + +kwHiding :: Keyword +kwHiding = asciiKw Str.hiding + +kwImport :: Keyword +kwImport = asciiKw Str.import_ + +kwForeign :: Keyword +kwForeign = asciiKw Str.foreign_ + +kwIn :: Keyword +kwIn = asciiKw Str.in_ + +kwInductive :: Keyword +kwInductive = asciiKw Str.inductive + +kwInfix :: Keyword +kwInfix = asciiKw Str.infix_ + +kwInfixl :: Keyword +kwInfixl = asciiKw Str.infixl_ + +kwInfixr :: Keyword +kwInfixr = asciiKw Str.infixr_ + +kwLambda :: Keyword +kwLambda = unicodeKw Str.lambdaAscii Str.lambdaUnicode + +kwLet :: Keyword +kwLet = asciiKw Str.let_ + +kwMapsTo :: Keyword +kwMapsTo = unicodeKw Str.mapstoAscii Str.mapstoUnicode + +kwModule :: Keyword +kwModule = asciiKw Str.module_ + +kwOpen :: Keyword +kwOpen = asciiKw Str.open + +kwPostfix :: Keyword +kwPostfix = asciiKw Str.postfix + +kwPublic :: Keyword +kwPublic = asciiKw Str.public + +kwRightArrow :: Keyword +kwRightArrow = unicodeKw Str.toAscii Str.toUnicode + +kwSemicolon :: Keyword +kwSemicolon = asciiKw Str.semicolon + +kwType :: Keyword +kwType = asciiKw Str.type_ + +kwTerminating :: Keyword +kwTerminating = asciiKw Str.terminating + +kwPositive :: Keyword +kwPositive = asciiKw Str.positive + +kwUsing :: Keyword +kwUsing = asciiKw Str.using + +kwWhere :: Keyword +kwWhere = asciiKw Str.where_ + +kwHole :: Keyword +kwHole = asciiKw Str.underscore + +kwWildcard :: Keyword +kwWildcard = asciiKw Str.underscore + +ghc :: Keyword +ghc = asciiKw Str.ghc + +cBackend :: Keyword +cBackend = asciiKw Str.cBackend + +kwLetRec :: Keyword +kwLetRec = asciiKw Str.letrec_ + +kwConstr :: Keyword +kwConstr = asciiKw Str.constr + +kwCase :: Keyword +kwCase = asciiKw Str.case_ + +kwOf :: Keyword +kwOf = asciiKw Str.of_ + +kwMatch :: Keyword +kwMatch = asciiKw Str.match_ + +kwWith :: Keyword +kwWith = asciiKw Str.with_ + +kwIf :: Keyword +kwIf = asciiKw Str.if_ + +kwThen :: Keyword +kwThen = asciiKw Str.then_ + +kwElse :: Keyword +kwElse = asciiKw Str.else_ + +kwDef :: Keyword +kwDef = asciiKw Str.def + +kwComma :: Keyword +kwComma = asciiKw Str.comma + +kwPlus :: Keyword +kwPlus = asciiKw Str.plus + +kwMinus :: Keyword +kwMinus = asciiKw Str.minus + +kwMul :: Keyword +kwMul = asciiKw Str.mul + +kwDiv :: Keyword +kwDiv = asciiKw Str.div + +kwMod :: Keyword +kwMod = asciiKw Str.mod + +kwEq :: Keyword +kwEq = asciiKw Str.equal + +kwLt :: Keyword +kwLt = asciiKw Str.less + +kwLe :: Keyword +kwLe = asciiKw Str.lessEqual + +kwGt :: Keyword +kwGt = asciiKw Str.greater + +kwGe :: Keyword +kwGe = asciiKw Str.greaterEqual + +kwBind :: Keyword +kwBind = asciiKw Str.bind + +kwSeq :: Keyword +kwSeq = asciiKw Str.seq_ + +kwTrace :: Keyword +kwTrace = asciiKw Str.trace_ + +kwFail :: Keyword +kwFail = asciiKw Str.fail_ + +kwFun :: Keyword +kwFun = asciiKw Str.fun_ + +kwStar :: Keyword +kwStar = asciiKw Str.mul + +kwTrue :: Keyword +kwTrue = asciiKw Str.true_ + +kwFalse :: Keyword +kwFalse = asciiKw Str.false_ + +kwArg :: Keyword +kwArg = asciiKw Str.arg_ + +kwTmp :: Keyword +kwTmp = asciiKw Str.tmp_ + +kwUnit :: Keyword +kwUnit = asciiKw Str.unit + +kwVoid :: Keyword +kwVoid = asciiKw Str.void + +kwDollar :: Keyword +kwDollar = asciiKw Str.dollar diff --git a/src/Juvix/Parser/Lexer.hs b/src/Juvix/Parser/Lexer.hs index 6c0fb1bb74..0b1aa445a5 100644 --- a/src/Juvix/Parser/Lexer.hs +++ b/src/Juvix/Parser/Lexer.hs @@ -3,9 +3,11 @@ module Juvix.Parser.Lexer where import Control.Monad.Trans.Class (lift) +import Data.HashSet qualified as HashSet import Data.Set qualified as Set import Data.Text qualified as Text import GHC.Unicode +import Juvix.Data.Keyword import Juvix.Extra.Strings qualified as Str import Juvix.Prelude import Text.Megaparsec as P hiding (sepBy1, sepEndBy1, some) @@ -64,46 +66,51 @@ number' int mn mx = do string' :: ParsecS r Text string' = pack <$> (char '"' >> manyTill L.charLiteral (char '"')) -keyword' :: ParsecS r () -> Text -> ParsecS r () -keyword' spc kw = - P.try $ do - P.chunk kw - notFollowedBy (satisfy validTailChar) - spc - -keywordL' :: Member (Reader ParserParams) r => ParsecS r () -> Text -> ParsecS r Interval -keywordL' spc kw = P.try $ do - i <- onlyInterval (P.chunk kw) - notFollowedBy (satisfy validTailChar) - spc - return i - -keywordSymbol' :: ParsecS r () -> Text -> ParsecS r () -keywordSymbol' spc kw = do - P.try $ do - void $ P.chunk kw - spc - -rawIdentifier :: [ParsecS r ()] -> ParsecS r Text -rawIdentifier allKeywords = rawIdentifier' (const False) allKeywords - -rawIdentifier' :: (Char -> Bool) -> [ParsecS r ()] -> ParsecS r Text -rawIdentifier' excludedTailChar allKeywords = do - notFollowedBy (choice allKeywords) +-- | The caller is responsible of consuming space after it. +kw' :: forall r. Member (Reader ParserParams) r => Keyword -> ParsecS r Interval +kw' k@Keyword {..} = P.label (unpack _keywordAscii) (reserved <|> normal) + where + -- If the ascii representation uses reserved symbols, we use chunk so that we parse exactly the keyword + -- (if chunk fails it does not consume anything so try is not needed) + reserved :: ParsecS r Interval + reserved + | _keywordHasReserved = onlyInterval (P.chunk _keywordAscii) + | otherwise = empty + -- we parse the longest valid identifier and then we check if it is the expected keyword + normal :: ParsecS r Interval + normal = P.try $ do + (w, i) <- interval morpheme + unless (w `elem` keywordStrings k) (failure Nothing (Set.singleton (Label (fromJust $ nonEmpty $ unpack _keywordAscii)))) + return i + +rawIdentifier' :: (Char -> Bool) -> HashSet Text -> ParsecS r Text +rawIdentifier' excludedTailChar allKeywords = label "" $ P.try $ do + w <- morpheme' excludedTailChar + when (w `HashSet.member` allKeywords) empty + return w + +rawIdentifier :: HashSet Text -> ParsecS r Text +rawIdentifier = rawIdentifier' (const False) + +validTailChar :: Char -> Bool +validTailChar = + (`notElem` reservedSymbols) + .&&. (isAlphaNum .||. (validFirstChar .&&. (`notElem` delimiterSymbols))) + +-- | A word that does not contain reserved symbols. It may be an identifier or a keyword. +morpheme' :: (Char -> Bool) -> ParsecS r Text +morpheme' excludedTailChar = do h <- P.satisfy validFirstChar t <- P.takeWhileP Nothing (validTailChar .&&. not . excludedTailChar) - return (Text.cons h t) + let iden = Text.cons h t + return iden -validTailChar :: Char -> Bool -validTailChar c = - isAlphaNum c || (validFirstChar c && notElem c delimiterSymbols) +morpheme :: ParsecS r Text +morpheme = morpheme' (const False) delimiterSymbols :: [Char] delimiterSymbols = "," -reservedSymbols :: [Char] -reservedSymbols = "@\";(){}[]." - validFirstChar :: Char -> Bool validFirstChar c = not (isNumber c || isSpace c || (c `elem` reservedSymbols))