-
Notifications
You must be signed in to change notification settings - Fork 13
/
Copy pathParser.hs
295 lines (232 loc) · 9.62 KB
/
Parser.hs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE RankNTypes #-}
{-# LANGUAGE CPP #-}
module Text.Toml.Parser
( module Text.Toml.Parser
, module Text.Toml.Types
) where
import Control.Applicative hiding (many, optional, (<|>))
import Control.Monad
import qualified Data.HashMap.Strict as M
import qualified Data.List as L
import qualified Data.Set as S
import Data.Text (Text, pack, unpack)
import qualified Data.Vector as V
#if MIN_VERSION_time(1,5,0)
import Data.Time.Format (defaultTimeLocale, iso8601DateFormat,
parseTimeM)
#else
import Data.Time.Format (parseTime)
import System.Locale (defaultTimeLocale, iso8601DateFormat)
#endif
import Numeric (readHex)
import Text.Parsec
import Text.Toml.Types
-- Imported as last to fix redundancy warning
import Prelude hiding (concat, takeWhile)
-- | Our very own Parser type.
type Parser a = forall s. Parsec Text s a
-- | Convenience function for the test suite and GHCI.
parseOnly :: Parsec Text (S.Set [Text]) a -> Text -> Either ParseError a
parseOnly p str = runParser (p <* eof) S.empty "test" str
-- | Parses a complete document formatted according to the TOML spec.
tomlDoc :: Parsec Text (S.Set [Text]) Table
tomlDoc = do
skipBlanks
topTable <- table
namedSections <- many namedSection
-- Ensure the input is completely consumed
eof
-- Load each named section into the top table
foldM (flip (insert Explicit)) topTable namedSections
-- | Parses a table of key-value pairs.
table :: Parser Table
table = do
pairs <- try (many (assignment <* skipBlanks)) <|> (try skipBlanks >> return [])
case maybeDupe (map fst pairs) of
Just k -> fail $ "Cannot redefine key " ++ (unpack k)
Nothing -> return $ M.fromList pairs
-- | Parses an inline table of key-value pairs.
inlineTable :: Parser Node
inlineTable = do
pairs <- between (char '{') (char '}') (skipSpaces *> separatedValues <* skipSpaces)
case maybeDupe (map fst pairs) of
Just k -> fail $ "Cannot redefine key " ++ (unpack k)
Nothing -> return $ VTable $ M.fromList pairs
where
skipSpaces = many (satisfy isSpc)
separatedValues = sepBy (skipSpaces *> assignment <* skipSpaces) comma
comma = skipSpaces >> char ',' >> skipSpaces
-- | Find dupes, if any.
maybeDupe :: Ord a => [a] -> Maybe a
maybeDupe xx = dup xx S.empty
where
dup [] _ = Nothing
dup (x:xs) s = if S.member x s then Just x else dup xs (S.insert x s)
-- | Parses a 'Table' or 'TableArray' with its header.
-- The resulting tuple has the header's value in the first position, and the
-- 'NTable' or 'NTArray' in the second.
namedSection :: Parser ([Text], Node)
namedSection = do
eitherHdr <- try (Left <$> tableHeader) <|> try (Right <$> tableArrayHeader)
skipBlanks
tbl <- table
skipBlanks
return $ case eitherHdr of Left ns -> (ns, VTable tbl )
Right ns -> (ns, VTArray $ V.singleton tbl)
-- | Parses a table header.
tableHeader :: Parser [Text]
tableHeader = between (char '[') (char ']') headerValue
-- | Parses a table array header.
tableArrayHeader :: Parser [Text]
tableArrayHeader = between (twoChar '[') (twoChar ']') headerValue
where
twoChar c = count 2 (char c)
-- | Parses the value of any header (names separated by dots), into a list of 'Text'.
headerValue :: Parser [Text]
headerValue = ((pack <$> many1 keyChar) <|> anyStr') `sepBy1` (char '.')
where
keyChar = alphaNum <|> char '_' <|> char '-'
-- | Parses a key-value assignment.
assignment :: Parser (Text, Node)
assignment = do
k <- (pack <$> many1 keyChar) <|> anyStr'
many (satisfy isSpc) >> char '=' >> skipBlanks
v <- value
return (k, v)
where
-- TODO: Follow the spec, e.g.: only first char cannot be '['.
keyChar = alphaNum <|> char '_' <|> char '-'
-- | Parses a value.
value :: Parser Node
value = (try array <?> "array")
<|> (try boolean <?> "boolean")
<|> (try anyStr <?> "string")
<|> (try datetime <?> "datetime")
<|> (try float <?> "float")
<|> (try integer <?> "integer")
<|> (try inlineTable <?> "inline table")
--
-- | * Toml value parsers
--
array :: Parser Node
array = (try (arrayOf array) <?> "array of arrays")
<|> (try (arrayOf boolean) <?> "array of booleans")
<|> (try (arrayOf anyStr) <?> "array of strings")
<|> (try (arrayOf datetime) <?> "array of datetimes")
<|> (try (arrayOf float) <?> "array of floats")
<|> (try (arrayOf integer) <?> "array of integers")
boolean :: Parser Node
boolean = VBoolean <$> ( (try . string $ "true") *> return True <|>
(try . string $ "false") *> return False )
anyStr :: Parser Node
anyStr = VString <$> anyStr'
anyStr' :: Parser Text
anyStr' = try multiBasicStr <|> try basicStr <|> try multiLiteralStr <|> try literalStr
basicStr :: Parser Text
basicStr = between dQuote dQuote (fmap pack $ many strChar)
where
strChar = try escSeq <|> try (satisfy (\c -> c /= '"' && c /= '\\'))
dQuote = char '\"'
multiBasicStr :: Parser Text
multiBasicStr = (openDQuote3 *> escWhiteSpc *> (pack <$> manyTill strChar (try dQuote3)))
where
-- | Parse the a tripple-double quote, with possibly a newline attached
openDQuote3 = try (dQuote3 <* char '\n') <|> try dQuote3
-- | Parse tripple-double quotes
dQuote3 = count 3 $ char '"'
-- | Parse a string char, accepting escaped codes, ignoring escaped white space
strChar = (escSeq <|> (satisfy (/= '\\'))) <* escWhiteSpc
-- | Parse escaped white space, if any
escWhiteSpc = many $ char '\\' >> char '\n' >> (many $ satisfy (\c -> isSpc c || c == '\n'))
literalStr :: Parser Text
literalStr = between sQuote sQuote (pack <$> many (satisfy (/= '\'')))
where
sQuote = char '\''
multiLiteralStr :: Parser Text
multiLiteralStr = (openSQuote3 *> (fmap pack $ manyTill anyChar sQuote3))
where
-- | Parse the a tripple-single quote, with possibly a newline attached
openSQuote3 = try (sQuote3 <* char '\n') <|> try sQuote3
-- | Parse tripple-single quotes
sQuote3 = try . count 3 . char $ '\''
datetime :: Parser Node
datetime = do
d <- try $ manyTill anyChar (char 'Z')
#if MIN_VERSION_time(1,5,0)
let mt = parseTimeM True defaultTimeLocale (iso8601DateFormat $ Just "%X") d
#else
let mt = parseTime defaultTimeLocale (iso8601DateFormat $ Just "%X") d
#endif
case mt of Just t -> return $ VDatetime t
Nothing -> fail "parsing datetime failed"
-- | Attoparsec 'double' parses scientific "e" notation; reimplement according to Toml spec.
float :: Parser Node
float = VFloat <$> do
n <- intStr <* lookAhead (satisfy (\c -> c == '.' || c == 'e' || c == 'E'))
d <- try (satisfy (== '.') *> uintStr) <|> return "0"
e <- try (satisfy (\c -> c == 'e' || c == 'E') *> intStr) <|> return "0"
return . read . L.concat $ [n, ".", d, "e", e]
where
sign = try (string "-") <|> (try (char '+') >> return "") <|> return ""
uintStr = (:) <$> digit <*> many (optional (char '_') *> digit)
intStr = do s <- sign
u <- uintStr
return . L.concat $ [s, u]
integer :: Parser Node
integer = VInteger <$> (signed $ read <$> uintStr)
where
uintStr :: Parser [Char]
uintStr = (:) <$> digit <*> many (optional (char '_') *> digit)
--
-- * Utility functions
--
-- | Parses the elements of an array, while restricting them to a certain type.
arrayOf :: Parser Node -> Parser Node
arrayOf p = (VArray . V.fromList) <$>
between (char '[') (char ']') (skipBlanks *> separatedValues)
where
separatedValues = sepEndBy (skipBlanks *> try p <* skipBlanks) comma <* skipBlanks
comma = skipBlanks >> char ',' >> skipBlanks
-- | Parser for escape sequences.
escSeq :: Parser Char
escSeq = char '\\' *> escSeqChar
where
escSeqChar = try (char '"') *> return '"'
<|> try (char '\\') *> return '\\'
<|> try (char '/') *> return '/'
<|> try (char 'b') *> return '\b'
<|> try (char 't') *> return '\t'
<|> try (char 'n') *> return '\n'
<|> try (char 'f') *> return '\f'
<|> try (char 'r') *> return '\r'
<|> try (char 'u') *> unicodeHex 4
<|> try (char 'U') *> unicodeHex 8
<?> "escape character"
-- | Parser for unicode hexadecimal values of representation length 'n'.
unicodeHex :: Int -> Parser Char
unicodeHex n = do
h <- count n (satisfy isHex)
let v = fst . head . readHex $ h
return $ if v <= maxChar then toEnum v else '_'
where
isHex c = (c >= '0' && c <= '9') || (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z')
maxChar = fromEnum (maxBound :: Char)
-- | Parser for signs (a plus or a minus).
signed :: Num a => Parser a -> Parser a
signed p = try (negate <$> (char '-' *> p))
<|> try (char '+' *> p)
<|> try p
-- | Parses the (rest of the) line including an EOF, whitespace and comments.
skipBlanks :: Parser ()
skipBlanks = skipMany blank
where
blank = try ((many1 $ satisfy isSpc) >> return ()) <|> try comment <|> try eol
comment = char '#' >> (many $ satisfy (/= '\n')) >> return ()
-- | Results in 'True' for whitespace chars, tab or space, according to spec.
isSpc :: Char -> Bool
isSpc c = c == ' ' || c == '\t'
-- | Parse an EOL, as per TOML spec this is 0x0A a.k.a. '\n' or 0x0D a.k.a. '\r'.
eol :: Parser ()
eol = (string "\n" <|> string "\r\n") >> return ()