Skip to content

Commit

Permalink
Lexer: report indentation warnings at column 1
Browse files Browse the repository at this point in the history
We use a Latin1 generated parser with Alex, but we also parse Unicode
BOMs, non-breaking spaces, etc. In recent Alex, the reported column isn't
expressed in Unicode chars anymore but in bytes/ASCII chars (probably
due to
haskell/alex@ae525e3
but I haven't checked), which broke our tests (see
#8896).

To work around this, we report indentation warnings at the token start
position (i.e. always column 1) instead of the token end position. Otherwise
the reported position no longer makes sense to the user.

(cherry picked from commit 5f72880)
  • Loading branch information
hsyl20 authored and mergify[bot] committed Apr 26, 2023
1 parent 022f4a7 commit 3103b87
Show file tree
Hide file tree
Showing 7 changed files with 36 additions and 30 deletions.
26 changes: 13 additions & 13 deletions Cabal-syntax/src/Distribution/Fields/Lexer.hs
Original file line number Diff line number Diff line change
Expand Up @@ -195,17 +195,17 @@ toki t pos len input = return $! L pos (t (B.take len input))
tok :: Token -> Position -> Int -> ByteString -> Lex LToken
tok t pos _len _input = return $! L pos t

checkLeadingWhitespace :: Int -> ByteString -> Lex Int
checkLeadingWhitespace len bs
checkLeadingWhitespace :: Position -> Int -> ByteString -> Lex Int
checkLeadingWhitespace pos len bs
| B.any (== 9) (B.take len bs) = do
addWarning LexWarningTab
checkWhitespace len bs
| otherwise = checkWhitespace len bs
addWarningAt pos LexWarningTab
checkWhitespace pos len bs
| otherwise = checkWhitespace pos len bs

checkWhitespace :: Int -> ByteString -> Lex Int
checkWhitespace len bs
checkWhitespace :: Position -> Int -> ByteString -> Lex Int
checkWhitespace pos len bs
| B.any (== 194) (B.take len bs) = do
addWarning LexWarningNBSP
addWarningAt pos LexWarningNBSP
return $ len - B.count 194 (B.take len bs)
| otherwise = return len

Expand Down Expand Up @@ -313,12 +313,12 @@ bol_section = 3
in_field_braces = 4
in_field_layout = 5
in_section = 6
alex_action_0 = \_ len _ -> do
when (len /= 0) $ addWarning LexWarningBOM
alex_action_0 = \pos len _ -> do
when (len /= 0) $ addWarningAt pos LexWarningBOM
setStartCode bol_section
lexToken
alex_action_1 = \_pos len inp -> checkWhitespace len inp >> adjustPos retPos >> lexToken
alex_action_3 = \pos len inp -> checkLeadingWhitespace len inp >>
alex_action_1 = \pos len inp -> checkWhitespace pos len inp >> adjustPos retPos >> lexToken
alex_action_3 = \pos len inp -> checkLeadingWhitespace pos len inp >>
if B.length inp == len
then return (L pos EOF)
else setStartCode in_section
Expand All @@ -333,7 +333,7 @@ alex_action_12 = tok Colon
alex_action_13 = tok OpenBrace
alex_action_14 = tok CloseBrace
alex_action_15 = \_ _ _ -> adjustPos retPos >> setStartCode bol_section >> lexToken
alex_action_16 = \pos len inp -> checkLeadingWhitespace len inp >>= \len' ->
alex_action_16 = \pos len inp -> checkLeadingWhitespace pos len inp >>= \len' ->
if B.length inp == len
then return (L pos EOF)
else setStartCode in_field_layout
Expand Down
6 changes: 6 additions & 0 deletions Cabal-syntax/src/Distribution/Fields/LexerMonad.hs
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ module Distribution.Fields.LexerMonad (
LexWarning(..),
LexWarningType(..),
addWarning,
addWarningAt,
toPWarnings,

) where
Expand Down Expand Up @@ -153,3 +154,8 @@ setStartCode c = Lex $ \s -> LexResult s{ curCode = c } ()
-- Records a warning of the given type, tagged with the position currently
-- stored in the lexer state ('curPos'), by prepending it to 'warnings'.
addWarning :: LexWarningType -> Lex ()
addWarning kind = Lex record
  where
    -- The state is otherwise returned unchanged; the action yields unit.
    record st@LexState{ curPos = here, warnings = acc } =
      LexResult st{ warnings = LexWarning kind here : acc } ()

-- | Add a warning at an explicitly supplied position.
--
-- Unlike 'addWarning', the position stored in the lexer state is not
-- consulted: the warning is tagged with the 'Position' passed by the caller
-- and prepended to the accumulated 'warnings'.
addWarningAt :: Position -> LexWarningType -> Lex ()
addWarningAt where_ kind = Lex record
  where
    -- The state is otherwise returned unchanged; the action yields unit.
    record st@LexState{ warnings = acc } =
      LexResult st{ warnings = LexWarning kind where_ : acc } ()
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
Octree-0.5.cabal:39:3: Non breaking spaces at 39:3, 41:3, 43:3
Octree-0.5.cabal:39:1: Non breaking spaces at 39:1, 41:1, 43:1
cabal-version: >=1.8
name: Octree
version: 0.5
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
monad-param.cabal:19:3: Tabs used as indentation at 19:3, 20:3
monad-param.cabal:19:1: Tabs used as indentation at 19:1, 20:1
name: monad-param
version: 0.0.1
license: BSD3
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
th-lift-instances.cabal:15:9: Tabs used as indentation at 15:9
th-lift-instances.cabal:15:1: Tabs used as indentation at 15:1
cabal-version: >=1.10
name: th-lift-instances
version: 0.1.4
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# cabal check
Warning: These warnings may cause trouble when distributing the package:
Warning: pkg.cabal:1:2: Byte-order mark found at the beginning of the file
Warning: pkg.cabal:1:1: Byte-order mark found at the beginning of the file
Warning: The following errors will cause portability problems on other environments:
Warning: ./pkg.cabal starts with an Unicode byte order mark (BOM). This may cause problems with older cabal versions.
Warning: Hackage would reject this package.
26 changes: 13 additions & 13 deletions templates/Lexer.x
Original file line number Diff line number Diff line change
Expand Up @@ -83,22 +83,22 @@ $instresc = $printable
tokens :-

<0> {
@bom? { \_ len _ -> do
when (len /= 0) $ addWarning LexWarningBOM
@bom? { \pos len _ -> do
when (len /= 0) $ addWarningAt pos LexWarningBOM
setStartCode bol_section
lexToken
}
}

<bol_section, bol_field_layout, bol_field_braces> {
@nbspspacetab* @nl { \_pos len inp -> checkWhitespace len inp >> adjustPos retPos >> lexToken }
@nbspspacetab* @nl { \pos len inp -> checkWhitespace pos len inp >> adjustPos retPos >> lexToken }
-- no @nl here to allow for comments on last line of the file with no trailing \n
$spacetab* "--" $comment* ; -- TODO: check the lack of @nl works here
-- including counting line numbers
}

<bol_section> {
@nbspspacetab* { \pos len inp -> checkLeadingWhitespace len inp >>
@nbspspacetab* { \pos len inp -> checkLeadingWhitespace pos len inp >>
if B.length inp == len
then return (L pos EOF)
else setStartCode in_section
Expand All @@ -123,7 +123,7 @@ tokens :-
}

<bol_field_layout> {
@nbspspacetab* { \pos len inp -> checkLeadingWhitespace len inp >>= \len' ->
@nbspspacetab* { \pos len inp -> checkLeadingWhitespace pos len inp >>= \len' ->
if B.length inp == len
then return (L pos EOF)
else setStartCode in_field_layout
Expand Down Expand Up @@ -172,17 +172,17 @@ toki t pos len input = return $! L pos (t (B.take len input))
tok :: Token -> Position -> Int -> ByteString -> Lex LToken
tok t pos _len _input = return $! L pos t

checkLeadingWhitespace :: Int -> ByteString -> Lex Int
checkLeadingWhitespace len bs
checkLeadingWhitespace :: Position -> Int -> ByteString -> Lex Int
checkLeadingWhitespace pos len bs
| B.any (== 9) (B.take len bs) = do
addWarning LexWarningTab
checkWhitespace len bs
| otherwise = checkWhitespace len bs
addWarningAt pos LexWarningTab
checkWhitespace pos len bs
| otherwise = checkWhitespace pos len bs

checkWhitespace :: Int -> ByteString -> Lex Int
checkWhitespace len bs
checkWhitespace :: Position -> Int -> ByteString -> Lex Int
checkWhitespace pos len bs
| B.any (== 194) (B.take len bs) = do
addWarning LexWarningNBSP
addWarningAt pos LexWarningNBSP
return $ len - B.count 194 (B.take len bs)
| otherwise = return len

Expand Down

0 comments on commit 3103b87

Please sign in to comment.