Skip to content

Commit

Permalink
v1.0.0.0: Migrate to libpcre2
Browse files Browse the repository at this point in the history
  • Loading branch information
peteryland committed Feb 13, 2025
1 parent e1a79d3 commit 7c0049f
Show file tree
Hide file tree
Showing 9 changed files with 483 additions and 334 deletions.
4 changes: 4 additions & 0 deletions ChangeLog.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
See also http://pvp.haskell.org/faq

## 1.0.0.0

- Migrate from the obsolete PCRE library (`libpcre`, packaged as `pcre3` by some distributions) to the new (confusingly named) PCRE2 one (`libpcre2-8`)

## 0.95.0.0 revision 6

- Allow `containers-0.7`
Expand Down
28 changes: 22 additions & 6 deletions regex-pcre.cabal
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
Cabal-Version: 1.12
Name: regex-pcre
Version: 0.95.0.0
x-revision: 6
Version: 1.0.0.0

build-type: Simple
license: BSD3
Expand Down Expand Up @@ -39,7 +38,7 @@ tested-with:
flag pkg-config
default: True
manual: True
description: Use @pkg-config(1)@ to locate foreign @pcre@ library.
description: Use @pkg-config(1)@ to locate foreign @pcre2@ library.

source-repository head
type: git
Expand All @@ -48,7 +47,7 @@ source-repository head
source-repository this
type: git
location: https://github.com/haskell-hvr/regex-pcre.git
tag: v0.95.0.0-r6
tag: v1.0.0.0

library
hs-source-dirs: src
Expand Down Expand Up @@ -84,11 +83,28 @@ library
build-depends: fail == 4.9.*

if flag(pkg-config)
pkgconfig-depends: libpcre
pkgconfig-depends: libpcre2-8
else
extra-libraries: pcre
extra-libraries: pcre2-8

ghc-options:
-O2
-Wall -fno-warn-unused-imports
-- -Wcompat -- options cannot be changed in a revision

test-suite regex-pcre-test
default-language: Haskell2010
ghc-options: -Wall
type: exitcode-stdio-1.0
hs-source-dirs: test
main-is: Main.hs
build-depends: base >= 4.3 && < 5
, bytestring >= 0.9 && < 0.13
, HUnit >= 1.6 && < 1.7
, regex-pcre
, utf8-string >= 1.0 && < 1.1

if flag(pkg-config)
pkgconfig-depends: libpcre2-8
else
extra-libraries: pcre2-8
34 changes: 20 additions & 14 deletions src/Text/Regex/PCRE.hs
Original file line number Diff line number Diff line change
Expand Up @@ -4,14 +4,13 @@ expressions. If you import this along with other backends, then
you should do so with qualified imports, perhaps renamed for
convenience.
Using the provided 'CompOption' and 'ExecOption' values and if
'configUTF8' is True, then you might be able to send UTF8 encoded
ByteStrings to PCRE and get sensible results. This is currently
untested.
From version 1.0.0.0, this library uses the newer libpcre2, which
supports UTF8-encoded strings by default. As such, 'configUTF8'
now always returns True.
The regular expression can be provided as a 'ByteString', but it will
be copied and a NUL byte appended to make a 'CString' unless such a
byte is already present. Thus the regular expression cannot contain
byte is already present. Thus, the regular expression cannot contain
an explicit NUL byte. The search string is passed as a 'CStringLen'
and may contain NUL bytes and does not need to end in a NUL
byte. 'ByteString's are searched in place (via unsafeUseAsCStringLen).
Expand All @@ -20,9 +19,9 @@ A 'String' will be converted into a 'CString' or 'CStringLen' for
processing. Doing this repeatedly will be very inefficient.
The "Text.Regex.PCRE.String", "Text.Regex.PCRE.ByteString", and
"Text.Regex.PCRE.Wrap" modules provides both the high level interface
"Text.Regex.PCRE.Wrap" modules provide both the high-level interface
exported by this module and medium- and low-level interfaces that
returns error using Either structures.
return errors using 'Either' structures.
-}
{- Copyright : (c) Chris Kuklewicz 2007 -}
module Text.Regex.PCRE(getVersion_Text_Regex_PCRE
Expand All @@ -32,14 +31,21 @@ module Text.Regex.PCRE(getVersion_Text_Regex_PCRE

import Prelude hiding (fail)

import Text.Regex.PCRE.Wrap(Regex, CompOption(CompOption), ExecOption(ExecOption), (=~), (=~~),
import Text.Regex.PCRE.Wrap(
Regex, CompOption(CompOption), MatchOption(MatchOption),
(=~), (=~~),
unusedOffset, getNumSubs, configUTF8, getVersion,
compBlank, compAnchored, compAutoCallout, compCaseless,
compDollarEndOnly, compDotAll, compExtended, compExtra,
compFirstLine, compMultiline, compNoAutoCapture, compUngreedy,
compUTF8, compNoUTF8Check,
execBlank, execAnchored, execNotBOL, execNotEOL, execNotEmpty,
execNoUTF8Check, execPartial)
compBlank, compAnchored, compEndAnchored, compAllowEmptyClass,
compAltBSUX, compAltExtendedClass, compAltVerbnames,
compAutoCallout, compCaseless, compDollarEndOnly, compDotAll,
compDupNames, compExtended, compExtendedMore, compFirstLine,
compLiteral, compMatchUnsetBackref, compMultiline,
compNeverBackslashC, compNoAutoCapture, compNoAutoPossess,
compNoDotstarAnchor, compNoUTFCheck, compUngreedy, compUTF,
matchBlank, matchAnchored, matchCopyMatchedSubject,
matchDisableRecurseLoopCheck, matchEndAnchored, matchNotBOL,
matchNotEOL, matchNotEmpty, matchNotEmptyAtStart,
matchNoUTFCheck, matchPartialHard, matchPartialSoft)
import Text.Regex.PCRE.String()
import Text.Regex.PCRE.Sequence()
import Text.Regex.PCRE.ByteString()
Expand Down
58 changes: 36 additions & 22 deletions src/Text/Regex/PCRE/ByteString.hs
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ module Text.Regex.PCRE.ByteString(
MatchOffset,
MatchLength,
CompOption(CompOption),
ExecOption(ExecOption),
MatchOption(MatchOption),
ReturnCode,
WrapError,
-- ** Miscellaneous
Expand All @@ -23,26 +23,45 @@ module Text.Regex.PCRE.ByteString(
-- ** CompOption flags
compBlank,
compAnchored,
compEndAnchored, -- new in v1.0.0.0 (pcre2)
compAllowEmptyClass, -- new in v1.0.0.0 (pcre2)
compAltBSUX, -- new in v1.0.0.0 (pcre2)
compAltExtendedClass, -- new in v1.0.0.0 (pcre2)
compAltVerbnames, -- new in v1.0.0.0 (pcre2)
compAutoCallout,
compCaseless,
compDollarEndOnly,
compDotAll,
compDupNames, -- new in v1.0.0.0 (pcre2)
compExtended,
compExtra,
compExtendedMore, -- new in v1.0.0.0 (pcre2)
-- compExtra, -- obsoleted in v1.0.0.0, pcre2 is always strict in this way
compFirstLine,
compLiteral, -- new in v1.0.0.0 (pcre2)
compMatchUnsetBackref, -- new in v1.0.0.0 (pcre2)
compMultiline,
compNeverBackslashC, -- new in v1.0.0.0 (pcre2)
compNoAutoCapture,
compNoAutoPossess, -- new in v1.0.0.0 (pcre2)
compNoDotstarAnchor, -- new in v1.0.0.0 (pcre2)
-- compNoUTF8Check, -- obsoleted in v1.0.0.0 (pcre2), use compNoUTFCheck
compNoUTFCheck,
compUngreedy,
compUTF8,
compNoUTF8Check,
-- ** ExecOption flags
execBlank,
execAnchored,
execNotBOL,
execNotEOL,
execNotEmpty,
execNoUTF8Check,
execPartial
-- compUTF8, -- obsoleted in v1.0.0.0 (pcre2), use compUTF
compUTF,
-- ** MatchOption flags, new in v1.0.0.0 (pcre2), replacing the obsolete ExecOption flags
matchBlank,
matchAnchored,
matchCopyMatchedSubject, -- new in v1.0.0.0 (pcre2)
matchDisableRecurseLoopCheck, -- new in v1.0.0.0 (pcre2)
matchEndAnchored, -- new in v1.0.0.0 (pcre2)
matchNotBOL,
matchNotEOL,
matchNotEmpty,
matchNotEmptyAtStart, -- new in v1.0.0.0 (pcre2)
matchNoUTFCheck,
matchPartialHard,
matchPartialSoft
) where

import Prelude hiding (fail)
Expand All @@ -51,8 +70,8 @@ import Control.Monad.Fail (MonadFail(fail))
import Text.Regex.PCRE.Wrap -- all
import Data.Array(Array,listArray)
import Data.ByteString(ByteString)
import qualified Data.ByteString as B(empty,useAsCString,last,take,drop,null,pack)
import qualified Data.ByteString.Unsafe as B(unsafeUseAsCString,unsafeUseAsCStringLen)
import qualified Data.ByteString as B(empty,take,drop,pack)
import qualified Data.ByteString.Unsafe as B(unsafeUseAsCStringLen)
import System.IO.Unsafe(unsafePerformIO)
import Text.Regex.Base.RegexLike(RegexContext(..),RegexMaker(..),RegexLike(..),MatchOffset,MatchLength)
import Text.Regex.Base.Impl(polymatch,polymatchM)
Expand All @@ -75,7 +94,7 @@ asCStringLen s op = B.unsafeUseAsCStringLen s checked
myEmpty = B.pack [0]
trim (ptr,_) = (ptr,0)

instance RegexMaker Regex CompOption ExecOption ByteString where
instance RegexMaker Regex CompOption MatchOption ByteString where
makeRegexOpts c e pattern = unsafePerformIO $
compile c e pattern >>= unwrap
makeRegexOptsM c e pattern = either (fail.show) return $ unsafePerformIO $
Expand All @@ -95,15 +114,10 @@ instance RegexLike Regex ByteString where
-- | Compiles a regular expression
--
compile :: CompOption -- ^ (summed together)
-> ExecOption -- ^ (summed together)
-> MatchOption -- ^ (summed together)
-> ByteString -- ^ The regular expression to compile
-> IO (Either (MatchOffset,String) Regex) -- ^ Returns: the compiled regular expression
compile c e pattern = do
-- PCRE does not allow one to specify a length for the regular expression; it must be 0-terminated
let asCString bs = if (not (B.null bs)) && (0==B.last bs)
then B.unsafeUseAsCString bs
else B.useAsCString bs
asCString pattern (wrapCompile c e)
compile c e pattern = B.unsafeUseAsCStringLen pattern (wrapCompile c e)

-- ---------------------------------------------------------------------
-- | Matches a regular expression against a buffer, returning the buffer
Expand Down
68 changes: 40 additions & 28 deletions src/Text/Regex/PCRE/ByteString/Lazy.hs
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ module Text.Regex.PCRE.ByteString.Lazy(
MatchOffset,
MatchLength,
CompOption(CompOption),
ExecOption(ExecOption),
MatchOption(MatchOption),
ReturnCode,
WrapError,
-- ** Miscellaneous
Expand All @@ -23,41 +23,60 @@ module Text.Regex.PCRE.ByteString.Lazy(
-- ** CompOption flags
compBlank,
compAnchored,
compEndAnchored, -- new in v1.0.0.0 (pcre2)
compAllowEmptyClass, -- new in v1.0.0.0 (pcre2)
compAltBSUX, -- new in v1.0.0.0 (pcre2)
compAltExtendedClass, -- new in v1.0.0.0 (pcre2)
compAltVerbnames, -- new in v1.0.0.0 (pcre2)
compAutoCallout,
compCaseless,
compDollarEndOnly,
compDotAll,
compDupNames, -- new in v1.0.0.0 (pcre2)
compExtended,
compExtra,
compExtendedMore, -- new in v1.0.0.0 (pcre2)
-- compExtra, -- obsoleted in v1.0.0.0, pcre2 is always strict in this way
compFirstLine,
compLiteral, -- new in v1.0.0.0 (pcre2)
compMatchUnsetBackref, -- new in v1.0.0.0 (pcre2)
compMultiline,
compNeverBackslashC, -- new in v1.0.0.0 (pcre2)
compNoAutoCapture,
compNoAutoPossess, -- new in v1.0.0.0 (pcre2)
compNoDotstarAnchor, -- new in v1.0.0.0 (pcre2)
-- compNoUTF8Check, -- obsoleted in v1.0.0.0 (pcre2), use compNoUTFCheck
compNoUTFCheck,
compUngreedy,
compUTF8,
compNoUTF8Check,
-- ** ExecOption flags
execBlank,
execAnchored,
execNotBOL,
execNotEOL,
execNotEmpty,
execNoUTF8Check,
execPartial
-- compUTF8, -- obsoleted in v1.0.0.0 (pcre2), use compUTF
compUTF,
-- ** MatchOption flags, new in v1.0.0.0 (pcre2), replacing the obsolete ExecOption flags
matchBlank,
matchAnchored,
matchCopyMatchedSubject, -- new in v1.0.0.0 (pcre2)
matchDisableRecurseLoopCheck, -- new in v1.0.0.0 (pcre2)
matchEndAnchored, -- new in v1.0.0.0 (pcre2)
matchNotBOL,
matchNotEOL,
matchNotEmpty,
matchNotEmptyAtStart, -- new in v1.0.0.0 (pcre2)
matchNoUTFCheck,
matchPartialHard,
matchPartialSoft
) where

import Prelude hiding (fail)
import Control.Monad.Fail (MonadFail(fail))

import Text.Regex.PCRE.Wrap -- all
import Data.Array(Array)
import qualified Data.ByteString.Lazy as L(ByteString,toChunks,fromChunks,last,null,snoc)
import qualified Data.ByteString.Lazy as L(ByteString,toChunks,fromChunks)
import qualified Data.ByteString as B(ByteString,concat,pack)
import qualified Data.ByteString.Unsafe as B(unsafeUseAsCString,unsafeUseAsCStringLen)
import qualified Data.ByteString.Unsafe as B(unsafeUseAsCStringLen)
import System.IO.Unsafe(unsafePerformIO)
import Text.Regex.Base.RegexLike(RegexContext(..),RegexMaker(..),RegexLike(..),MatchOffset,MatchLength)
import Text.Regex.Base.Impl(polymatch,polymatchM)
import qualified Text.Regex.PCRE.ByteString as BS(execute,regexec)
import Foreign.C.String(CString,CStringLen)
import Foreign.C.String(CStringLen)
import Foreign(nullPtr)

instance RegexContext Regex L.ByteString L.ByteString where
Expand All @@ -76,12 +95,6 @@ unwrap :: (Show e) => Either e v -> IO v
unwrap x = case x of Left err -> fail ("Text.Regex.PCRE.ByteString.Lazy died: "++ show err)
Right v -> return v

{-# INLINE asCString #-}
asCString :: L.ByteString -> (CString -> IO a) -> IO a
asCString s = if (not (L.null s)) && (0==L.last s)
then B.unsafeUseAsCString (fromLazy s)
else B.unsafeUseAsCString (fromLazy (L.snoc s 0))

{-# INLINE asCStringLen #-}
asCStringLen :: L.ByteString -> (CStringLen -> IO a) -> IO a
asCStringLen ls op = B.unsafeUseAsCStringLen (fromLazy ls) checked
Expand All @@ -90,7 +103,7 @@ asCStringLen ls op = B.unsafeUseAsCStringLen (fromLazy ls) checked
myEmpty = B.pack [0]
trim (ptr,_) = (ptr,0)

instance RegexMaker Regex CompOption ExecOption L.ByteString where
instance RegexMaker Regex CompOption MatchOption L.ByteString where
makeRegexOpts c e pattern = unsafePerformIO $
compile c e pattern >>= unwrap
makeRegexOptsM c e pattern = either (fail.show) return $ unsafePerformIO $
Expand All @@ -109,19 +122,18 @@ instance RegexLike Regex L.ByteString where
-- ---------------------------------------------------------------------
-- | Compiles a regular expression
--
compile :: CompOption -- ^ (summed together)
-> ExecOption -- ^ (summed together)
-> L.ByteString -- ^ The regular expression to compile
compile :: CompOption -- ^ (summed together)
-> MatchOption -- ^ (summed together)
-> L.ByteString -- ^ The regular expression to compile
-> IO (Either (MatchOffset,String) Regex) -- ^ Returns: the compiled regular expression
compile c e pattern = do
asCString pattern (wrapCompile c e)
compile c e pattern = B.unsafeUseAsCStringLen (fromLazy pattern) (wrapCompile c e)

-- ---------------------------------------------------------------------
-- | Matches a regular expression against a buffer, returning the buffer
-- indices of the match, and any submatches
--
-- | Matches a regular expression against a string
execute :: Regex -- ^ Compiled regular expression
execute :: Regex -- ^ Compiled regular expression
-> L.ByteString -- ^ String to match against
-> IO (Either WrapError (Maybe (Array Int (MatchOffset,MatchLength))))
-- ^ Returns: 'Nothing' if the regex did not match the
Expand Down
Loading

0 comments on commit 7c0049f

Please sign in to comment.