robstewart57 · robstewart57 · Nov 26, 2016 · Nov 17, 2016 · Nov 17, 2016 · Nov 17, 2016
diff --git a/bench/MainCriterion.hs b/bench/MainCriterion.hs
@@ -44,8 +44,9 @@ main =
   defaultMainWith
     (defaultConfig {resamples = 100})
     [ env
-      -- fawltyContent <- T.pack <$> readFile "data/ttl/fawlty1.ttl"
         (do rdfContent <- T.pack <$> readFile "bills.099.actions.rdf"
+            fawltyContentTurtle <- T.pack <$> readFile "data/ttl/fawlty1.ttl"
+            fawltyContentNTriples <- T.pack <$> readFile "data/nt/all-fawlty-towers.nt"
             let (Right rdf1) =
                   parseString (XmlParser Nothing Nothing) rdfContent
             let (Right rdf2) =
@@ -59,21 +60,44 @@ main =
               ( rdf1 :: RDF TList
               , rdf2 :: RDF AdjHashMap
               , triples :: Triples
-              )) $ \ ~(triplesList, adjMap,triples) ->
+              , fawltyContentNTriples :: T.Text
+              , fawltyContentTurtle :: T.Text
+              )) $ \ ~(triplesList, adjMap, triples, fawltyContentNTriples, fawltyContentTurtle) ->
         bgroup
           "rdf4h"
-          --  bgroup
-          --     "parsers"
-          --     [ bench "AdjHashMap" $
-          --       nf (parseTtlRDF :: T.Text -> RDF AdjHashMap) fawlty_towers
-          --     , bench "HashSP" $
-          --       nf (parseTtlRDF :: T.Text -> RDF HashSP) fawlty_towers
-          --     , bench "SP" $ nf (parseTtlRDF :: T.Text -> RDF SP) fawlty_towers
-          --     , bench "TList" $
-          --       nf (parseTtlRDF :: T.Text -> RDF TList) fawlty_towers
-          --     ]
-          -- ,
-          [ bgroup
+           [ bgroup
+              "parsers"
+              [ bench "ntriples-parsec" $
+                nf (\t ->
+                      let res = parseNTriplesStringParsec t :: Either ParseFailure (RDF TList)
+                      in case res of
+                        Left e -> error (show e)
+                        Right rdfG -> rdfG
+                   ) fawltyContentNTriples
+              , bench "ntriples-attoparsec" $
+                nf (\t ->
+                      let res = parseNTriplesStringAttoparsec t :: Either ParseFailure (RDF TList)
+                      in case res of
+                        Left e -> error (show e)
+                        Right rdfG -> rdfG
+                   ) fawltyContentNTriples
+              , bench "turtle-parsec" $
+                nf (\t ->
+                      let res = parseTurtleStringParsec Nothing Nothing t :: Either ParseFailure (RDF TList)
+                      in case res of
+                        Left e -> error (show e)
+                        Right rdfG -> rdfG
+                   ) fawltyContentTurtle
+              , bench "turtle-attoparsec" $
+                nf (\t ->
+                      let res = parseTurtleStringAttoparsec Nothing Nothing t :: Either ParseFailure (RDF TList)
+                      in case res of
+                        Left e -> error (show e)
+                        Right rdfG -> rdfG
+                   ) fawltyContentTurtle
+              ]
+          ,
+            bgroup
               "query"
               (queryBench "TList" triplesList ++
                queryBench "AdjHashMap" adjMap

diff --git a/rdf4h.cabal b/rdf4h.cabal
@@ -40,7 +40,8 @@ library
                  , Text.RDF.RDF4H.NTriplesParser
                  , Text.RDF.RDF4H.NTriplesSerializer
                  , Text.RDF.RDF4H.XmlParser
-  build-depends:   base >= 4.8.0.0
+  build-depends:   attoparsec >= 0.13.1.0
+                 , base >= 4.8.0.0
                  , bytestring
                  , directory
                  , containers
@@ -55,6 +56,8 @@ library
                  , text-binary
                  , utf8-string
                  , hgal
+                 , parsers
+                 , mtl
   if impl(ghc < 7.6)
     build-depends: ghc-prim
 

diff --git a/src/Data/RDF/Types.hs b/src/Data/RDF/Types.hs
@@ -1,9 +1,7 @@
 {-# LANGUAGE GeneralizedNewtypeDeriving #-}
-{-# LANGUAGE StandaloneDeriving #-}
 {-# LANGUAGE TypeFamilies #-}
 {-# LANGUAGE DeriveGeneric #-}
 {-# LANGUAGE OverloadedStrings #-}
-{-# LANGUAGE GeneralizedNewtypeDeriving #-}
 
 module Data.RDF.Types (
 
@@ -48,7 +46,6 @@ import qualified Data.Text as T
 import System.IO
 import Text.Printf
 import Data.Binary
-import Control.Monad (guard)
 import Data.Map(Map)
 import Data.Maybe (fromJust)
 import GHC.Generics (Generic)
@@ -57,11 +54,14 @@ import qualified Data.List as List
 import qualified Data.Map as Map
 import qualified Network.URI as Network (uriPath,parseURI)
 import Control.DeepSeq (NFData,rnf)
-import Text.Parsec
-import Text.Parsec.Text
+import Text.Parsec(ParseError,parse)
 import Network.URI
 import Codec.Binary.UTF8.String
 
+import Text.Parser.Char
+import Text.Parser.Combinators
+import Control.Applicative
+
 -------------------
 -- LValue and constructor functions
 
@@ -177,19 +177,17 @@ isRdfURI :: T.Text -> Either ParseError T.Text
 isRdfURI t = parse (isRdfURIParser  <* eof) ("Invalid URI: " ++ T.unpack t) t
 
 -- [18]	IRIREF from Turtle spec
-isRdfURIParser :: GenParser () T.Text
-isRdfURIParser = T.concat <$> many (T.singleton <$> noneOf (['\x00'..'\x20'] ++ [' ','<','>','"','{','}','|','^','`','\\']) <|> nt_uchar)
+isRdfURIParser :: CharParsing m => m T.Text
+isRdfURIParser = T.concat <$> many (T.singleton <$> noneOf (['\x00'..'\x20'] ++ " <>\"{}|^`\\") <|> nt_uchar)
 
 -- [10] UCHAR
-nt_uchar :: GenParser () T.Text
+nt_uchar :: CharParsing m => m T.Text
 nt_uchar =
-    (try (char '\\' >> char 'u' >> count 4 hexDigit >>= \cs -> return $ T.pack (uEscapedToXEscaped cs)) <|>
-     try (char '\\' >> char 'U' >> count 8 hexDigit >>= \cs -> return $ T.pack (uEscapedToXEscaped cs)))
+    -- Both escape forms are decoded by the same helper; only the marker and digit count differ.
+    T.pack . uEscapedToXEscaped <$> (try (string "\\u" *> count 4 hexDigit) <|> try (string "\\U" *> count 8 hexDigit))
 
 uEscapedToXEscaped :: String -> String
-uEscapedToXEscaped ss =
-    let str = ['\\','x'] ++ ss
-    in read ("\"" ++ str ++ "\"")
+uEscapedToXEscaped ss = read ("\"\\x" ++ ss ++ "\"")
 
 -- |Validate a Text URI and return it in a @Just Text@ if it is
 --  valid, otherwise @Nothing@ is returned. See 'unodeValidate'.
@@ -207,47 +205,29 @@ uriValidateString t = case isRdfURIString of
     isRdfURIString = parse (isRdfURIParserS  <* eof) ("Invalid URI: " ++ t) t
     isRdfURIParserS = many (validUriChar <|> nt_ucharS)
     nt_ucharS =
-        (try (char '\\' >> char 'u' >> count 4 hexDigit >>= return . head . uEscapedToXEscaped) <|>
-         try (char '\\' >> char 'U' >> count 8 hexDigit >>= return . head . uEscapedToXEscaped))
+        try (head . uEscapedToXEscaped <$> (string "\\u" *> count 4 hexDigit)) <|>
+        try (head . uEscapedToXEscaped <$> (string "\\U" *> count 8 hexDigit))
     -- [18]	IRIREF from Turtle spec
-    validUriChar = try $ do
-        c <- anyChar
-        guard $ not (c >= '\x00' && c <= '\x20') && c `notElem` [' ','<','>','"','{','}','|','^','`','\\']
-        return c
+    validUriChar = try $ satisfy $ \c ->
+      not (c >= '\x00' && c <= '\x20')
+      && c `notElem` [' ','<','>','"','{','}','|','^','`','\\']
 
 -- | Escapes @\Uxxxxxxxx@ and @\uxxxx@ character sequences according
 --   to the RDF specification.
 escapeRDFSyntax :: T.Text -> T.Text
 escapeRDFSyntax t = T.pack uri
     where
       Right uri = parse unicodeEscParser "" (T.unpack t)
-      unicodeEscParser :: Stream s m Char => ParsecT s u m String
-      unicodeEscParser = do
-                ss <- many (
-                    try (do { _ <- char '\\'
-                            ; _ <- char 'U'
-                            ; pos1 <- hexDigit
-                            ; pos2 <- hexDigit
-                            ; pos3 <- hexDigit
-                            ; pos4 <- hexDigit
-                            ; pos5 <- hexDigit
-                            ; pos6 <- hexDigit
-                            ; pos7 <- hexDigit
-                            ; pos8 <- hexDigit
-                            ; let str = ['\\','x',pos1,pos2,pos3,pos4,pos5,pos6,pos7,pos8]
-                            ; return (read ("\"" ++ str ++ "\"") :: String)})
+      unicodeEscParser :: (CharParsing m, Monad m) => m String
+      unicodeEscParser =
+                concat <$> many (
+                    try (do { str <- ("\\x"++) <$> (string "\\U" *> count 8 hexDigit)
+                            ; pure (read ("\"" ++ str ++ "\"") :: String)})
                    <|>
-                    try (do { _ <- char '\\'
-                            ; _ <- char 'u'
-                            ; pos1 <- hexDigit
-                            ; pos2 <- hexDigit
-                            ; pos3 <- hexDigit
-                            ; pos4 <- hexDigit
-                            ; let str = ['\\','x',pos1,pos2,pos3,pos4]
-                            ; return (read ("\"" ++ str ++ "\"") :: String)})
-                   <|>
-                    (anyChar >>= \c -> return [c]))
-                return (concat ss :: String)
+                    try (do { str <- ("\\x"++) <$> (string "\\u" *> count 4 hexDigit)
+                            ; pure (read ("\"" ++ str ++ "\"") :: String)})
+                   <|> (pure <$> anyChar)
+                   )
 
 
 -- |Return a blank node using the given string identifier.
@@ -335,7 +315,7 @@ data family RDF a
 -- For more information about the concept of an RDF graph, see
 -- the following: <http://www.w3.org/TR/rdf-concepts/#section-rdf-graph>.
 class (Generic rdfImpl, NFData rdfImpl) => Rdf rdfImpl where
-  
+
   -- |Return the base URL of this RDF, if any.
   baseUrl :: RDF rdfImpl -> Maybe BaseUrl
 
@@ -411,7 +391,7 @@ class (Generic rdfImpl, NFData rdfImpl) => Rdf rdfImpl where
   showGraph     :: RDF rdfImpl -> String
 
 instance (Rdf a) => Show (RDF a) where
-  show a = showGraph a
+  show = showGraph
 
 -- |An RdfParser is a parser that knows how to parse 1 format of RDF and
 -- can parse an RDF document of that type from a string, a file, or a URL.
@@ -747,10 +727,9 @@ _decimalStr s =     -- haskell double parser doesn't handle '1.'..,
 
 -- | Removes "file://" schema from URIs in 'UNode' nodes
 fileSchemeToFilePath :: Node -> Maybe T.Text
-fileSchemeToFilePath (UNode fileScheme) =
-    if T.pack "file://" `T.isPrefixOf` fileScheme
-    then fmap (T.pack . Network.uriPath) (Network.parseURI (T.unpack fileScheme))
-    else if T.pack "http://" `T.isPrefixOf` fileScheme
-         then fmap (T.pack . Network.uriPath) (Network.parseURI (T.unpack fileScheme))
-         else Nothing
+fileSchemeToFilePath (UNode fileScheme)
+    | T.pack "file://" `T.isPrefixOf` fileScheme
+      = fmap (T.pack . Network.uriPath) (Network.parseURI (T.unpack fileScheme))
+    | T.pack "http://" `T.isPrefixOf` fileScheme
+      = fmap (T.pack . Network.uriPath) (Network.parseURI (T.unpack fileScheme))
 fileSchemeToFilePath _ = Nothing