Skip to content

Commit

Permalink
Replace XHTML mode with content types
Browse files Browse the repository at this point in the history
  • Loading branch information
dahlia committed Nov 8, 2021
1 parent b3c119f commit d78fe92
Show file tree
Hide file tree
Showing 12 changed files with 262 additions and 38 deletions.
32 changes: 32 additions & 0 deletions CHANGES.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,13 +8,42 @@ To be released.

- Since this version, it requires GHC 8.8.* at least, and supports GHC 9.0.*
at most.

- Now supports several content types besides HTML/XHTML. [[#18]]

The below Haskell APIs changed:

- Added `Text.Seonbi.ContentTypes` module.
- Added `contentType` field for `Configuration m a`.
- Removed `xhtml` field for `Configuration m a` in favour of
new `contentType` field for the same type.

The below CLI options changed:

- Added `-t`/`--content-type` option with the default value `text/html`.
- Removed `-x`/`--xhtml` option in favour of new
`-t`/`--content-type` option. In order to use XHTML mode, give it
`-t application/xhtml+xml` option.

The below HTTP APIs changed:

- Added an optional field `"contentType"` with the default value
`"text/html"`.
- Removed `"xhtml"` field in favour of new `"contentType"` field.
In order to use XHTML mode, configure `"contentType"` field with
`"application/xhtml+xml"`.

- Added `Text.Seonbi.Html.Lang` module.

- Some transformations inappropriate for non-Korean contents are no longer
applied to elements written in languages other than Korean. The functions
below respect elements' `lang` attributes: [[#10]]

- `Text.Seonbi.Hanja.phoneticizeHanja`
- `Text.Seonbi.Punctuation.normalizeStops`

- Removed several functions from `Text.Seonbi.Trie` module:

- `toListBy`
- `lookupBy`
- `submap`
Expand All @@ -25,15 +54,18 @@ To be released.
- `delete`
- `mapBy`
- `filterMap`

- `Text.Seonbi.Trie.Trie` type is not an instance of the following typeclasses
anymore:

- `Generic a => Generic (Trie a)`
- `Binary a => Binary (Trie a)`
- `Generic1 Trie`
- `type Rep (Trie a)`
- `type Rep1 Trie`

[#10]: https://github.com/dahlia/seonbi/issues/10
[#18]: https://github.com/dahlia/seonbi/issues/18


Version 0.2.3
Expand Down
16 changes: 14 additions & 2 deletions app/seonbi-api.hs
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ import Data.Aeson
import qualified Data.Aeson.Types
import qualified Data.ByteString as B
import qualified Data.Map.Strict as M
import qualified Data.Set as S
import Data.Text
import Data.Text.Encoding
import Network.Wai
Expand Down Expand Up @@ -50,7 +51,7 @@ instance FromJSON Input where
, intercalate ", " (M.keys presets')
]
Nothing -> do
xhtml' <- v .:? "xhtml" .!= False
contentType' <- v .:? "contentType" .!= "text/html"
quote' <- v .:? "quote"
cite' <- v .:? "cite"
arrow' <- v .:? "arrow"
Expand All @@ -60,7 +61,7 @@ instance FromJSON Input where
hanja' <- v .:? "hanja" .!= Nothing
return Configuration
{ debugLogger = Nothing
, xhtml = xhtml'
, contentType = contentType'
, quote = quote'
, cite = cite'
, arrow = arrow'
Expand All @@ -71,6 +72,17 @@ instance FromJSON Input where
}
return $ Input sourceHtml' config

-- | Decodes a JSON string into a 'ContentType'.  Only members of
-- 'contentTypes' are accepted; any other string makes the parser fail with
-- a message that lists the available content types.
instance FromJSON ContentType where
    parseJSON = withText "ContentType" $ \ raw ->
        let parsed = contentTypeFromText raw
            available = contentTypeText <$> S.elems contentTypes
        in
            if parsed `S.member` contentTypes
            then return parsed
            else fail $ unpack $ Data.Text.concat
                [ "Unknown content type: "
                , raw
                , "; available content types: "
                , intercalate ", " available
                ]

instance FromJSON QuoteOption
instance FromJSON CiteOption
instance FromJSON ArrowOption
Expand Down
28 changes: 19 additions & 9 deletions app/seonbi.hs
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ import Codec.Text.IConv
#endif
import Data.ByteString.Lazy
import Data.Map.Strict
import qualified Data.Set as S
import qualified Data.Text as T
import Data.Text.Lazy
import Data.Text.Lazy.Encoding
Expand Down Expand Up @@ -92,7 +93,7 @@ data Seonbi = Seonbi
, config :: Configuration IO ()
, dictionaries :: [FilePath]
, noKrStdict :: Bool
, xhtml :: Bool
, contentType' :: ContentType
, debug :: Bool
, version :: Bool
, input :: FilePath
Expand Down Expand Up @@ -137,8 +138,10 @@ enumKeyword :: (Enum a, Show a) => a -> String
enumKeyword = T.unpack . enumKeyword'

-- | Lists the keywords of an enumeration type as one comma-separated
-- 'String'.  Every value from @toEnum 0@ onwards is converted with
-- @enumKeyword'@ and the results are joined with 'commas'.  The 'Proxy'
-- argument only selects the type; its value is ignored.
enumKeywords :: forall a . (Enum a, Show a) => Proxy a -> String
enumKeywords _ = commas $ enumKeyword' <$> [(toEnum 0 :: a) ..]

-- | Joins the given texts with @", "@ and converts the result to
-- a 'String'.
commas :: [T.Text] -> String
commas items = T.unpack (T.intercalate (T.pack ", ") items)

parser :: Parser Seonbi
parser = Seonbi
Expand Down Expand Up @@ -166,7 +169,7 @@ parser = Seonbi
"Available presets: " ++
Data.List.intercalate ", " (Data.Map.Strict.keys presets'))
)
<|> ( Configuration Nothing False
<|> ( Configuration Nothing "text/html"
<$> ( flag' Nothing
( long "no-quote"
<> short 'Q'
Expand Down Expand Up @@ -297,10 +300,14 @@ parser = Seonbi
<> help ("Do not use Standard Korean Language Dictionary " ++
"(標準國語大辭典) by South Korean NIKL (國立國語院)")
)
<*> switch
( long "xhtml"
<> short 'x'
<> help "XHTML mode"
<*> strOption
( long "content-type"
<> short 't'
<> metavar "TYPE"
<> value "text/html"
<> help ("Content type. Available types: " ++ commas
(contentTypeText <$> S.elems contentTypes) ++
" [default: text/html]")
)
<*> switch
( long "debug"
Expand Down Expand Up @@ -347,6 +354,7 @@ main = do
, config
, dictionaries
, noKrStdict
, contentType'
, debug
, version
, input
Expand All @@ -371,6 +379,7 @@ main = do
{ debugLogger = debugLogger'
, hanja = Just hanja' { reading = reading' }
}
let configWithContentType = config' { contentType = contentType' }
when version $ do
Prelude.putStrLn $ showVersion Meta.version
exitSuccess
Expand All @@ -390,7 +399,8 @@ main = do
enc -> enc
debugPrint ("encoding: " ++ encodingName)
result <- catchIOError
(transformHtmlLazyText config' $ toUnicode encodingName contents)
(transformHtmlLazyText configWithContentType $
toUnicode encodingName contents)
(\ e -> hPutStrLn stderr (ioeGetErrorString e) >> exitFailure)
let resultBytes = fromUnicode encodingName result
if output == "-"
Expand Down
9 changes: 8 additions & 1 deletion demo/src/Demo.elm
Original file line number Diff line number Diff line change
Expand Up @@ -243,7 +243,14 @@ makeInput source =
[ ( "preset", Json.Encode.string "ko-kp" ) ]

Custom options ->
[ ( "xhtml", Json.Encode.bool options.xhtml )
[ ( "contentType"
, Json.Encode.string <|
if options.xhtml then
"application/xhtml+xml"

else
"text/html"
)
, ( "quote"
, Json.Encode.string <|
case options.quote of
Expand Down
1 change: 1 addition & 0 deletions package.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,7 @@ library:
- attoparsec >= 0.12 && < 1
- bytestring-trie >= 0.2.5 && < 0.3
- cassava >= 0.5 && < 0.6
- case-insensitive >= 1 && < 2
- data-default >= 0.2 && < 1
- filepath >= 1 && < 2
- file-embed >= 0.0.10 && < 0.0.16
Expand Down
4 changes: 2 additions & 2 deletions scripts/deno/mod.ts
Original file line number Diff line number Diff line change
Expand Up @@ -40,8 +40,8 @@ export type Dictionary = "kr-stdict";
* See also <https://github.com/dahlia/seonbi#http-api>.
*/
export interface Options {
/** Whether to format the result in XHTML. */
xhtml: boolean;
/**
 * Content type of the text to transform.  Use `"application/xhtml+xml"`
 * for what used to be the XHTML mode.
 */
contentType: "text/html" | "application/xhtml+xml";
/** Quoting options. */
quote:
| "CurvedQuotes"
Expand Down
2 changes: 1 addition & 1 deletion scripts/deno/test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ import {
import { assertEquals } from "https://deno.land/std@0.106.0/testing/asserts.ts";

const hanjaInParens: Options = {
xhtml: false,
contentType: "text/html",
quote: "CurvedQuotes",
cite: null,
arrow: null,
Expand Down
6 changes: 3 additions & 3 deletions scripts/showcase-svg/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -17,21 +17,21 @@ build/showcase.svg: template.svg build/ko-kr.html build/ko-kp.html build/ko-kore
# Render build/input.html with the ko-kr preset, printing the result as XHTML.
build/ko-kr.html: build/input.html
	$(SEONBI) \
		--preset ko-kr \
		--content-type application/xhtml+xml \
		--output build/ko-kr.html \
		build/input.html

# Render build/input.html with the ko-kp preset, printing the result as XHTML.
build/ko-kp.html: build/input.html
	$(SEONBI) \
		--preset ko-kp \
		--content-type application/xhtml+xml \
		--output build/ko-kp.html \
		build/input.html

# Render build/input.html with hanja shown in ruby, printing the result as
# XHTML.
build/ko-kore.html: build/input.html
	$(SEONBI) \
		--render-hanja hanja-in-ruby \
		--content-type application/xhtml+xml \
		--output build/ko-kore.html \
		build/input.html

Expand Down
125 changes: 125 additions & 0 deletions src/Text/Seonbi/ContentTypes.hs
Original file line number Diff line number Diff line change
@@ -0,0 +1,125 @@
{-# LANGUAGE CPP #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE RankNTypes #-}
module Text.Seonbi.ContentTypes
( ContentType
, HtmlTransformer
, TextTransformer
, asHtmlTransformer
, asHtmlTransformer'
, asXhtmlTransformer
, contentTypeFromText
, contentTypes
, contentTypeText
, transformWithContentType
) where

#if MIN_VERSION_base(4,13,0)
import Prelude hiding (MonadFail)
#endif

import Control.Monad.Fail (MonadFail)
import Data.List

import Data.CaseInsensitive
import Data.Set
import Data.Text as ST
import Data.Text.Lazy as LT

import Text.Seonbi.Html

-- | Represents a function that transforms an 'HtmlEntity' list inside
-- a monad @m@.
--
-- The synonym carries its own @('Monad' m, 'MonadFail' m)@ context, so
-- a transformer may sequence effects and may abort through 'fail'.
type HtmlTransformer m
= (Monad m, MonadFail m) => [HtmlEntity] -> m [HtmlEntity]

-- | Represents a function that transforms a lazy text ('LT.Text') inside
-- a monad @m@.
--
-- The synonym carries its own @('Monad' m, 'MonadFail' m)@ context.
type TextTransformer m
= (Monad m, MonadFail m) => LT.Text -> m LT.Text

-- | Represents a function that transforms an 'HtmlTransformer' into
-- a 'TextTransformer'.
--
-- This synonym is internal: it does not appear in the module's export list.
type TransformerTransformer m
= (Monad m, MonadFail m) => HtmlTransformer m -> TextTransformer m

-- | Lifts an 'HtmlTransformer' to a 'TextTransformer' that works on HTML or
-- XHTML text: the text is scanned into entities, the entities are handed to
-- the given transformer, and the result is printed back to text.  Fails
-- (through 'fail') unless the whole input is scanned successfully.
asHtmlTransformer'
    :: (Monad m, MonadFail m)
    => Bool
    -- ^ 'True' to print the result as XHTML, 'False' to print it as HTML.
    -> TransformerTransformer m
    -- ^ A 'TransformerTransformer' that turns an 'HtmlTransformer' into
    -- a 'TextTransformer' for HTML/XHTML text.
asHtmlTransformer' xhtml transformer htmlText =
    case scanHtml htmlText of
        Done "" entities -> render <$> transformer entities
        _ -> fail "failed to parse input"
  where
    -- The printer is chosen once from the XHTML flag.
    render :: [HtmlEntity] -> LT.Text
    render = if xhtml then printXhtml else printHtml

-- | Transforms an 'HtmlTransformer' into a 'TextTransformer' which transforms
-- an HTML text.
--
-- This is @asHtmlTransformer'@ with its XHTML flag fixed to 'False'.
asHtmlTransformer :: (Monad m, MonadFail m) => TransformerTransformer m
asHtmlTransformer = asHtmlTransformer' False

-- | Transforms an 'HtmlTransformer' into a 'TextTransformer' which transforms
-- an XHTML text.
--
-- This is @asHtmlTransformer'@ with its XHTML flag fixed to 'True'.
asXhtmlTransformer :: (Monad m, MonadFail m) => TransformerTransformer m
asXhtmlTransformer = asHtmlTransformer' True

-- | Represents a case-insensitive content type, e.g. @text\/html@.
--
-- It is a 'CI' wrapper around a strict 'ST.Text'; use 'contentTypeFromText'
-- and 'contentTypeText' to convert from and to plain text.
type ContentType = CI ST.Text

-- | Builds a 'ContentType' from its textual name by wrapping the text with
-- 'mk'.  No check against 'contentTypes' is made here.
contentTypeFromText :: ST.Text -> ContentType
contentTypeFromText name = mk name

-- | Extracts the textual name of a 'ContentType', as given by 'original'.
contentTypeText :: ContentType -> ST.Text
contentTypeText contentType = original contentType

-- | Wraps a 'TransformerTransformer' in a newtype so that it can be kept as
-- an element of the 'transformers' list.
-- NOTE(review): presumably needed because the constrained (rank-N) synonym
-- cannot be used directly as a list element type -- confirm.
newtype TransformerTransformer' m =
TransformerTransformer' (TransformerTransformer m)
-- | The table of supported content types, pairing each content type with
-- the wrapped 'TransformerTransformer' that handles it.
transformers :: (Monad m, MonadFail m)
=> [(ContentType, TransformerTransformer' m)]
transformers =
[ ("text/html", TransformerTransformer' asHtmlTransformer)
, ("application/xhtml+xml", TransformerTransformer' asXhtmlTransformer)
]

-- | The set of supported content types, i.e. the keys of the internal
-- transformer table.
contentTypes :: Set ContentType
contentTypes = Data.Set.fromList [name | (name, _) <- ioTransformers]
  where
    -- The table is polymorphic in its monad, but its keys are the same for
    -- every instantiation; 'IO' is chosen only to fix the type.
    ioTransformers :: [(ContentType, TransformerTransformer' IO)]
    ioTransformers = transformers

-- | Looks up the wrapped 'TransformerTransformer' registered for the given
-- content type.  Gives 'Nothing' when the content type has no entry in
-- 'transformers'.
getTransformerTransformer :: (Monad m, MonadFail m)
                          => ContentType
                          -> Maybe (TransformerTransformer' m)
getTransformerTransformer contentType =
    Prelude.lookup contentType transformers

-- | Runs an 'HtmlTransformer' over a text, choosing how the text is read
-- and written back from the given content type.
transformWithContentType
    :: (Monad m, MonadFail m)
    => ContentType
    -- ^ The content type of the text.  When it is unsupported (i.e. not in
    -- 'contentTypes'), this function fails.
    -> HtmlTransformer m
    -- ^ The 'HtmlTransformer' to apply.
    -> LT.Text
    -- ^ An input text to transform.
    -> m LT.Text
    -- ^ The transformed text.
transformWithContentType contentType transformer inputText =
    case getTransformerTransformer contentType of
        Just (TransformerTransformer' convert) ->
            convert transformer inputText
        Nothing ->
            fail (ST.unpack errorMessage)
  where
    errorMessage :: ST.Text
    errorMessage = "unknown content type: " <> contentTypeText contentType
Loading

0 comments on commit d78fe92

Please sign in to comment.