{-# LANGUAGE CPP #-}
{-# LANGUAGE BangPatterns        #-}
{-# LANGUAGE BinaryLiterals      #-}
{-# LANGUAGE LambdaCase          #-}
{-# LANGUAGE NamedFieldPuns      #-}
{-# LANGUAGE OverloadedStrings   #-}
{-# LANGUAGE ScopedTypeVariables #-}
-- | SAX parser and API for XML.
module Xeno.SAX
  ( process
  , Process(..)
  , StringLike(..)
  , fold
  , validate
  , validateEx
  , dump
  , skipDoctype
  ) where

import           Control.Exception (throw)
import           Control.Monad (unless)
import           Control.Monad.ST (ST, runST)
import           Control.Monad.State.Strict (State, evalStateT, execState, modify', lift, get, put)
import           Control.Spork (spork)
import           Data.Bits (testBit)
import           Data.ByteString (ByteString)
import qualified Data.ByteString as S
import qualified Data.ByteString.Char8 as S8
import qualified Data.ByteString.Unsafe as SU
import           Data.Char (isSpace)
import           Data.Functor.Identity (Identity(..))
import           Data.Semigroup ()
import           Data.STRef (newSTRef, modifySTRef', readSTRef)
import           Data.Word (Word8, Word64)
import           Xeno.Types


class StringLike str where
    s_index'       :: str -> Int -> Word8
    elemIndexFrom' :: Word8 -> str -> Int -> Maybe Int
    drop'          :: Int -> str -> str
    substring'     :: str -> Int -> Int -> ByteString
    toBS           :: str -> ByteString

instance StringLike ByteString where
    s_index' :: ByteString -> Int -> Word8
s_index'       = ByteString -> Int -> Word8
s_index
    {-# INLINE s_index' #-}
    elemIndexFrom' :: Word8 -> ByteString -> Int -> Maybe Int
elemIndexFrom' = Word8 -> ByteString -> Int -> Maybe Int
elemIndexFrom
    {-# INLINE elemIndexFrom' #-}
    drop' :: Int -> ByteString -> ByteString
drop'          = Int -> ByteString -> ByteString
S.drop
    {-# INLINE drop' #-}
    substring' :: ByteString -> Int -> Int -> ByteString
substring'     = ByteString -> Int -> Int -> ByteString
substring
    {-# INLINE substring' #-}
    toBS :: ByteString -> ByteString
toBS           = ByteString -> ByteString
forall a. a -> a
id
    {-# INLINE toBS #-}

instance StringLike ByteStringZeroTerminated where
    s_index' :: ByteStringZeroTerminated -> Int -> Word8
s_index' (BSZT ByteString
ps) Int
n = ByteString
ps ByteString -> Int -> Word8
`SU.unsafeIndex` Int
n
    {-# INLINE s_index' #-}
    elemIndexFrom' :: Word8 -> ByteStringZeroTerminated -> Int -> Maybe Int
elemIndexFrom' Word8
w (BSZT ByteString
bs) Int
i = Word8 -> ByteString -> Int -> Maybe Int
elemIndexFrom Word8
w ByteString
bs Int
i
    {-# INLINE elemIndexFrom' #-}
    drop' :: Int -> ByteStringZeroTerminated -> ByteStringZeroTerminated
drop' Int
i (BSZT ByteString
bs) = ByteString -> ByteStringZeroTerminated
BSZT (ByteString -> ByteStringZeroTerminated)
-> ByteString -> ByteStringZeroTerminated
forall a b. (a -> b) -> a -> b
$ Int -> ByteString -> ByteString
S.drop Int
i ByteString
bs
    {-# INLINE drop' #-}
    substring' :: ByteStringZeroTerminated -> Int -> Int -> ByteString
substring' (BSZT ByteString
bs) Int
s Int
t = ByteString -> Int -> Int -> ByteString
substring ByteString
bs Int
s Int
t
    {-# INLINE substring' #-}
    toBS :: ByteStringZeroTerminated -> ByteString
toBS (BSZT ByteString
bs) = ByteString
bs
    {-# INLINE toBS #-}

-- | Parameters to the 'process' function
data Process a =
  Process {
      forall a. Process a -> ByteString -> a
openF    :: !(ByteString ->               a) -- ^ Open tag.
    , forall a. Process a -> ByteString -> ByteString -> a
attrF    :: !(ByteString -> ByteString -> a) -- ^ Tag attribute.
    , forall a. Process a -> ByteString -> a
endOpenF :: !(ByteString ->               a) -- ^ End open tag.
    , forall a. Process a -> ByteString -> a
textF    :: !(ByteString ->               a) -- ^ Text.
    , forall a. Process a -> ByteString -> a
closeF   :: !(ByteString ->               a) -- ^ Close tag.
    , forall a. Process a -> ByteString -> a
cdataF   :: !(ByteString ->               a) -- ^ CDATA.
    }

--------------------------------------------------------------------------------
-- Helpful interfaces to the parser

-- | Parse the XML but return no result, process no events.
--
-- N.B.: Only the lexical correctness of the input string is checked, not its XML semantics (e.g. only if tags are well formed, not whether tags are properly closed)
--
-- > > :set -XOverloadedStrings
-- > > validate "<b>"
-- > True
--
-- > > validate "<b"
-- > False
validate :: (StringLike str) => str -> Bool
validate :: forall str. StringLike str => str -> Bool
validate str
s =
  case () -> Either XenoException ()
forall e a. Exception e => a -> Either e a
spork
         (Identity () -> ()
forall a. Identity a -> a
runIdentity
            (Process (Identity ()) -> str -> Identity ()
forall (m :: * -> *) str.
(Monad m, StringLike str) =>
Process (m ()) -> str -> m ()
process
               Process {
                 openF :: ByteString -> Identity ()
openF    = \ByteString
_   -> () -> Identity ()
forall a. a -> Identity a
forall (f :: * -> *) a. Applicative f => a -> f a
pure ()
               , attrF :: ByteString -> ByteString -> Identity ()
attrF    = \ByteString
_ ByteString
_ -> () -> Identity ()
forall a. a -> Identity a
forall (f :: * -> *) a. Applicative f => a -> f a
pure ()
               , endOpenF :: ByteString -> Identity ()
endOpenF = \ByteString
_   -> () -> Identity ()
forall a. a -> Identity a
forall (f :: * -> *) a. Applicative f => a -> f a
pure ()
               , textF :: ByteString -> Identity ()
textF    = \ByteString
_   -> () -> Identity ()
forall a. a -> Identity a
forall (f :: * -> *) a. Applicative f => a -> f a
pure ()
               , closeF :: ByteString -> Identity ()
closeF   = \ByteString
_   -> () -> Identity ()
forall a. a -> Identity a
forall (f :: * -> *) a. Applicative f => a -> f a
pure ()
               , cdataF :: ByteString -> Identity ()
cdataF   = \ByteString
_   -> () -> Identity ()
forall a. a -> Identity a
forall (f :: * -> *) a. Applicative f => a -> f a
pure ()
               }
               str
s)) of
    Left (XenoException
_ :: XenoException) -> Bool
False
    Right ()
_ -> Bool
True
-- It must be inlined or specialised to ByteString/ByteStringZeroTerminated
{-# INLINE validate #-}
{-# SPECIALISE validate :: ByteString -> Bool #-}
{-# SPECIALISE validate :: ByteStringZeroTerminated -> Bool #-}


-- | Parse the XML and checks tags nesting.
--
validateEx :: (StringLike str) => str -> Bool
validateEx :: forall str. StringLike str => str -> Bool
validateEx str
s =
  case () -> Either XenoException ()
forall e a. Exception e => a -> Either e a
spork
         ((forall s. ST s ()) -> ()
forall a. (forall s. ST s a) -> a
runST ((forall s. ST s ()) -> ()) -> (forall s. ST s ()) -> ()
forall a b. (a -> b) -> a -> b
$ do
            STRef s [ByteString]
tags <- [ByteString] -> ST s (STRef s [ByteString])
forall a s. a -> ST s (STRef s a)
newSTRef []
            (Process (ST s ()) -> str -> ST s ()
forall (m :: * -> *) str.
(Monad m, StringLike str) =>
Process (m ()) -> str -> m ()
process
               Process {
                 openF :: ByteString -> ST s ()
openF    = \ByteString
tag   -> STRef s [ByteString] -> ([ByteString] -> [ByteString]) -> ST s ()
forall s a. STRef s a -> (a -> a) -> ST s ()
modifySTRef' STRef s [ByteString]
tags (ByteString
tagByteString -> [ByteString] -> [ByteString]
forall a. a -> [a] -> [a]
:)
               , attrF :: ByteString -> ByteString -> ST s ()
attrF    = \ByteString
_ ByteString
_ -> () -> ST s ()
forall a. a -> ST s a
forall (f :: * -> *) a. Applicative f => a -> f a
pure ()
               , endOpenF :: ByteString -> ST s ()
endOpenF = \ByteString
_   -> () -> ST s ()
forall a. a -> ST s a
forall (f :: * -> *) a. Applicative f => a -> f a
pure ()
               , textF :: ByteString -> ST s ()
textF    = \ByteString
_   -> () -> ST s ()
forall a. a -> ST s a
forall (f :: * -> *) a. Applicative f => a -> f a
pure ()
               , closeF :: ByteString -> ST s ()
closeF   = \ByteString
tag  ->
                   STRef s [ByteString] -> ([ByteString] -> [ByteString]) -> ST s ()
forall s a. STRef s a -> (a -> a) -> ST s ()
modifySTRef' STRef s [ByteString]
tags (([ByteString] -> [ByteString]) -> ST s ())
-> ([ByteString] -> [ByteString]) -> ST s ()
forall a b. (a -> b) -> a -> b
$ \case
                      [] -> [Char] -> [ByteString]
forall a. [Char] -> [a]
forall (m :: * -> *) a. MonadFail m => [Char] -> m a
fail ([Char] -> [ByteString]) -> [Char] -> [ByteString]
forall a b. (a -> b) -> a -> b
$ [Char]
"Unexpected close tag \"" [Char] -> [Char] -> [Char]
forall a. [a] -> [a] -> [a]
++ ByteString -> [Char]
forall a. Show a => a -> [Char]
show ByteString
tag [Char] -> [Char] -> [Char]
forall a. [a] -> [a] -> [a]
++ [Char]
"\""
                      (ByteString
expectedTag:[ByteString]
tags') ->
                          if ByteString
expectedTag ByteString -> ByteString -> Bool
forall a. Eq a => a -> a -> Bool
== ByteString
tag
                          then [ByteString]
tags'
                          else [Char] -> [ByteString]
forall a. [Char] -> [a]
forall (m :: * -> *) a. MonadFail m => [Char] -> m a
fail ([Char] -> [ByteString]) -> [Char] -> [ByteString]
forall a b. (a -> b) -> a -> b
$ [Char]
"Unexpected close tag. Expected \"" [Char] -> [Char] -> [Char]
forall a. [a] -> [a] -> [a]
++ ByteString -> [Char]
forall a. Show a => a -> [Char]
show ByteString
expectedTag [Char] -> [Char] -> [Char]
forall a. [a] -> [a] -> [a]
++ [Char]
"\", but got \"" [Char] -> [Char] -> [Char]
forall a. [a] -> [a] -> [a]
++ ByteString -> [Char]
forall a. Show a => a -> [Char]
show ByteString
tag [Char] -> [Char] -> [Char]
forall a. [a] -> [a] -> [a]
++ [Char]
"\""
               , cdataF :: ByteString -> ST s ()
cdataF   = \ByteString
_   -> () -> ST s ()
forall a. a -> ST s a
forall (f :: * -> *) a. Applicative f => a -> f a
pure ()
               }
               str
s)
            STRef s [ByteString] -> ST s [ByteString]
forall s a. STRef s a -> ST s a
readSTRef STRef s [ByteString]
tags ST s [ByteString] -> ([ByteString] -> ST s ()) -> ST s ()
forall a b. ST s a -> (a -> ST s b) -> ST s b
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= \case
                [] -> () -> ST s ()
forall a. a -> ST s a
forall (m :: * -> *) a. Monad m => a -> m a
return ()
                [ByteString]
tags' -> [Char] -> ST s ()
forall a. HasCallStack => [Char] -> a
error ([Char] -> ST s ()) -> [Char] -> ST s ()
forall a b. (a -> b) -> a -> b
$ [Char]
"Not all tags closed: " [Char] -> [Char] -> [Char]
forall a. [a] -> [a] -> [a]
++ [ByteString] -> [Char]
forall a. Show a => a -> [Char]
show [ByteString]
tags'
         ) of
    Left (XenoException
_ :: XenoException) -> Bool
False
    Right ()
_ -> Bool
True
{-# INLINE validateEx #-}
{-# SPECIALISE validateEx :: ByteString -> Bool #-}
{-# SPECIALISE validateEx :: ByteStringZeroTerminated -> Bool #-}


-- | Parse the XML and pretty print it to stdout.
dump :: ByteString -> IO ()
dump :: ByteString -> IO ()
dump ByteString
str =
  StateT Int IO () -> Int -> IO ()
forall (m :: * -> *) s a. Monad m => StateT s m a -> s -> m a
evalStateT
    (Process (StateT Int IO ()) -> ByteString -> StateT Int IO ()
forall (m :: * -> *) str.
(Monad m, StringLike str) =>
Process (m ()) -> str -> m ()
process
       Process {
         openF :: ByteString -> StateT Int IO ()
openF = \ByteString
name -> do
          Int
level <- StateT Int IO Int
forall s (m :: * -> *). MonadState s m => m s
get
          IO () -> StateT Int IO ()
forall (m :: * -> *) a. Monad m => m a -> StateT Int m a
forall (t :: (* -> *) -> * -> *) (m :: * -> *) a.
(MonadTrans t, Monad m) =>
m a -> t m a
lift (ByteString -> IO ()
S8.putStr (Int -> Char -> ByteString
S8.replicate Int
level Char
' ' ByteString -> ByteString -> ByteString
forall a. Semigroup a => a -> a -> a
<> ByteString
"<" ByteString -> ByteString -> ByteString
forall a. Semigroup a => a -> a -> a
<> ByteString
name ByteString -> ByteString -> ByteString
forall a. Semigroup a => a -> a -> a
<> ByteString
""))
       , attrF :: ByteString -> ByteString -> StateT Int IO ()
attrF = \ByteString
key ByteString
value -> IO () -> StateT Int IO ()
forall (m :: * -> *) a. Monad m => m a -> StateT Int m a
forall (t :: (* -> *) -> * -> *) (m :: * -> *) a.
(MonadTrans t, Monad m) =>
m a -> t m a
lift (ByteString -> IO ()
S8.putStr (ByteString
" " ByteString -> ByteString -> ByteString
forall a. Semigroup a => a -> a -> a
<> ByteString
key ByteString -> ByteString -> ByteString
forall a. Semigroup a => a -> a -> a
<> ByteString
"=\"" ByteString -> ByteString -> ByteString
forall a. Semigroup a => a -> a -> a
<> ByteString
value ByteString -> ByteString -> ByteString
forall a. Semigroup a => a -> a -> a
<> ByteString
"\""))
       , endOpenF :: ByteString -> StateT Int IO ()
endOpenF = \ByteString
_ -> do
          Int
level <- StateT Int IO Int
forall s (m :: * -> *). MonadState s m => m s
get
          let !level' :: Int
level' = Int
level Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
2
          Int -> StateT Int IO ()
forall s (m :: * -> *). MonadState s m => s -> m ()
put Int
level'
          IO () -> StateT Int IO ()
forall (m :: * -> *) a. Monad m => m a -> StateT Int m a
forall (t :: (* -> *) -> * -> *) (m :: * -> *) a.
(MonadTrans t, Monad m) =>
m a -> t m a
lift (ByteString -> IO ()
S8.putStrLn (ByteString
">"))
       , textF :: ByteString -> StateT Int IO ()
textF = \ByteString
text -> do
          Int
level <- StateT Int IO Int
forall s (m :: * -> *). MonadState s m => m s
get
          IO () -> StateT Int IO ()
forall (m :: * -> *) a. Monad m => m a -> StateT Int m a
forall (t :: (* -> *) -> * -> *) (m :: * -> *) a.
(MonadTrans t, Monad m) =>
m a -> t m a
lift (ByteString -> IO ()
S8.putStrLn (Int -> Char -> ByteString
S8.replicate Int
level Char
' ' ByteString -> ByteString -> ByteString
forall a. Semigroup a => a -> a -> a
<> [Char] -> ByteString
S8.pack (ByteString -> [Char]
forall a. Show a => a -> [Char]
show ByteString
text)))
       , closeF :: ByteString -> StateT Int IO ()
closeF = \ByteString
name -> do
          Int
level <- StateT Int IO Int
forall s (m :: * -> *). MonadState s m => m s
get
          let !level' :: Int
level' = Int
level Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
2
          Int -> StateT Int IO ()
forall s (m :: * -> *). MonadState s m => s -> m ()
put Int
level'
          IO () -> StateT Int IO ()
forall (m :: * -> *) a. Monad m => m a -> StateT Int m a
forall (t :: (* -> *) -> * -> *) (m :: * -> *) a.
(MonadTrans t, Monad m) =>
m a -> t m a
lift (ByteString -> IO ()
S8.putStrLn (Int -> Char -> ByteString
S8.replicate Int
level' Char
' ' ByteString -> ByteString -> ByteString
forall a. Semigroup a => a -> a -> a
<> ByteString
"</" ByteString -> ByteString -> ByteString
forall a. Semigroup a => a -> a -> a
<> ByteString
name ByteString -> ByteString -> ByteString
forall a. Semigroup a => a -> a -> a
<> ByteString
">"))
       , cdataF :: ByteString -> StateT Int IO ()
cdataF = \ByteString
cdata -> do
          Int
level <- StateT Int IO Int
forall s (m :: * -> *). MonadState s m => m s
get
          IO () -> StateT Int IO ()
forall (m :: * -> *) a. Monad m => m a -> StateT Int m a
forall (t :: (* -> *) -> * -> *) (m :: * -> *) a.
(MonadTrans t, Monad m) =>
m a -> t m a
lift (ByteString -> IO ()
S8.putStrLn (Int -> Char -> ByteString
S8.replicate Int
level Char
' ' ByteString -> ByteString -> ByteString
forall a. Semigroup a => a -> a -> a
<> ByteString
"CDATA: " ByteString -> ByteString -> ByteString
forall a. Semigroup a => a -> a -> a
<> [Char] -> ByteString
S8.pack (ByteString -> [Char]
forall a. Show a => a -> [Char]
show ByteString
cdata)))
       }
       ByteString
str)
    (Int
0 :: Int)

-- | Fold over the XML input.
fold
  :: (s -> ByteString -> s) -- ^ Open tag.
  -> (s -> ByteString -> ByteString -> s) -- ^ Attribute key/value.
  -> (s -> ByteString -> s) -- ^ End of open tag.
  -> (s -> ByteString -> s) -- ^ Text.
  -> (s -> ByteString -> s) -- ^ Close tag.
  -> (s -> ByteString -> s) -- ^ CDATA.
  -> s
  -> ByteString
  -> Either XenoException s
fold :: forall s.
(s -> ByteString -> s)
-> (s -> ByteString -> ByteString -> s)
-> (s -> ByteString -> s)
-> (s -> ByteString -> s)
-> (s -> ByteString -> s)
-> (s -> ByteString -> s)
-> s
-> ByteString
-> Either XenoException s
fold s -> ByteString -> s
openF s -> ByteString -> ByteString -> s
attrF s -> ByteString -> s
endOpenF s -> ByteString -> s
textF s -> ByteString -> s
closeF s -> ByteString -> s
cdataF s
s ByteString
str =
  s -> Either XenoException s
forall e a. Exception e => a -> Either e a
spork
    (State s () -> s -> s
forall s a. State s a -> s -> s
execState
       (Process (State s ()) -> ByteString -> State s ()
forall (m :: * -> *) str.
(Monad m, StringLike str) =>
Process (m ()) -> str -> m ()
process Process {
            openF :: ByteString -> State s ()
openF    = \ByteString
name -> (s -> s) -> State s ()
forall s (m :: * -> *). MonadState s m => (s -> s) -> m ()
modify' (\s
s' -> s -> ByteString -> s
openF s
s' ByteString
name)
          , attrF :: ByteString -> ByteString -> State s ()
attrF    = \ByteString
key ByteString
value -> (s -> s) -> State s ()
forall s (m :: * -> *). MonadState s m => (s -> s) -> m ()
modify' (\s
s' -> s -> ByteString -> ByteString -> s
attrF s
s' ByteString
key ByteString
value)
          , endOpenF :: ByteString -> State s ()
endOpenF = \ByteString
name -> (s -> s) -> State s ()
forall s (m :: * -> *). MonadState s m => (s -> s) -> m ()
modify' (\s
s' -> s -> ByteString -> s
endOpenF s
s' ByteString
name)
          , textF :: ByteString -> State s ()
textF    = \ByteString
text -> (s -> s) -> State s ()
forall s (m :: * -> *). MonadState s m => (s -> s) -> m ()
modify' (\s
s' -> s -> ByteString -> s
textF s
s' ByteString
text)
          , closeF :: ByteString -> State s ()
closeF   = \ByteString
name -> (s -> s) -> State s ()
forall s (m :: * -> *). MonadState s m => (s -> s) -> m ()
modify' (\s
s' -> s -> ByteString -> s
closeF s
s' ByteString
name)
          , cdataF :: ByteString -> State s ()
cdataF   = \ByteString
cdata -> (s -> s) -> State s ()
forall s (m :: * -> *). MonadState s m => (s -> s) -> m ()
modify' (\s
s' -> s -> ByteString -> s
cdataF s
s' ByteString
cdata)
        } ByteString
str)
       s
s)

--------------------------------------------------------------------------------
-- Main parsing function

-- | Process events with callbacks in the XML input.
process
  :: (Monad m, StringLike str)
  => Process (m ())
  -> str
  -> m ()
process :: forall (m :: * -> *) str.
(Monad m, StringLike str) =>
Process (m ()) -> str -> m ()
process !(Process {ByteString -> m ()
openF :: forall a. Process a -> ByteString -> a
openF :: ByteString -> m ()
openF, ByteString -> ByteString -> m ()
attrF :: forall a. Process a -> ByteString -> ByteString -> a
attrF :: ByteString -> ByteString -> m ()
attrF, ByteString -> m ()
endOpenF :: forall a. Process a -> ByteString -> a
endOpenF :: ByteString -> m ()
endOpenF, ByteString -> m ()
textF :: forall a. Process a -> ByteString -> a
textF :: ByteString -> m ()
textF, ByteString -> m ()
closeF :: forall a. Process a -> ByteString -> a
closeF :: ByteString -> m ()
closeF, ByteString -> m ()
cdataF :: forall a. Process a -> ByteString -> a
cdataF :: ByteString -> m ()
cdataF}) str
str = Int -> m ()
findLT Int
0
  where
    findLT :: Int -> m ()
findLT Int
index =
      case Word8 -> str -> Int -> Maybe Int
forall str. StringLike str => Word8 -> str -> Int -> Maybe Int
elemIndexFrom' Word8
openTagChar str
str Int
index of
        Maybe Int
Nothing -> Bool -> m () -> m ()
forall (f :: * -> *). Applicative f => Bool -> f () -> f ()
unless (ByteString -> Bool
S.null ByteString
text) (ByteString -> m ()
textF ByteString
text)
          where text :: ByteString
text = str -> ByteString
forall str. StringLike str => str -> ByteString
toBS (str -> ByteString) -> str -> ByteString
forall a b. (a -> b) -> a -> b
$ Int -> str -> str
forall str. StringLike str => Int -> str -> str
drop' Int
index str
str
        Just Int
fromLt -> do
          Bool -> m () -> m ()
forall (f :: * -> *). Applicative f => Bool -> f () -> f ()
unless (ByteString -> Bool
S.null ByteString
text) (ByteString -> m ()
textF ByteString
text)
          Int -> m ()
checkOpenComment (Int
fromLt Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1)
          where text :: ByteString
text = str -> Int -> Int -> ByteString
forall str. StringLike str => str -> Int -> Int -> ByteString
substring' str
str Int
index Int
fromLt
    -- Find open comment, CDATA or tag name.
    checkOpenComment :: Int -> m ()
checkOpenComment Int
index
      | str -> Int -> Word8
forall str. StringLike str => str -> Int -> Word8
s_index' str
this Int
0 Word8 -> Word8 -> Bool
forall a. Eq a => a -> a -> Bool
== Word8
bangChar -- !
      , str -> Int -> Word8
forall str. StringLike str => str -> Int -> Word8
s_index' str
this Int
1 Word8 -> Word8 -> Bool
forall a. Eq a => a -> a -> Bool
== Word8
commentChar -- -
      , str -> Int -> Word8
forall str. StringLike str => str -> Int -> Word8
s_index' str
this Int
2 Word8 -> Word8 -> Bool
forall a. Eq a => a -> a -> Bool
== Word8
commentChar -- -
      =  Int -> m ()
findCommentEnd (Int
index Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
3)

      | str -> Int -> Word8
forall str. StringLike str => str -> Int -> Word8
s_index' str
this Int
0 Word8 -> Word8 -> Bool
forall a. Eq a => a -> a -> Bool
== Word8
bangChar -- !
      , str -> Int -> Word8
forall str. StringLike str => str -> Int -> Word8
s_index' str
this Int
1 Word8 -> Word8 -> Bool
forall a. Eq a => a -> a -> Bool
== Word8
openAngleBracketChar -- [
      , str -> Int -> Word8
forall str. StringLike str => str -> Int -> Word8
s_index' str
this Int
2 Word8 -> Word8 -> Bool
forall a. Eq a => a -> a -> Bool
== Word8
67 -- C
      , str -> Int -> Word8
forall str. StringLike str => str -> Int -> Word8
s_index' str
this Int
3 Word8 -> Word8 -> Bool
forall a. Eq a => a -> a -> Bool
== Word8
68 -- D
      , str -> Int -> Word8
forall str. StringLike str => str -> Int -> Word8
s_index' str
this Int
4 Word8 -> Word8 -> Bool
forall a. Eq a => a -> a -> Bool
== Word8
65 -- A
      , str -> Int -> Word8
forall str. StringLike str => str -> Int -> Word8
s_index' str
this Int
5 Word8 -> Word8 -> Bool
forall a. Eq a => a -> a -> Bool
== Word8
84 -- T
      , str -> Int -> Word8
forall str. StringLike str => str -> Int -> Word8
s_index' str
this Int
6 Word8 -> Word8 -> Bool
forall a. Eq a => a -> a -> Bool
== Word8
65 -- A
      , str -> Int -> Word8
forall str. StringLike str => str -> Int -> Word8
s_index' str
this Int
7 Word8 -> Word8 -> Bool
forall a. Eq a => a -> a -> Bool
== Word8
openAngleBracketChar -- [
      =  Int -> Int -> m ()
findCDataEnd (Int
index Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
8) (Int
index Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
8)

      | Bool
otherwise
      = Int -> m ()
findTagName Int
index
      where
        this :: str
this = Int -> str -> str
forall str. StringLike str => Int -> str -> str
drop' Int
index str
str
    findCommentEnd :: Int -> m ()
findCommentEnd Int
index =
      case Word8 -> str -> Int -> Maybe Int
forall str. StringLike str => Word8 -> str -> Int -> Maybe Int
elemIndexFrom' Word8
commentChar str
str Int
index of
        Maybe Int
Nothing -> XenoException -> m ()
forall a e. Exception e => e -> a
throw (XenoException -> m ()) -> XenoException -> m ()
forall a b. (a -> b) -> a -> b
$ Int -> ByteString -> XenoException
XenoParseError Int
index ByteString
"Couldn't find the closing comment dash."
        Just Int
fromDash ->
          if str -> Int -> Word8
forall str. StringLike str => str -> Int -> Word8
s_index' str
this Int
0 Word8 -> Word8 -> Bool
forall a. Eq a => a -> a -> Bool
== Word8
commentChar Bool -> Bool -> Bool
&& str -> Int -> Word8
forall str. StringLike str => str -> Int -> Word8
s_index' str
this Int
1 Word8 -> Word8 -> Bool
forall a. Eq a => a -> a -> Bool
== Word8
closeTagChar
            then Int -> m ()
findLT (Int
fromDash Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
2)
            else Int -> m ()
findCommentEnd (Int
fromDash Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1)
          where this :: str
this = Int -> str -> str
forall str. StringLike str => Int -> str -> str
drop' Int
index str
str
    findCDataEnd :: Int -> Int -> m ()
findCDataEnd Int
cdata_start Int
index =
      case Word8 -> str -> Int -> Maybe Int
forall str. StringLike str => Word8 -> str -> Int -> Maybe Int
elemIndexFrom' Word8
closeAngleBracketChar str
str Int
index of
        Maybe Int
Nothing -> XenoException -> m ()
forall a e. Exception e => e -> a
throw (XenoException -> m ()) -> XenoException -> m ()
forall a b. (a -> b) -> a -> b
$ Int -> ByteString -> XenoException
XenoParseError Int
index ByteString
"Couldn't find closing angle bracket for CDATA."
        Just Int
fromCloseAngleBracket ->
          if str -> Int -> Word8
forall str. StringLike str => str -> Int -> Word8
s_index' str
str (Int
fromCloseAngleBracket Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1) Word8 -> Word8 -> Bool
forall a. Eq a => a -> a -> Bool
== Word8
closeAngleBracketChar
             then do
               ByteString -> m ()
cdataF (str -> Int -> Int -> ByteString
forall str. StringLike str => str -> Int -> Int -> ByteString
substring' str
str Int
cdata_start Int
fromCloseAngleBracket)
               Int -> m ()
findLT (Int
fromCloseAngleBracket Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
3) -- Start after ]]>
             else
               -- We only found one ], that means that we need to keep searching.
               Int -> Int -> m ()
findCDataEnd Int
cdata_start (Int
fromCloseAngleBracket Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1)
    findTagName :: Int -> m ()
findTagName Int
index0
      | str -> Int -> Word8
forall str. StringLike str => str -> Int -> Word8
s_index' str
str Int
index0 Word8 -> Word8 -> Bool
forall a. Eq a => a -> a -> Bool
== Word8
questionChar =
        case Word8 -> str -> Int -> Maybe Int
forall str. StringLike str => Word8 -> str -> Int -> Maybe Int
elemIndexFrom' Word8
closeTagChar str
str Int
spaceOrCloseTag of
          Maybe Int
Nothing -> XenoException -> m ()
forall a e. Exception e => e -> a
throw (XenoException -> m ()) -> XenoException -> m ()
forall a b. (a -> b) -> a -> b
$ Int -> ByteString -> XenoException
XenoParseError Int
index ByteString
"Couldn't find the end of the tag."
          Just Int
fromGt -> do
            Int -> m ()
findLT (Int
fromGt Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1)
      | str -> Int -> Word8
forall str. StringLike str => str -> Int -> Word8
s_index' str
str Int
spaceOrCloseTag Word8 -> Word8 -> Bool
forall a. Eq a => a -> a -> Bool
== Word8
closeTagChar = do
        let tagname :: ByteString
tagname = str -> Int -> Int -> ByteString
forall str. StringLike str => str -> Int -> Int -> ByteString
substring' str
str Int
index Int
spaceOrCloseTag
        if str -> Int -> Word8
forall str. StringLike str => str -> Int -> Word8
s_index' str
str Int
index0 Word8 -> Word8 -> Bool
forall a. Eq a => a -> a -> Bool
== Word8
slashChar
          then ByteString -> m ()
closeF ByteString
tagname
          else do
            ByteString -> m ()
openF ByteString
tagname
            ByteString -> m ()
endOpenF ByteString
tagname
        Int -> m ()
findLT (Int
spaceOrCloseTag Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1)
      | Bool
otherwise = do
        let tagname :: ByteString
tagname = str -> Int -> Int -> ByteString
forall str. StringLike str => str -> Int -> Int -> ByteString
substring' str
str Int
index Int
spaceOrCloseTag
        ByteString -> m ()
openF ByteString
tagname
        Either Int Int
result <- Int -> m (Either Int Int)
findAttributes Int
spaceOrCloseTag
        ByteString -> m ()
endOpenF ByteString
tagname
        case Either Int Int
result of
          Right Int
closingTag -> Int -> m ()
findLT (Int
closingTag Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1)
          Left Int
closingPair -> do
            ByteString -> m ()
closeF ByteString
tagname
            Int -> m ()
findLT (Int
closingPair Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
2)
      where
        index :: Int
index =
          if str -> Int -> Word8
forall str. StringLike str => str -> Int -> Word8
s_index' str
str Int
index0 Word8 -> Word8 -> Bool
forall a. Eq a => a -> a -> Bool
== Word8
slashChar
            then Int
index0 Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1
            else Int
index0
        spaceOrCloseTag :: Int
spaceOrCloseTag = str -> Int -> Int
forall str. StringLike str => str -> Int -> Int
parseName str
str Int
index
    findAttributes :: Int -> m (Either Int Int)
findAttributes Int
index0
      -- case: />
      | str -> Int -> Word8
forall str. StringLike str => str -> Int -> Word8
s_index' str
str Int
index Word8 -> Word8 -> Bool
forall a. Eq a => a -> a -> Bool
== Word8
slashChar
      , str -> Int -> Word8
forall str. StringLike str => str -> Int -> Word8
s_index' str
str (Int
index Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1) Word8 -> Word8 -> Bool
forall a. Eq a => a -> a -> Bool
== Word8
closeTagChar
      = Either Int Int -> m (Either Int Int)
forall a. a -> m a
forall (f :: * -> *) a. Applicative f => a -> f a
pure (Int -> Either Int Int
forall a b. a -> Either a b
Left Int
index)

      -- case: >
      | str -> Int -> Word8
forall str. StringLike str => str -> Int -> Word8
s_index' str
str Int
index Word8 -> Word8 -> Bool
forall a. Eq a => a -> a -> Bool
== Word8
closeTagChar
      = Either Int Int -> m (Either Int Int)
forall a. a -> m a
forall (f :: * -> *) a. Applicative f => a -> f a
pure (Int -> Either Int Int
forall a b. b -> Either a b
Right Int
index)

      -- case: attr=' or attr="
      | str -> Int -> Word8
forall str. StringLike str => str -> Int -> Word8
s_index' str
str Int
afterAttrName Word8 -> Word8 -> Bool
forall a. Eq a => a -> a -> Bool
== Word8
equalChar
      , Word8
usedChar Word8 -> Word8 -> Bool
forall a. Eq a => a -> a -> Bool
== Word8
quoteChar Bool -> Bool -> Bool
|| Word8
usedChar Word8 -> Word8 -> Bool
forall a. Eq a => a -> a -> Bool
== Word8
doubleQuoteChar
      = case Word8 -> str -> Int -> Maybe Int
forall str. StringLike str => Word8 -> str -> Int -> Maybe Int
elemIndexFrom' Word8
usedChar str
str (Int
quoteIndex Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1) of
          Maybe Int
Nothing ->
            XenoException -> m (Either Int Int)
forall a e. Exception e => e -> a
throw
              (Int -> ByteString -> XenoException
XenoParseError Int
index ByteString
"Couldn't find the matching quote character.")
          Just Int
endQuoteIndex -> do
            ByteString -> ByteString -> m ()
attrF
              (str -> Int -> Int -> ByteString
forall str. StringLike str => str -> Int -> Int -> ByteString
substring' str
str Int
index Int
afterAttrName)
              (str -> Int -> Int -> ByteString
forall str. StringLike str => str -> Int -> Int -> ByteString
substring'
                 str
str
                 (Int
quoteIndex Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1)
                 (Int
endQuoteIndex))
            Int -> m (Either Int Int)
findAttributes (Int
endQuoteIndex Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1)

      -- case: attr= without following quote
      | str -> Int -> Word8
forall str. StringLike str => str -> Int -> Word8
s_index' str
str Int
afterAttrName Word8 -> Word8 -> Bool
forall a. Eq a => a -> a -> Bool
== Word8
equalChar
      = XenoException -> m (Either Int Int)
forall a e. Exception e => e -> a
throw (Int -> ByteString -> XenoException
XenoParseError Int
index(ByteString
"Expected ' or \", got: " ByteString -> ByteString -> ByteString
forall a. Semigroup a => a -> a -> a
<> Word8 -> ByteString
S.singleton Word8
usedChar))

      | Bool
otherwise
      = XenoException -> m (Either Int Int)
forall a e. Exception e => e -> a
throw (Int -> ByteString -> XenoException
XenoParseError Int
index (ByteString
"Expected =, got: " ByteString -> ByteString -> ByteString
forall a. Semigroup a => a -> a -> a
<> Word8 -> ByteString
S.singleton (str -> Int -> Word8
forall str. StringLike str => str -> Int -> Word8
s_index' str
str Int
afterAttrName) ByteString -> ByteString -> ByteString
forall a. Semigroup a => a -> a -> a
<> ByteString
" at character index: " ByteString -> ByteString -> ByteString
forall a. Semigroup a => a -> a -> a
<> ([Char] -> ByteString
S8.pack ([Char] -> ByteString) -> (Int -> [Char]) -> Int -> ByteString
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Int -> [Char]
forall a. Show a => a -> [Char]
show) Int
afterAttrName))
      where
        index :: Int
index = str -> Int -> Int
forall str. StringLike str => str -> Int -> Int
skipSpaces str
str Int
index0
#ifdef WHITESPACE_AROUND_EQUALS
        afterAttrName = skipSpaces str (parseName str index)
        quoteIndex = skipSpaces str (afterAttrName + 1)
#else
        afterAttrName :: Int
afterAttrName = str -> Int -> Int
forall str. StringLike str => str -> Int -> Int
parseName str
str Int
index
        quoteIndex :: Int
quoteIndex = Int
afterAttrName Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1
#endif
        usedChar :: Word8
usedChar = str -> Int -> Word8
forall str. StringLike str => str -> Int -> Word8
s_index' str
str Int
quoteIndex

{-# INLINE process #-}
{-# SPECIALISE process :: Process (Identity ()) -> ByteString -> Identity ()
               #-}
{-# SPECIALISE process :: Process (State s ()) -> ByteString -> State s ()
               #-}
{-# SPECIALISE process :: Process (ST s ()) -> ByteString -> ST s ()
               #-}
{-# SPECIALISE process :: Process (IO ()) -> ByteString -> IO ()
               #-}
{-# SPECIALISE process :: Process (Identity ()) -> ByteStringZeroTerminated -> Identity ()
               #-}
{-# SPECIALISE process :: Process (State s ()) -> ByteStringZeroTerminated -> State s ()
               #-}
{-# SPECIALISE process :: Process (ST s ()) -> ByteStringZeroTerminated -> ST s ()
               #-}
{-# SPECIALISE process :: Process (IO ()) -> ByteStringZeroTerminated -> IO ()
               #-}

--------------------------------------------------------------------------------
-- ByteString utilities

-- | /O(1)/ 'ByteString' index (subscript) operator, starting from 0.
s_index :: ByteString -> Int -> Word8
s_index :: ByteString -> Int -> Word8
s_index ByteString
ps Int
n
    | Int
n Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< Int
0            = XenoException -> Word8
forall a e. Exception e => e -> a
throw (Int -> ByteString -> XenoException
XenoStringIndexProblem Int
n ByteString
ps)
    | Int
n Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
>= ByteString -> Int
S.length ByteString
ps = XenoException -> Word8
forall a e. Exception e => e -> a
throw (Int -> ByteString -> XenoException
XenoStringIndexProblem Int
n ByteString
ps)
    | Bool
otherwise        = ByteString
ps ByteString -> Int -> Word8
`SU.unsafeIndex` Int
n
{-# INLINE s_index #-}

-- | A fast space skipping function.
skipSpaces :: (StringLike str) => str -> Int -> Int
skipSpaces :: forall str. StringLike str => str -> Int -> Int
skipSpaces str
str Int
i =
  if Word8 -> Bool
isSpaceChar (str -> Int -> Word8
forall str. StringLike str => str -> Int -> Word8
s_index' str
str Int
i)
    then str -> Int -> Int
forall str. StringLike str => str -> Int -> Int
skipSpaces str
str (Int
i Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1)
    else Int
i
{-# INLINE skipSpaces #-}

-- | Get a substring of a string.
substring :: ByteString -> Int -> Int -> ByteString
substring :: ByteString -> Int -> Int -> ByteString
substring ByteString
s Int
start Int
end = Int -> ByteString -> ByteString
S.take (Int
end Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
start) (Int -> ByteString -> ByteString
S.drop Int
start ByteString
s)
{-# INLINE substring #-}

-- | Basically @findIndex (not . isNameChar)@, but doesn't allocate.
parseName :: (StringLike str) => str -> Int -> Int
parseName :: forall str. StringLike str => str -> Int -> Int
parseName str
str Int
index =
  if Bool -> Bool
not (Word8 -> Bool
isNameChar1 (str -> Int -> Word8
forall str. StringLike str => str -> Int -> Word8
s_index' str
str Int
index))
     then Int
index
     else str -> Int -> Int
forall str. StringLike str => str -> Int -> Int
parseName' str
str (Int
index Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1)
{-# INLINE parseName #-}

-- | Basically @findIndex (not . isNameChar)@, but doesn't allocate.
parseName' :: (StringLike str) => str -> Int -> Int
parseName' :: forall str. StringLike str => str -> Int -> Int
parseName' str
str Int
index =
  if Bool -> Bool
not (Word8 -> Bool
isNameChar (str -> Int -> Word8
forall str. StringLike str => str -> Int -> Word8
s_index' str
str Int
index))
     then Int
index
     else str -> Int -> Int
forall str. StringLike str => str -> Int -> Int
parseName' str
str (Int
index Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1)
{-# INLINE parseName' #-}

-- | Get index of an element starting from offset.
elemIndexFrom :: Word8 -> ByteString -> Int -> Maybe Int
elemIndexFrom :: Word8 -> ByteString -> Int -> Maybe Int
elemIndexFrom Word8
c ByteString
str Int
offset = (Int -> Int) -> Maybe Int -> Maybe Int
forall a b. (a -> b) -> Maybe a -> Maybe b
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap (Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
offset) (Word8 -> ByteString -> Maybe Int
S.elemIndex Word8
c (Int -> ByteString -> ByteString
S.drop Int
offset ByteString
str))
-- Without the INLINE below, the whole function is twice as slow and
-- has linear allocation. See git commit with this comment for
-- results.
{-# INLINE elemIndexFrom #-}

--------------------------------------------------------------------------------
-- Character types

isSpaceChar :: Word8 -> Bool
isSpaceChar :: Word8 -> Bool
isSpaceChar = Int -> Int -> Bool
forall a. Bits a => a -> Int -> Bool
testBit (Int
0b100000000000000000010011000000000 :: Int) (Int -> Bool) -> (Word8 -> Int) -> Word8 -> Bool
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Word8 -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral
--                       |                  |  ||  bits:
--                       |                  |  |+-- 9
--                       |                  |  +--- 10
--                       |                  +------ 13
--                       +------------------------- 32
{-# INLINE isSpaceChar #-}

-- | Is the character a valid first tag/attribute name constituent?
-- 'a'-'z', 'A'-'Z', '_', ':'
isNameChar1 :: Word8 -> Bool
isNameChar1 :: Word8 -> Bool
isNameChar1 Word8
c =
  (Word8
c Word8 -> Word8 -> Bool
forall a. Ord a => a -> a -> Bool
>= Word8
97 Bool -> Bool -> Bool
&& Word8
c Word8 -> Word8 -> Bool
forall a. Ord a => a -> a -> Bool
<= Word8
122) Bool -> Bool -> Bool
|| (Word8
c Word8 -> Word8 -> Bool
forall a. Ord a => a -> a -> Bool
>= Word8
65 Bool -> Bool -> Bool
&& Word8
c Word8 -> Word8 -> Bool
forall a. Ord a => a -> a -> Bool
<= Word8
90) Bool -> Bool -> Bool
|| Word8
c Word8 -> Word8 -> Bool
forall a. Eq a => a -> a -> Bool
== Word8
95 Bool -> Bool -> Bool
|| Word8
c Word8 -> Word8 -> Bool
forall a. Eq a => a -> a -> Bool
== Word8
58
{-# INLINE isNameChar1 #-}

-- isNameCharOriginal :: Word8 -> Bool
-- isNameCharOriginal c =
--   (c >= 97 && c <= 122) || (c >= 65 && c <= 90) || c == 95 || c == 58 ||
--   c == 45 || c == 46 || (c >= 48 && c <= 57)
-- {-# INLINE isNameCharOriginal #-}
--
-- TODO Strange, but highMaskIsNameChar, lowMaskIsNameChar don't calculate fast... FIX IT
-- highMaskIsNameChar, lowMaskIsNameChar :: Word64
-- (highMaskIsNameChar, lowMaskIsNameChar) =
--     foldl (\(hi,low) char -> (hi `setBit` (char - 64), low `setBit` char)) -- NB: `setBit` can process overflowed values (where char < 64; -- TODO fix it
--           (0::Word64, 0::Word64)
--           (map fromIntegral (filter isNameCharOriginal [0..128]))
-- {-# INLINE highMaskIsNameChar #-}
-- {-# INLINE lowMaskIsNameChar #-}

-- | Is the character a valid tag/attribute name constituent?
-- isNameChar1 + '-', '.', '0'-'9'
isNameChar :: Word8 -> Bool
isNameChar :: Word8 -> Bool
isNameChar Word8
char = (Word64
lowMaskIsNameChar Word64 -> Int -> Bool
forall a. Bits a => a -> Int -> Bool
`testBit` Int
char'low) Bool -> Bool -> Bool
|| (Word64
highMaskIsNameChar Word64 -> Int -> Bool
forall a. Bits a => a -> Int -> Bool
`testBit` Int
char'high)
   -- TODO 1) change code to use W# instead of Word64
   --      2) Document `ii - 64` -- there is underflow, but `testBit` can process this!
  where
    char'low :: Int
char'low  = Word8 -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral Word8
char
    char'high :: Int
char'high = Word8 -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Word8
char Word8 -> Word8 -> Word8
forall a. Num a => a -> a -> a
- Word8
64)
    highMaskIsNameChar :: Word64
    highMaskIsNameChar :: Word64
highMaskIsNameChar = Word64
0b11111111111111111111111111010000111111111111111111111111110
    --                     ------------+------------- |    ------------+-------------
    --                                 |              |                |  bits:
    --                                 |              |                +-- 65-90
    --                                 |              +------------------- 95
    --                                 +---------------------------------- 97-122
    lowMaskIsNameChar :: Word64
    lowMaskIsNameChar :: Word64
lowMaskIsNameChar =  Word64
0b11111111111011000000000000000000000000000000000000000000000
    --                     -----+----- ||
    --                          |      ||  bits:
    --                          |      |+-- 45
    --                          |      +--- 46
    --                          +---------- 48-58
{-# INLINE isNameChar #-}

-- | Char for '\''.
quoteChar :: Word8
quoteChar :: Word8
quoteChar = Word8
39

-- | Char for '"'.
doubleQuoteChar :: Word8
doubleQuoteChar :: Word8
doubleQuoteChar = Word8
34

-- | Char for '='.
equalChar :: Word8
equalChar :: Word8
equalChar = Word8
61

-- | Char for '?'.
questionChar :: Word8
questionChar :: Word8
questionChar = Word8
63

-- | Char for '/'.
slashChar :: Word8
slashChar :: Word8
slashChar = Word8
47

-- | Exclaimation character !.
bangChar :: Word8
bangChar :: Word8
bangChar = Word8
33

-- | The dash character.
commentChar :: Word8
commentChar :: Word8
commentChar = Word8
45 -- '-'

-- | Open tag character.
openTagChar :: Word8
openTagChar :: Word8
openTagChar = Word8
60 -- '<'

-- | Close tag character.
closeTagChar :: Word8
closeTagChar :: Word8
closeTagChar = Word8
62 -- '>'

-- | Open angle bracket character.
openAngleBracketChar :: Word8
openAngleBracketChar :: Word8
openAngleBracketChar = Word8
91

-- | Close angle bracket character.
closeAngleBracketChar :: Word8
closeAngleBracketChar :: Word8
closeAngleBracketChar = Word8
93

-- | Skip initial DOCTYPE declaration
skipDoctype :: ByteString -> ByteString
skipDoctype :: ByteString -> ByteString
skipDoctype ByteString
arg =
    if ByteString
"<!DOCTYPE" ByteString -> ByteString -> Bool
`S8.isPrefixOf` ByteString
bs
      then let (ByteString
_, ByteString
rest)=ByteString
">" ByteString -> ByteString -> (ByteString, ByteString)
`S8.breakSubstring` ByteString
bs
           in ByteString -> ByteString
skipBlanks (ByteString -> ByteString) -> ByteString -> ByteString
forall a b. (a -> b) -> a -> b
$ Int -> ByteString -> ByteString
S8.drop Int
1 ByteString
rest
      else ByteString
bs
  where
    bs :: ByteString
bs = ByteString -> ByteString
skipBlanks ByteString
arg
    skipBlanks :: ByteString -> ByteString
skipBlanks = (Char -> Bool) -> ByteString -> ByteString
S8.dropWhile Char -> Bool
isSpace