{-# LANGUAGE CPP #-}
{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE BinaryLiterals #-}
{-# LANGUAGE LambdaCase #-}
{-# LANGUAGE NamedFieldPuns #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE ScopedTypeVariables #-}
module Xeno.SAX
( process
, Process(..)
, StringLike(..)
, fold
, validate
, validateEx
, dump
, skipDoctype
) where
import Control.Exception (throw)
import Control.Monad (unless)
import Control.Monad.ST (ST, runST)
import Control.Monad.State.Strict (State, evalStateT, execState, modify', lift, get, put)
import Control.Spork (spork)
import Data.Bits (testBit)
import Data.ByteString (ByteString)
import qualified Data.ByteString as S
import qualified Data.ByteString.Char8 as S8
import qualified Data.ByteString.Unsafe as SU
import Data.Char (isSpace)
import Data.Functor.Identity (Identity(..))
import Data.Semigroup ()
import Data.STRef (newSTRef, modifySTRef', readSTRef)
import Data.Word (Word8, Word64)
import Xeno.Types
class StringLike str where
s_index' :: str -> Int -> Word8
elemIndexFrom' :: Word8 -> str -> Int -> Maybe Int
drop' :: Int -> str -> str
substring' :: str -> Int -> Int -> ByteString
toBS :: str -> ByteString
instance StringLike ByteString where
s_index' :: ByteString -> Int -> Word8
s_index' = ByteString -> Int -> Word8
s_index
{-# INLINE s_index' #-}
elemIndexFrom' :: Word8 -> ByteString -> Int -> Maybe Int
elemIndexFrom' = Word8 -> ByteString -> Int -> Maybe Int
elemIndexFrom
{-# INLINE elemIndexFrom' #-}
drop' :: Int -> ByteString -> ByteString
drop' = Int -> ByteString -> ByteString
S.drop
{-# INLINE drop' #-}
substring' :: ByteString -> Int -> Int -> ByteString
substring' = ByteString -> Int -> Int -> ByteString
substring
{-# INLINE substring' #-}
toBS :: ByteString -> ByteString
toBS = ByteString -> ByteString
forall a. a -> a
id
{-# INLINE toBS #-}
instance StringLike ByteStringZeroTerminated where
s_index' :: ByteStringZeroTerminated -> Int -> Word8
s_index' (BSZT ByteString
ps) Int
n = ByteString
ps ByteString -> Int -> Word8
`SU.unsafeIndex` Int
n
{-# INLINE s_index' #-}
elemIndexFrom' :: Word8 -> ByteStringZeroTerminated -> Int -> Maybe Int
elemIndexFrom' Word8
w (BSZT ByteString
bs) Int
i = Word8 -> ByteString -> Int -> Maybe Int
elemIndexFrom Word8
w ByteString
bs Int
i
{-# INLINE elemIndexFrom' #-}
drop' :: Int -> ByteStringZeroTerminated -> ByteStringZeroTerminated
drop' Int
i (BSZT ByteString
bs) = ByteString -> ByteStringZeroTerminated
BSZT (ByteString -> ByteStringZeroTerminated)
-> ByteString -> ByteStringZeroTerminated
forall a b. (a -> b) -> a -> b
$ Int -> ByteString -> ByteString
S.drop Int
i ByteString
bs
{-# INLINE drop' #-}
substring' :: ByteStringZeroTerminated -> Int -> Int -> ByteString
substring' (BSZT ByteString
bs) Int
s Int
t = ByteString -> Int -> Int -> ByteString
substring ByteString
bs Int
s Int
t
{-# INLINE substring' #-}
toBS :: ByteStringZeroTerminated -> ByteString
toBS (BSZT ByteString
bs) = ByteString
bs
{-# INLINE toBS #-}
data Process a =
Process {
forall a. Process a -> ByteString -> a
openF :: !(ByteString -> a)
, forall a. Process a -> ByteString -> ByteString -> a
attrF :: !(ByteString -> ByteString -> a)
, forall a. Process a -> ByteString -> a
endOpenF :: !(ByteString -> a)
, forall a. Process a -> ByteString -> a
textF :: !(ByteString -> a)
, forall a. Process a -> ByteString -> a
closeF :: !(ByteString -> a)
, forall a. Process a -> ByteString -> a
cdataF :: !(ByteString -> a)
}
validate :: (StringLike str) => str -> Bool
validate :: forall str. StringLike str => str -> Bool
validate str
s =
case () -> Either XenoException ()
forall e a. Exception e => a -> Either e a
spork
(Identity () -> ()
forall a. Identity a -> a
runIdentity
(Process (Identity ()) -> str -> Identity ()
forall (m :: * -> *) str.
(Monad m, StringLike str) =>
Process (m ()) -> str -> m ()
process
Process {
openF :: ByteString -> Identity ()
openF = \ByteString
_ -> () -> Identity ()
forall a. a -> Identity a
forall (f :: * -> *) a. Applicative f => a -> f a
pure ()
, attrF :: ByteString -> ByteString -> Identity ()
attrF = \ByteString
_ ByteString
_ -> () -> Identity ()
forall a. a -> Identity a
forall (f :: * -> *) a. Applicative f => a -> f a
pure ()
, endOpenF :: ByteString -> Identity ()
endOpenF = \ByteString
_ -> () -> Identity ()
forall a. a -> Identity a
forall (f :: * -> *) a. Applicative f => a -> f a
pure ()
, textF :: ByteString -> Identity ()
textF = \ByteString
_ -> () -> Identity ()
forall a. a -> Identity a
forall (f :: * -> *) a. Applicative f => a -> f a
pure ()
, closeF :: ByteString -> Identity ()
closeF = \ByteString
_ -> () -> Identity ()
forall a. a -> Identity a
forall (f :: * -> *) a. Applicative f => a -> f a
pure ()
, cdataF :: ByteString -> Identity ()
cdataF = \ByteString
_ -> () -> Identity ()
forall a. a -> Identity a
forall (f :: * -> *) a. Applicative f => a -> f a
pure ()
}
str
s)) of
Left (XenoException
_ :: XenoException) -> Bool
False
Right ()
_ -> Bool
True
{-# INLINE validate #-}
{-# SPECIALISE validate :: ByteString -> Bool #-}
{-# SPECIALISE validate :: ByteStringZeroTerminated -> Bool #-}
validateEx :: (StringLike str) => str -> Bool
validateEx :: forall str. StringLike str => str -> Bool
validateEx str
s =
case () -> Either XenoException ()
forall e a. Exception e => a -> Either e a
spork
((forall s. ST s ()) -> ()
forall a. (forall s. ST s a) -> a
runST ((forall s. ST s ()) -> ()) -> (forall s. ST s ()) -> ()
forall a b. (a -> b) -> a -> b
$ do
STRef s [ByteString]
tags <- [ByteString] -> ST s (STRef s [ByteString])
forall a s. a -> ST s (STRef s a)
newSTRef []
(Process (ST s ()) -> str -> ST s ()
forall (m :: * -> *) str.
(Monad m, StringLike str) =>
Process (m ()) -> str -> m ()
process
Process {
openF :: ByteString -> ST s ()
openF = \ByteString
tag -> STRef s [ByteString] -> ([ByteString] -> [ByteString]) -> ST s ()
forall s a. STRef s a -> (a -> a) -> ST s ()
modifySTRef' STRef s [ByteString]
tags (ByteString
tagByteString -> [ByteString] -> [ByteString]
forall a. a -> [a] -> [a]
:)
, attrF :: ByteString -> ByteString -> ST s ()
attrF = \ByteString
_ ByteString
_ -> () -> ST s ()
forall a. a -> ST s a
forall (f :: * -> *) a. Applicative f => a -> f a
pure ()
, endOpenF :: ByteString -> ST s ()
endOpenF = \ByteString
_ -> () -> ST s ()
forall a. a -> ST s a
forall (f :: * -> *) a. Applicative f => a -> f a
pure ()
, textF :: ByteString -> ST s ()
textF = \ByteString
_ -> () -> ST s ()
forall a. a -> ST s a
forall (f :: * -> *) a. Applicative f => a -> f a
pure ()
, closeF :: ByteString -> ST s ()
closeF = \ByteString
tag ->
STRef s [ByteString] -> ([ByteString] -> [ByteString]) -> ST s ()
forall s a. STRef s a -> (a -> a) -> ST s ()
modifySTRef' STRef s [ByteString]
tags (([ByteString] -> [ByteString]) -> ST s ())
-> ([ByteString] -> [ByteString]) -> ST s ()
forall a b. (a -> b) -> a -> b
$ \case
[] -> [Char] -> [ByteString]
forall a. [Char] -> [a]
forall (m :: * -> *) a. MonadFail m => [Char] -> m a
fail ([Char] -> [ByteString]) -> [Char] -> [ByteString]
forall a b. (a -> b) -> a -> b
$ [Char]
"Unexpected close tag \"" [Char] -> [Char] -> [Char]
forall a. [a] -> [a] -> [a]
++ ByteString -> [Char]
forall a. Show a => a -> [Char]
show ByteString
tag [Char] -> [Char] -> [Char]
forall a. [a] -> [a] -> [a]
++ [Char]
"\""
(ByteString
expectedTag:[ByteString]
tags') ->
if ByteString
expectedTag ByteString -> ByteString -> Bool
forall a. Eq a => a -> a -> Bool
== ByteString
tag
then [ByteString]
tags'
else [Char] -> [ByteString]
forall a. [Char] -> [a]
forall (m :: * -> *) a. MonadFail m => [Char] -> m a
fail ([Char] -> [ByteString]) -> [Char] -> [ByteString]
forall a b. (a -> b) -> a -> b
$ [Char]
"Unexpected close tag. Expected \"" [Char] -> [Char] -> [Char]
forall a. [a] -> [a] -> [a]
++ ByteString -> [Char]
forall a. Show a => a -> [Char]
show ByteString
expectedTag [Char] -> [Char] -> [Char]
forall a. [a] -> [a] -> [a]
++ [Char]
"\", but got \"" [Char] -> [Char] -> [Char]
forall a. [a] -> [a] -> [a]
++ ByteString -> [Char]
forall a. Show a => a -> [Char]
show ByteString
tag [Char] -> [Char] -> [Char]
forall a. [a] -> [a] -> [a]
++ [Char]
"\""
, cdataF :: ByteString -> ST s ()
cdataF = \ByteString
_ -> () -> ST s ()
forall a. a -> ST s a
forall (f :: * -> *) a. Applicative f => a -> f a
pure ()
}
str
s)
STRef s [ByteString] -> ST s [ByteString]
forall s a. STRef s a -> ST s a
readSTRef STRef s [ByteString]
tags ST s [ByteString] -> ([ByteString] -> ST s ()) -> ST s ()
forall a b. ST s a -> (a -> ST s b) -> ST s b
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= \case
[] -> () -> ST s ()
forall a. a -> ST s a
forall (m :: * -> *) a. Monad m => a -> m a
return ()
[ByteString]
tags' -> [Char] -> ST s ()
forall a. HasCallStack => [Char] -> a
error ([Char] -> ST s ()) -> [Char] -> ST s ()
forall a b. (a -> b) -> a -> b
$ [Char]
"Not all tags closed: " [Char] -> [Char] -> [Char]
forall a. [a] -> [a] -> [a]
++ [ByteString] -> [Char]
forall a. Show a => a -> [Char]
show [ByteString]
tags'
) of
Left (XenoException
_ :: XenoException) -> Bool
False
Right ()
_ -> Bool
True
{-# INLINE validateEx #-}
{-# SPECIALISE validateEx :: ByteString -> Bool #-}
{-# SPECIALISE validateEx :: ByteStringZeroTerminated -> Bool #-}
dump :: ByteString -> IO ()
dump :: ByteString -> IO ()
dump ByteString
str =
StateT Int IO () -> Int -> IO ()
forall (m :: * -> *) s a. Monad m => StateT s m a -> s -> m a
evalStateT
(Process (StateT Int IO ()) -> ByteString -> StateT Int IO ()
forall (m :: * -> *) str.
(Monad m, StringLike str) =>
Process (m ()) -> str -> m ()
process
Process {
openF :: ByteString -> StateT Int IO ()
openF = \ByteString
name -> do
Int
level <- StateT Int IO Int
forall s (m :: * -> *). MonadState s m => m s
get
IO () -> StateT Int IO ()
forall (m :: * -> *) a. Monad m => m a -> StateT Int m a
forall (t :: (* -> *) -> * -> *) (m :: * -> *) a.
(MonadTrans t, Monad m) =>
m a -> t m a
lift (ByteString -> IO ()
S8.putStr (Int -> Char -> ByteString
S8.replicate Int
level Char
' ' ByteString -> ByteString -> ByteString
forall a. Semigroup a => a -> a -> a
<> ByteString
"<" ByteString -> ByteString -> ByteString
forall a. Semigroup a => a -> a -> a
<> ByteString
name ByteString -> ByteString -> ByteString
forall a. Semigroup a => a -> a -> a
<> ByteString
""))
, attrF :: ByteString -> ByteString -> StateT Int IO ()
attrF = \ByteString
key ByteString
value -> IO () -> StateT Int IO ()
forall (m :: * -> *) a. Monad m => m a -> StateT Int m a
forall (t :: (* -> *) -> * -> *) (m :: * -> *) a.
(MonadTrans t, Monad m) =>
m a -> t m a
lift (ByteString -> IO ()
S8.putStr (ByteString
" " ByteString -> ByteString -> ByteString
forall a. Semigroup a => a -> a -> a
<> ByteString
key ByteString -> ByteString -> ByteString
forall a. Semigroup a => a -> a -> a
<> ByteString
"=\"" ByteString -> ByteString -> ByteString
forall a. Semigroup a => a -> a -> a
<> ByteString
value ByteString -> ByteString -> ByteString
forall a. Semigroup a => a -> a -> a
<> ByteString
"\""))
, endOpenF :: ByteString -> StateT Int IO ()
endOpenF = \ByteString
_ -> do
Int
level <- StateT Int IO Int
forall s (m :: * -> *). MonadState s m => m s
get
let !level' :: Int
level' = Int
level Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
2
Int -> StateT Int IO ()
forall s (m :: * -> *). MonadState s m => s -> m ()
put Int
level'
IO () -> StateT Int IO ()
forall (m :: * -> *) a. Monad m => m a -> StateT Int m a
forall (t :: (* -> *) -> * -> *) (m :: * -> *) a.
(MonadTrans t, Monad m) =>
m a -> t m a
lift (ByteString -> IO ()
S8.putStrLn (ByteString
">"))
, textF :: ByteString -> StateT Int IO ()
textF = \ByteString
text -> do
Int
level <- StateT Int IO Int
forall s (m :: * -> *). MonadState s m => m s
get
IO () -> StateT Int IO ()
forall (m :: * -> *) a. Monad m => m a -> StateT Int m a
forall (t :: (* -> *) -> * -> *) (m :: * -> *) a.
(MonadTrans t, Monad m) =>
m a -> t m a
lift (ByteString -> IO ()
S8.putStrLn (Int -> Char -> ByteString
S8.replicate Int
level Char
' ' ByteString -> ByteString -> ByteString
forall a. Semigroup a => a -> a -> a
<> [Char] -> ByteString
S8.pack (ByteString -> [Char]
forall a. Show a => a -> [Char]
show ByteString
text)))
, closeF :: ByteString -> StateT Int IO ()
closeF = \ByteString
name -> do
Int
level <- StateT Int IO Int
forall s (m :: * -> *). MonadState s m => m s
get
let !level' :: Int
level' = Int
level Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
2
Int -> StateT Int IO ()
forall s (m :: * -> *). MonadState s m => s -> m ()
put Int
level'
IO () -> StateT Int IO ()
forall (m :: * -> *) a. Monad m => m a -> StateT Int m a
forall (t :: (* -> *) -> * -> *) (m :: * -> *) a.
(MonadTrans t, Monad m) =>
m a -> t m a
lift (ByteString -> IO ()
S8.putStrLn (Int -> Char -> ByteString
S8.replicate Int
level' Char
' ' ByteString -> ByteString -> ByteString
forall a. Semigroup a => a -> a -> a
<> ByteString
"</" ByteString -> ByteString -> ByteString
forall a. Semigroup a => a -> a -> a
<> ByteString
name ByteString -> ByteString -> ByteString
forall a. Semigroup a => a -> a -> a
<> ByteString
">"))
, cdataF :: ByteString -> StateT Int IO ()
cdataF = \ByteString
cdata -> do
Int
level <- StateT Int IO Int
forall s (m :: * -> *). MonadState s m => m s
get
IO () -> StateT Int IO ()
forall (m :: * -> *) a. Monad m => m a -> StateT Int m a
forall (t :: (* -> *) -> * -> *) (m :: * -> *) a.
(MonadTrans t, Monad m) =>
m a -> t m a
lift (ByteString -> IO ()
S8.putStrLn (Int -> Char -> ByteString
S8.replicate Int
level Char
' ' ByteString -> ByteString -> ByteString
forall a. Semigroup a => a -> a -> a
<> ByteString
"CDATA: " ByteString -> ByteString -> ByteString
forall a. Semigroup a => a -> a -> a
<> [Char] -> ByteString
S8.pack (ByteString -> [Char]
forall a. Show a => a -> [Char]
show ByteString
cdata)))
}
ByteString
str)
(Int
0 :: Int)
fold
:: (s -> ByteString -> s)
-> (s -> ByteString -> ByteString -> s)
-> (s -> ByteString -> s)
-> (s -> ByteString -> s)
-> (s -> ByteString -> s)
-> (s -> ByteString -> s)
-> s
-> ByteString
-> Either XenoException s
fold :: forall s.
(s -> ByteString -> s)
-> (s -> ByteString -> ByteString -> s)
-> (s -> ByteString -> s)
-> (s -> ByteString -> s)
-> (s -> ByteString -> s)
-> (s -> ByteString -> s)
-> s
-> ByteString
-> Either XenoException s
fold s -> ByteString -> s
openF s -> ByteString -> ByteString -> s
attrF s -> ByteString -> s
endOpenF s -> ByteString -> s
textF s -> ByteString -> s
closeF s -> ByteString -> s
cdataF s
s ByteString
str =
s -> Either XenoException s
forall e a. Exception e => a -> Either e a
spork
(State s () -> s -> s
forall s a. State s a -> s -> s
execState
(Process (State s ()) -> ByteString -> State s ()
forall (m :: * -> *) str.
(Monad m, StringLike str) =>
Process (m ()) -> str -> m ()
process Process {
openF :: ByteString -> State s ()
openF = \ByteString
name -> (s -> s) -> State s ()
forall s (m :: * -> *). MonadState s m => (s -> s) -> m ()
modify' (\s
s' -> s -> ByteString -> s
openF s
s' ByteString
name)
, attrF :: ByteString -> ByteString -> State s ()
attrF = \ByteString
key ByteString
value -> (s -> s) -> State s ()
forall s (m :: * -> *). MonadState s m => (s -> s) -> m ()
modify' (\s
s' -> s -> ByteString -> ByteString -> s
attrF s
s' ByteString
key ByteString
value)
, endOpenF :: ByteString -> State s ()
endOpenF = \ByteString
name -> (s -> s) -> State s ()
forall s (m :: * -> *). MonadState s m => (s -> s) -> m ()
modify' (\s
s' -> s -> ByteString -> s
endOpenF s
s' ByteString
name)
, textF :: ByteString -> State s ()
textF = \ByteString
text -> (s -> s) -> State s ()
forall s (m :: * -> *). MonadState s m => (s -> s) -> m ()
modify' (\s
s' -> s -> ByteString -> s
textF s
s' ByteString
text)
, closeF :: ByteString -> State s ()
closeF = \ByteString
name -> (s -> s) -> State s ()
forall s (m :: * -> *). MonadState s m => (s -> s) -> m ()
modify' (\s
s' -> s -> ByteString -> s
closeF s
s' ByteString
name)
, cdataF :: ByteString -> State s ()
cdataF = \ByteString
cdata -> (s -> s) -> State s ()
forall s (m :: * -> *). MonadState s m => (s -> s) -> m ()
modify' (\s
s' -> s -> ByteString -> s
cdataF s
s' ByteString
cdata)
} ByteString
str)
s
s)
process
:: (Monad m, StringLike str)
=> Process (m ())
-> str
-> m ()
process :: forall (m :: * -> *) str.
(Monad m, StringLike str) =>
Process (m ()) -> str -> m ()
process !(Process {ByteString -> m ()
openF :: forall a. Process a -> ByteString -> a
openF :: ByteString -> m ()
openF, ByteString -> ByteString -> m ()
attrF :: forall a. Process a -> ByteString -> ByteString -> a
attrF :: ByteString -> ByteString -> m ()
attrF, ByteString -> m ()
endOpenF :: forall a. Process a -> ByteString -> a
endOpenF :: ByteString -> m ()
endOpenF, ByteString -> m ()
textF :: forall a. Process a -> ByteString -> a
textF :: ByteString -> m ()
textF, ByteString -> m ()
closeF :: forall a. Process a -> ByteString -> a
closeF :: ByteString -> m ()
closeF, ByteString -> m ()
cdataF :: forall a. Process a -> ByteString -> a
cdataF :: ByteString -> m ()
cdataF}) str
str = Int -> m ()
findLT Int
0
where
findLT :: Int -> m ()
findLT Int
index =
case Word8 -> str -> Int -> Maybe Int
forall str. StringLike str => Word8 -> str -> Int -> Maybe Int
elemIndexFrom' Word8
openTagChar str
str Int
index of
Maybe Int
Nothing -> Bool -> m () -> m ()
forall (f :: * -> *). Applicative f => Bool -> f () -> f ()
unless (ByteString -> Bool
S.null ByteString
text) (ByteString -> m ()
textF ByteString
text)
where text :: ByteString
text = str -> ByteString
forall str. StringLike str => str -> ByteString
toBS (str -> ByteString) -> str -> ByteString
forall a b. (a -> b) -> a -> b
$ Int -> str -> str
forall str. StringLike str => Int -> str -> str
drop' Int
index str
str
Just Int
fromLt -> do
Bool -> m () -> m ()
forall (f :: * -> *). Applicative f => Bool -> f () -> f ()
unless (ByteString -> Bool
S.null ByteString
text) (ByteString -> m ()
textF ByteString
text)
Int -> m ()
checkOpenComment (Int
fromLt Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1)
where text :: ByteString
text = str -> Int -> Int -> ByteString
forall str. StringLike str => str -> Int -> Int -> ByteString
substring' str
str Int
index Int
fromLt
checkOpenComment :: Int -> m ()
checkOpenComment Int
index
| str -> Int -> Word8
forall str. StringLike str => str -> Int -> Word8
s_index' str
this Int
0 Word8 -> Word8 -> Bool
forall a. Eq a => a -> a -> Bool
== Word8
bangChar
, str -> Int -> Word8
forall str. StringLike str => str -> Int -> Word8
s_index' str
this Int
1 Word8 -> Word8 -> Bool
forall a. Eq a => a -> a -> Bool
== Word8
commentChar
, str -> Int -> Word8
forall str. StringLike str => str -> Int -> Word8
s_index' str
this Int
2 Word8 -> Word8 -> Bool
forall a. Eq a => a -> a -> Bool
== Word8
commentChar
= Int -> m ()
findCommentEnd (Int
index Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
3)
| str -> Int -> Word8
forall str. StringLike str => str -> Int -> Word8
s_index' str
this Int
0 Word8 -> Word8 -> Bool
forall a. Eq a => a -> a -> Bool
== Word8
bangChar
, str -> Int -> Word8
forall str. StringLike str => str -> Int -> Word8
s_index' str
this Int
1 Word8 -> Word8 -> Bool
forall a. Eq a => a -> a -> Bool
== Word8
openAngleBracketChar
, str -> Int -> Word8
forall str. StringLike str => str -> Int -> Word8
s_index' str
this Int
2 Word8 -> Word8 -> Bool
forall a. Eq a => a -> a -> Bool
== Word8
67
, str -> Int -> Word8
forall str. StringLike str => str -> Int -> Word8
s_index' str
this Int
3 Word8 -> Word8 -> Bool
forall a. Eq a => a -> a -> Bool
== Word8
68
, str -> Int -> Word8
forall str. StringLike str => str -> Int -> Word8
s_index' str
this Int
4 Word8 -> Word8 -> Bool
forall a. Eq a => a -> a -> Bool
== Word8
65
, str -> Int -> Word8
forall str. StringLike str => str -> Int -> Word8
s_index' str
this Int
5 Word8 -> Word8 -> Bool
forall a. Eq a => a -> a -> Bool
== Word8
84
, str -> Int -> Word8
forall str. StringLike str => str -> Int -> Word8
s_index' str
this Int
6 Word8 -> Word8 -> Bool
forall a. Eq a => a -> a -> Bool
== Word8
65
, str -> Int -> Word8
forall str. StringLike str => str -> Int -> Word8
s_index' str
this Int
7 Word8 -> Word8 -> Bool
forall a. Eq a => a -> a -> Bool
== Word8
openAngleBracketChar
= Int -> Int -> m ()
findCDataEnd (Int
index Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
8) (Int
index Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
8)
| Bool
otherwise
= Int -> m ()
findTagName Int
index
where
this :: str
this = Int -> str -> str
forall str. StringLike str => Int -> str -> str
drop' Int
index str
str
findCommentEnd :: Int -> m ()
findCommentEnd Int
index =
case Word8 -> str -> Int -> Maybe Int
forall str. StringLike str => Word8 -> str -> Int -> Maybe Int
elemIndexFrom' Word8
commentChar str
str Int
index of
Maybe Int
Nothing -> XenoException -> m ()
forall a e. Exception e => e -> a
throw (XenoException -> m ()) -> XenoException -> m ()
forall a b. (a -> b) -> a -> b
$ Int -> ByteString -> XenoException
XenoParseError Int
index ByteString
"Couldn't find the closing comment dash."
Just Int
fromDash ->
if str -> Int -> Word8
forall str. StringLike str => str -> Int -> Word8
s_index' str
this Int
0 Word8 -> Word8 -> Bool
forall a. Eq a => a -> a -> Bool
== Word8
commentChar Bool -> Bool -> Bool
&& str -> Int -> Word8
forall str. StringLike str => str -> Int -> Word8
s_index' str
this Int
1 Word8 -> Word8 -> Bool
forall a. Eq a => a -> a -> Bool
== Word8
closeTagChar
then Int -> m ()
findLT (Int
fromDash Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
2)
else Int -> m ()
findCommentEnd (Int
fromDash Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1)
where this :: str
this = Int -> str -> str
forall str. StringLike str => Int -> str -> str
drop' Int
index str
str
findCDataEnd :: Int -> Int -> m ()
findCDataEnd Int
cdata_start Int
index =
case Word8 -> str -> Int -> Maybe Int
forall str. StringLike str => Word8 -> str -> Int -> Maybe Int
elemIndexFrom' Word8
closeAngleBracketChar str
str Int
index of
Maybe Int
Nothing -> XenoException -> m ()
forall a e. Exception e => e -> a
throw (XenoException -> m ()) -> XenoException -> m ()
forall a b. (a -> b) -> a -> b
$ Int -> ByteString -> XenoException
XenoParseError Int
index ByteString
"Couldn't find closing angle bracket for CDATA."
Just Int
fromCloseAngleBracket ->
if str -> Int -> Word8
forall str. StringLike str => str -> Int -> Word8
s_index' str
str (Int
fromCloseAngleBracket Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1) Word8 -> Word8 -> Bool
forall a. Eq a => a -> a -> Bool
== Word8
closeAngleBracketChar
then do
ByteString -> m ()
cdataF (str -> Int -> Int -> ByteString
forall str. StringLike str => str -> Int -> Int -> ByteString
substring' str
str Int
cdata_start Int
fromCloseAngleBracket)
Int -> m ()
findLT (Int
fromCloseAngleBracket Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
3)
else
Int -> Int -> m ()
findCDataEnd Int
cdata_start (Int
fromCloseAngleBracket Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1)
findTagName :: Int -> m ()
findTagName Int
index0
| str -> Int -> Word8
forall str. StringLike str => str -> Int -> Word8
s_index' str
str Int
index0 Word8 -> Word8 -> Bool
forall a. Eq a => a -> a -> Bool
== Word8
questionChar =
case Word8 -> str -> Int -> Maybe Int
forall str. StringLike str => Word8 -> str -> Int -> Maybe Int
elemIndexFrom' Word8
closeTagChar str
str Int
spaceOrCloseTag of
Maybe Int
Nothing -> XenoException -> m ()
forall a e. Exception e => e -> a
throw (XenoException -> m ()) -> XenoException -> m ()
forall a b. (a -> b) -> a -> b
$ Int -> ByteString -> XenoException
XenoParseError Int
index ByteString
"Couldn't find the end of the tag."
Just Int
fromGt -> do
Int -> m ()
findLT (Int
fromGt Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1)
| str -> Int -> Word8
forall str. StringLike str => str -> Int -> Word8
s_index' str
str Int
spaceOrCloseTag Word8 -> Word8 -> Bool
forall a. Eq a => a -> a -> Bool
== Word8
closeTagChar = do
let tagname :: ByteString
tagname = str -> Int -> Int -> ByteString
forall str. StringLike str => str -> Int -> Int -> ByteString
substring' str
str Int
index Int
spaceOrCloseTag
if str -> Int -> Word8
forall str. StringLike str => str -> Int -> Word8
s_index' str
str Int
index0 Word8 -> Word8 -> Bool
forall a. Eq a => a -> a -> Bool
== Word8
slashChar
then ByteString -> m ()
closeF ByteString
tagname
else do
ByteString -> m ()
openF ByteString
tagname
ByteString -> m ()
endOpenF ByteString
tagname
Int -> m ()
findLT (Int
spaceOrCloseTag Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1)
| Bool
otherwise = do
let tagname :: ByteString
tagname = str -> Int -> Int -> ByteString
forall str. StringLike str => str -> Int -> Int -> ByteString
substring' str
str Int
index Int
spaceOrCloseTag
ByteString -> m ()
openF ByteString
tagname
Either Int Int
result <- Int -> m (Either Int Int)
findAttributes Int
spaceOrCloseTag
ByteString -> m ()
endOpenF ByteString
tagname
case Either Int Int
result of
Right Int
closingTag -> Int -> m ()
findLT (Int
closingTag Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1)
Left Int
closingPair -> do
ByteString -> m ()
closeF ByteString
tagname
Int -> m ()
findLT (Int
closingPair Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
2)
where
index :: Int
index =
if str -> Int -> Word8
forall str. StringLike str => str -> Int -> Word8
s_index' str
str Int
index0 Word8 -> Word8 -> Bool
forall a. Eq a => a -> a -> Bool
== Word8
slashChar
then Int
index0 Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1
else Int
index0
spaceOrCloseTag :: Int
spaceOrCloseTag = str -> Int -> Int
forall str. StringLike str => str -> Int -> Int
parseName str
str Int
index
findAttributes :: Int -> m (Either Int Int)
findAttributes Int
index0
| str -> Int -> Word8
forall str. StringLike str => str -> Int -> Word8
s_index' str
str Int
index Word8 -> Word8 -> Bool
forall a. Eq a => a -> a -> Bool
== Word8
slashChar
, str -> Int -> Word8
forall str. StringLike str => str -> Int -> Word8
s_index' str
str (Int
index Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1) Word8 -> Word8 -> Bool
forall a. Eq a => a -> a -> Bool
== Word8
closeTagChar
= Either Int Int -> m (Either Int Int)
forall a. a -> m a
forall (f :: * -> *) a. Applicative f => a -> f a
pure (Int -> Either Int Int
forall a b. a -> Either a b
Left Int
index)
| str -> Int -> Word8
forall str. StringLike str => str -> Int -> Word8
s_index' str
str Int
index Word8 -> Word8 -> Bool
forall a. Eq a => a -> a -> Bool
== Word8
closeTagChar
= Either Int Int -> m (Either Int Int)
forall a. a -> m a
forall (f :: * -> *) a. Applicative f => a -> f a
pure (Int -> Either Int Int
forall a b. b -> Either a b
Right Int
index)
| str -> Int -> Word8
forall str. StringLike str => str -> Int -> Word8
s_index' str
str Int
afterAttrName Word8 -> Word8 -> Bool
forall a. Eq a => a -> a -> Bool
== Word8
equalChar
, Word8
usedChar Word8 -> Word8 -> Bool
forall a. Eq a => a -> a -> Bool
== Word8
quoteChar Bool -> Bool -> Bool
|| Word8
usedChar Word8 -> Word8 -> Bool
forall a. Eq a => a -> a -> Bool
== Word8
doubleQuoteChar
= case Word8 -> str -> Int -> Maybe Int
forall str. StringLike str => Word8 -> str -> Int -> Maybe Int
elemIndexFrom' Word8
usedChar str
str (Int
quoteIndex Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1) of
Maybe Int
Nothing ->
XenoException -> m (Either Int Int)
forall a e. Exception e => e -> a
throw
(Int -> ByteString -> XenoException
XenoParseError Int
index ByteString
"Couldn't find the matching quote character.")
Just Int
endQuoteIndex -> do
ByteString -> ByteString -> m ()
attrF
(str -> Int -> Int -> ByteString
forall str. StringLike str => str -> Int -> Int -> ByteString
substring' str
str Int
index Int
afterAttrName)
(str -> Int -> Int -> ByteString
forall str. StringLike str => str -> Int -> Int -> ByteString
substring'
str
str
(Int
quoteIndex Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1)
(Int
endQuoteIndex))
Int -> m (Either Int Int)
findAttributes (Int
endQuoteIndex Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1)
| str -> Int -> Word8
forall str. StringLike str => str -> Int -> Word8
s_index' str
str Int
afterAttrName Word8 -> Word8 -> Bool
forall a. Eq a => a -> a -> Bool
== Word8
equalChar
= XenoException -> m (Either Int Int)
forall a e. Exception e => e -> a
throw (Int -> ByteString -> XenoException
XenoParseError Int
index(ByteString
"Expected ' or \", got: " ByteString -> ByteString -> ByteString
forall a. Semigroup a => a -> a -> a
<> Word8 -> ByteString
S.singleton Word8
usedChar))
| Bool
otherwise
= XenoException -> m (Either Int Int)
forall a e. Exception e => e -> a
throw (Int -> ByteString -> XenoException
XenoParseError Int
index (ByteString
"Expected =, got: " ByteString -> ByteString -> ByteString
forall a. Semigroup a => a -> a -> a
<> Word8 -> ByteString
S.singleton (str -> Int -> Word8
forall str. StringLike str => str -> Int -> Word8
s_index' str
str Int
afterAttrName) ByteString -> ByteString -> ByteString
forall a. Semigroup a => a -> a -> a
<> ByteString
" at character index: " ByteString -> ByteString -> ByteString
forall a. Semigroup a => a -> a -> a
<> ([Char] -> ByteString
S8.pack ([Char] -> ByteString) -> (Int -> [Char]) -> Int -> ByteString
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Int -> [Char]
forall a. Show a => a -> [Char]
show) Int
afterAttrName))
where
index :: Int
index = str -> Int -> Int
forall str. StringLike str => str -> Int -> Int
skipSpaces str
str Int
index0
#ifdef WHITESPACE_AROUND_EQUALS
afterAttrName = skipSpaces str (parseName str index)
quoteIndex = skipSpaces str (afterAttrName + 1)
#else
afterAttrName :: Int
afterAttrName = str -> Int -> Int
forall str. StringLike str => str -> Int -> Int
parseName str
str Int
index
quoteIndex :: Int
quoteIndex = Int
afterAttrName Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1
#endif
usedChar :: Word8
usedChar = str -> Int -> Word8
forall str. StringLike str => str -> Int -> Word8
s_index' str
str Int
quoteIndex
{-# INLINE process #-}
{-# SPECIALISE process :: Process (Identity ()) -> ByteString -> Identity ()
#-}
{-# SPECIALISE process :: Process (State s ()) -> ByteString -> State s ()
#-}
{-# SPECIALISE process :: Process (ST s ()) -> ByteString -> ST s ()
#-}
{-# SPECIALISE process :: Process (IO ()) -> ByteString -> IO ()
#-}
{-# SPECIALISE process :: Process (Identity ()) -> ByteStringZeroTerminated -> Identity ()
#-}
{-# SPECIALISE process :: Process (State s ()) -> ByteStringZeroTerminated -> State s ()
#-}
{-# SPECIALISE process :: Process (ST s ()) -> ByteStringZeroTerminated -> ST s ()
#-}
{-# SPECIALISE process :: Process (IO ()) -> ByteStringZeroTerminated -> IO ()
#-}
s_index :: ByteString -> Int -> Word8
s_index :: ByteString -> Int -> Word8
s_index ByteString
ps Int
n
| Int
n Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< Int
0 = XenoException -> Word8
forall a e. Exception e => e -> a
throw (Int -> ByteString -> XenoException
XenoStringIndexProblem Int
n ByteString
ps)
| Int
n Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
>= ByteString -> Int
S.length ByteString
ps = XenoException -> Word8
forall a e. Exception e => e -> a
throw (Int -> ByteString -> XenoException
XenoStringIndexProblem Int
n ByteString
ps)
| Bool
otherwise = ByteString
ps ByteString -> Int -> Word8
`SU.unsafeIndex` Int
n
{-# INLINE s_index #-}
skipSpaces :: (StringLike str) => str -> Int -> Int
skipSpaces :: forall str. StringLike str => str -> Int -> Int
skipSpaces str
str Int
i =
if Word8 -> Bool
isSpaceChar (str -> Int -> Word8
forall str. StringLike str => str -> Int -> Word8
s_index' str
str Int
i)
then str -> Int -> Int
forall str. StringLike str => str -> Int -> Int
skipSpaces str
str (Int
i Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1)
else Int
i
{-# INLINE skipSpaces #-}
substring :: ByteString -> Int -> Int -> ByteString
substring :: ByteString -> Int -> Int -> ByteString
substring ByteString
s Int
start Int
end = Int -> ByteString -> ByteString
S.take (Int
end Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
start) (Int -> ByteString -> ByteString
S.drop Int
start ByteString
s)
{-# INLINE substring #-}
parseName :: (StringLike str) => str -> Int -> Int
parseName :: forall str. StringLike str => str -> Int -> Int
parseName str
str Int
index =
if Bool -> Bool
not (Word8 -> Bool
isNameChar1 (str -> Int -> Word8
forall str. StringLike str => str -> Int -> Word8
s_index' str
str Int
index))
then Int
index
else str -> Int -> Int
forall str. StringLike str => str -> Int -> Int
parseName' str
str (Int
index Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1)
{-# INLINE parseName #-}
parseName' :: (StringLike str) => str -> Int -> Int
parseName' :: forall str. StringLike str => str -> Int -> Int
parseName' str
str Int
index =
if Bool -> Bool
not (Word8 -> Bool
isNameChar (str -> Int -> Word8
forall str. StringLike str => str -> Int -> Word8
s_index' str
str Int
index))
then Int
index
else str -> Int -> Int
forall str. StringLike str => str -> Int -> Int
parseName' str
str (Int
index Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1)
{-# INLINE parseName' #-}
elemIndexFrom :: Word8 -> ByteString -> Int -> Maybe Int
elemIndexFrom :: Word8 -> ByteString -> Int -> Maybe Int
elemIndexFrom Word8
c ByteString
str Int
offset = (Int -> Int) -> Maybe Int -> Maybe Int
forall a b. (a -> b) -> Maybe a -> Maybe b
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap (Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
offset) (Word8 -> ByteString -> Maybe Int
S.elemIndex Word8
c (Int -> ByteString -> ByteString
S.drop Int
offset ByteString
str))
{-# INLINE elemIndexFrom #-}
isSpaceChar :: Word8 -> Bool
isSpaceChar :: Word8 -> Bool
isSpaceChar = Int -> Int -> Bool
forall a. Bits a => a -> Int -> Bool
testBit (Int
0b100000000000000000010011000000000 :: Int) (Int -> Bool) -> (Word8 -> Int) -> Word8 -> Bool
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Word8 -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral
{-# INLINE isSpaceChar #-}
isNameChar1 :: Word8 -> Bool
isNameChar1 :: Word8 -> Bool
isNameChar1 Word8
c =
(Word8
c Word8 -> Word8 -> Bool
forall a. Ord a => a -> a -> Bool
>= Word8
97 Bool -> Bool -> Bool
&& Word8
c Word8 -> Word8 -> Bool
forall a. Ord a => a -> a -> Bool
<= Word8
122) Bool -> Bool -> Bool
|| (Word8
c Word8 -> Word8 -> Bool
forall a. Ord a => a -> a -> Bool
>= Word8
65 Bool -> Bool -> Bool
&& Word8
c Word8 -> Word8 -> Bool
forall a. Ord a => a -> a -> Bool
<= Word8
90) Bool -> Bool -> Bool
|| Word8
c Word8 -> Word8 -> Bool
forall a. Eq a => a -> a -> Bool
== Word8
95 Bool -> Bool -> Bool
|| Word8
c Word8 -> Word8 -> Bool
forall a. Eq a => a -> a -> Bool
== Word8
58
{-# INLINE isNameChar1 #-}
isNameChar :: Word8 -> Bool
isNameChar :: Word8 -> Bool
isNameChar Word8
char = (Word64
lowMaskIsNameChar Word64 -> Int -> Bool
forall a. Bits a => a -> Int -> Bool
`testBit` Int
char'low) Bool -> Bool -> Bool
|| (Word64
highMaskIsNameChar Word64 -> Int -> Bool
forall a. Bits a => a -> Int -> Bool
`testBit` Int
char'high)
where
char'low :: Int
char'low = Word8 -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral Word8
char
char'high :: Int
char'high = Word8 -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Word8
char Word8 -> Word8 -> Word8
forall a. Num a => a -> a -> a
- Word8
64)
highMaskIsNameChar :: Word64
highMaskIsNameChar :: Word64
highMaskIsNameChar = Word64
0b11111111111111111111111111010000111111111111111111111111110
lowMaskIsNameChar :: Word64
lowMaskIsNameChar :: Word64
lowMaskIsNameChar = Word64
0b11111111111011000000000000000000000000000000000000000000000
{-# INLINE isNameChar #-}
quoteChar :: Word8
quoteChar :: Word8
quoteChar = Word8
39
doubleQuoteChar :: Word8
doubleQuoteChar :: Word8
doubleQuoteChar = Word8
34
equalChar :: Word8
equalChar :: Word8
equalChar = Word8
61
questionChar :: Word8
questionChar :: Word8
questionChar = Word8
63
slashChar :: Word8
slashChar :: Word8
slashChar = Word8
47
bangChar :: Word8
bangChar :: Word8
bangChar = Word8
33
commentChar :: Word8
= Word8
45
openTagChar :: Word8
openTagChar :: Word8
openTagChar = Word8
60
closeTagChar :: Word8
closeTagChar :: Word8
closeTagChar = Word8
62
openAngleBracketChar :: Word8
openAngleBracketChar :: Word8
openAngleBracketChar = Word8
91
closeAngleBracketChar :: Word8
closeAngleBracketChar :: Word8
closeAngleBracketChar = Word8
93
skipDoctype :: ByteString -> ByteString
skipDoctype :: ByteString -> ByteString
skipDoctype ByteString
arg =
if ByteString
"<!DOCTYPE" ByteString -> ByteString -> Bool
`S8.isPrefixOf` ByteString
bs
then let (ByteString
_, ByteString
rest)=ByteString
">" ByteString -> ByteString -> (ByteString, ByteString)
`S8.breakSubstring` ByteString
bs
in ByteString -> ByteString
skipBlanks (ByteString -> ByteString) -> ByteString -> ByteString
forall a b. (a -> b) -> a -> b
$ Int -> ByteString -> ByteString
S8.drop Int
1 ByteString
rest
else ByteString
bs
where
bs :: ByteString
bs = ByteString -> ByteString
skipBlanks ByteString
arg
skipBlanks :: ByteString -> ByteString
skipBlanks = (Char -> Bool) -> ByteString -> ByteString
S8.dropWhile Char -> Bool
isSpace