module Data.Attoparsec.ByteString.Char8.Extras ( -- * Tools for character and Word8 tests isCHorizontalSpace, skipHorizontalSpace, asW8, -- * Parsing for printable types parseShow, parseShowCI, parsePrintable, parsePrintableCI, parseEnum, parseEnumCI, -- * Other parsing utilities failParser, decodeBase64, parserList, parserFold, quotedString ) where import Data.Word8 as W import qualified Data.Char as C import Data.Attoparsec.ByteString.Char8 import qualified Data.Attoparsec.ByteString.Char8 as A import Data.ByteString (ByteString) import Control.Applicative ((<|>), (*>)) import qualified Data.ByteString.Base64 as B64 import qualified Codec.Binary.UTF8.String as UTF8 import qualified Data.ByteString as BS utf8bs :: String -> ByteString utf8bs = BS.pack . UTF8.encode -- | A version of attoparsec's isHorizontalSpace that takes a Char as argument. isCHorizontalSpace :: Char -> Bool isCHorizontalSpace c = A.isHorizontalSpace . asW8 $ c -- | Skips through the characters that satisfy attoparsec's isHorizontalSpace. -- Never fails. skipHorizontalSpace :: Parser () skipHorizontalSpace = A.skipWhile (A.isHorizontalSpace . asW8) -- | Converts a character into it's Word8 value, truncating any extra bytes. asW8 :: Char -> Word8 asW8 = fromIntegral . C.ord -- | Makes a parser fail. failParser :: Parser a failParser = satisfy (\_ -> False) *> (return . error $ "Passed a parser that must always fail") {- | parserFold f x0 Acquires functions by parsing the data with f, and applies it to the result of the previous function, until f fails. -} parserFold :: (Parser (a -> a)) -> a -> Parser a parserFold f a = ( do r <- f let a' = r a parserFold f a' ) <|> return a {- | parserList parsers x0 Applies the parsers in order, taking as parameter the returned value of the previous parser, starting with x0. That is, for example: parserList [p0, p1, p2] x0 Will do: x1 <- p0 x0 x2 <- p1 x1 x3 <- p2 x2 return x3 -} parserList :: [(b -> Parser b)] -> b -> Parser b parserList [] e0 = return e0 parserList (f:ff) e0 = do e1 <- f e0 parserList ff e1 {- | Given a list of options, returns the first one that is show like the input. Does not consume input on the case of failure. -} parseShow :: Show a => [a] -> Parser a parseShow [] = failParser parseShow (s:ss) = ((string . utf8bs . show $ s) *> return s) <|> parseShow ss -- | Like parseShow, but case insensitive. parseShowCI :: Show a => [a] -> Parser a parseShowCI [] = failParser parseShowCI (s:ss) = ((stringCI . utf8bs . show $ s) *> return s) <|> parseShow ss {- | Given a list of options and a printing function, returns the first option that is printed like the input. -} parsePrintable :: (a -> String) -> [a] -> Parser a parsePrintable _ [] = failParser parsePrintable f (s:ss) = ((stringCI . utf8bs . f $ s) *> return s) <|> parsePrintable f ss -- | Like parsePrintable, but case insensitive parsePrintableCI :: (a -> String) -> [a] -> Parser a parsePrintableCI _ [] = failParser parsePrintableCI f (s:ss) = ((stringCI . utf8bs . f $ s) *> return s) <|> parsePrintable f ss -- | Acts like parseShow, trying every possible value parseEnum :: (Enum a, Bounded a, Show a) => Parser a parseEnum = let l = [minBound .. maxBound] in parseShow l -- | Acts like parseShowCI, trying every possible value parseEnumCI :: (Enum a, Bounded a, Show a) => Parser a parseEnumCI = let l = [minBound .. maxBound] in parseShow l -- | Consumes base64 encoded text, returning its binary decoded value. decodeBase64 :: Parser ByteString decodeBase64 = do b64 <- A.takeWhile isBase64Char case B64.decode $ b64 of Left _ -> failParser Right v -> return v where isBase64Char :: Char -> Bool isBase64Char c = C.isAlphaNum c || c == '+' || c == '/' data QuoteScannerState = Quote | Escape {- | quotedStr escape forbiden_plain forbiden_quote Parses a possibly quoted string, where the characters in forbiden_plain are forbiden in unquoted text, and the characters in forbiden_quote are forbiden in quoted text. No forbiden characters are assumed (not even space). Thus, if no forbiden character is supplied, the parser will not terminate. Any character may be escaped with the escape character. -} quotedString :: C.Char -> [C.Char] -> [C.Char] -> A.Parser ByteString quotedString e fp fq = A.scan [] quoteScanner where quoteScanner :: [QuoteScannerState] -> Char -> Maybe [QuoteScannerState] quoteScanner [] c | elem c "'\"" = Just [Quote] | c == e = Just [Escape] | elem c fp = Nothing | otherwise = Just [] quoteScanner fs@(Quote:ss) c | elem c "'\"" = Just ss | c == e = Just (Escape:fs) | elem c fq = Nothing | otherwise = Just fs quoteScanner (Escape:ss) _ = Just ss