|
@@ -0,0 +1,167 @@
|
|
|
+module Data.Attoparsec.ByteString.Char8.Extras (
|
|
|
+ -- * Tools for character and Word8 tests
|
|
|
+ isCHorizontalSpace,
|
|
|
+ skipHorizontalSpace,
|
|
|
+ asW8,
|
|
|
+ -- * Parsing for printable types
|
|
|
+ parseShow,
|
|
|
+ parseShowCI,
|
|
|
+ parsePrintable,
|
|
|
+ parsePrintableCI,
|
|
|
+ parseEnum,
|
|
|
+ parseEnumCI,
|
|
|
+ -- * Other parsing utilities
|
|
|
+ failParser,
|
|
|
+ decodeBase64,
|
|
|
+ parserList,
|
|
|
+ parserFold,
|
|
|
+ quotedString
|
|
|
+ ) where
|
|
|
+
|
|
|
+import Data.Word8 as W
|
|
|
+import qualified Data.Char as C
|
|
|
+import Data.Attoparsec.ByteString.Char8
|
|
|
+import qualified Data.Attoparsec.ByteString.Char8 as A
|
|
|
+import Data.ByteString (ByteString)
|
|
|
+import Control.Applicative ((<|>), (*>))
|
|
|
+import qualified Data.ByteString.Base64 as B64
|
|
|
+import qualified Codec.Binary.UTF8.String as UTF8
|
|
|
+import qualified Data.ByteString as BS
|
|
|
+
|
|
|
+
|
|
|
+utf8bs :: String -> ByteString
|
|
|
+utf8bs = BS.pack . UTF8.encode
|
|
|
+
|
|
|
+-- | A version of attoparsec's isHorizontalSpace that takes a Char as argument.
|
|
|
+isCHorizontalSpace :: Char -> Bool
|
|
|
+isCHorizontalSpace c = A.isHorizontalSpace . asW8 $ c
|
|
|
+
|
|
|
+-- | Skips through the characters that satisfy attoparsec's isHorizontalSpace.
|
|
|
+-- Never fails.
|
|
|
+skipHorizontalSpace :: Parser ()
|
|
|
+skipHorizontalSpace = A.skipWhile (A.isHorizontalSpace . asW8)
|
|
|
+
|
|
|
+-- | Converts a character into it's Word8 value, truncating any extra bytes.
|
|
|
+asW8 :: Char -> Word8
|
|
|
+asW8 = fromIntegral . C.ord
|
|
|
+
|
|
|
+-- | Makes a parser fail.
|
|
|
+failParser :: Parser a
|
|
|
+failParser = satisfy (\_ -> False) *> (return . error $ "Passed a parser that must always fail")
|
|
|
+
|
|
|
+{- |
|
|
|
+parserFold f x0
|
|
|
+
|
|
|
+Acquires functions by parsing the data with f, and applies it to the result of the previous function,
|
|
|
+ until f fails.
|
|
|
+-}
|
|
|
+parserFold :: (Parser (a -> a)) -> a -> Parser a
|
|
|
+parserFold f a = (
|
|
|
+ do
|
|
|
+ r <- f
|
|
|
+ let a' = r a
|
|
|
+ parserFold f a'
|
|
|
+ ) <|> return a
|
|
|
+
|
|
|
+{- |
|
|
|
+parserList parsers x0
|
|
|
+
|
|
|
+Applies the parsers in order, taking as parameter the returned value of
|
|
|
+the previous parser, starting with x0.
|
|
|
+
|
|
|
+That is, for example:
|
|
|
+
|
|
|
+parserList [p0, p1, p2] x0
|
|
|
+
|
|
|
+Will do:
|
|
|
+ x1 <- p0 x0
|
|
|
+ x2 <- p1 x1
|
|
|
+ x3 <- p2 x2
|
|
|
+ return x3
|
|
|
+-}
|
|
|
+parserList :: [(b -> Parser b)] -> b -> Parser b
|
|
|
+parserList [] e0 = return e0
|
|
|
+parserList (f:ff) e0 = do
|
|
|
+ e1 <- f e0
|
|
|
+ parserList ff e1
|
|
|
+
|
|
|
+{- |
|
|
|
+Given a list of options, returns the first one that is show like the input.
|
|
|
+
|
|
|
+Does not consume input on the case of failure.
|
|
|
+-}
|
|
|
+parseShow :: Show a => [a] -> Parser a
|
|
|
+parseShow [] = failParser
|
|
|
+parseShow (s:ss) = ((string . utf8bs . show $ s) *> return s) <|> parseShow ss
|
|
|
+
|
|
|
+-- | Like parseShow, but case insensitive.
|
|
|
+parseShowCI :: Show a => [a] -> Parser a
|
|
|
+parseShowCI [] = failParser
|
|
|
+parseShowCI (s:ss) = ((stringCI . utf8bs . show $ s) *> return s) <|> parseShow ss
|
|
|
+
|
|
|
+{- |
|
|
|
+Given a list of options and a printing function, returns the first option that is
|
|
|
+printed like the input.
|
|
|
+-}
|
|
|
+parsePrintable :: (a -> String) -> [a] -> Parser a
|
|
|
+parsePrintable _ [] = failParser
|
|
|
+parsePrintable f (s:ss) = ((stringCI . utf8bs . f $ s) *> return s) <|> parsePrintable f ss
|
|
|
+
|
|
|
+-- | Like parsePrintable, but case insensitive
|
|
|
+parsePrintableCI :: (a -> String) -> [a] -> Parser a
|
|
|
+parsePrintableCI _ [] = failParser
|
|
|
+parsePrintableCI f (s:ss) = ((stringCI . utf8bs . f $ s) *> return s) <|> parsePrintable f ss
|
|
|
+
|
|
|
+-- | Acts like parseShow, trying every possible value
|
|
|
+parseEnum :: (Enum a, Bounded a, Show a) => Parser a
|
|
|
+parseEnum = let
|
|
|
+ l = [minBound .. maxBound]
|
|
|
+ in parseShow l
|
|
|
+
|
|
|
+-- | Acts like parseShowCI, trying every possible value
|
|
|
+parseEnumCI :: (Enum a, Bounded a, Show a) => Parser a
|
|
|
+parseEnumCI = let
|
|
|
+ l = [minBound .. maxBound]
|
|
|
+ in parseShow l
|
|
|
+
|
|
|
+-- | Consumes base64 encoded text, returning its binary decoded value.
|
|
|
+decodeBase64 :: Parser ByteString
|
|
|
+decodeBase64 = do
|
|
|
+ b64 <- A.takeWhile isBase64Char
|
|
|
+ case B64.decode $ b64 of
|
|
|
+ Left _ -> failParser
|
|
|
+ Right v -> return v
|
|
|
+ where
|
|
|
+ isBase64Char :: Char -> Bool
|
|
|
+ isBase64Char c = C.isAlphaNum c || c == '+' || c == '/'
|
|
|
+
|
|
|
+data QuoteScannerState = Quote | Escape
|
|
|
+
|
|
|
+{- |
|
|
|
+quotedStr escape forbiden_plain forbiden_quote
|
|
|
+
|
|
|
+Parses a possibly quoted string, where the characters in
|
|
|
+forbiden_plain are forbiden in unquoted text, and the
|
|
|
+characters in forbiden_quote are forbiden in quoted text.
|
|
|
+
|
|
|
+No forbiden characters are assumed (not even space). Thus,
|
|
|
+if no forbiden character is supplied, the parser will not
|
|
|
+terminate.
|
|
|
+
|
|
|
+Any character may be escaped with the escape character.
|
|
|
+-}
|
|
|
+quotedString :: C.Char -> [C.Char] -> [C.Char] -> A.Parser ByteString
|
|
|
+quotedString e fp fq = A.scan [] quoteScanner
|
|
|
+ where
|
|
|
+ quoteScanner :: [QuoteScannerState] -> Char -> Maybe [QuoteScannerState]
|
|
|
+ quoteScanner [] c
|
|
|
+ | elem c "'\"" = Just [Quote]
|
|
|
+ | c == e = Just [Escape]
|
|
|
+ | elem c fp = Nothing
|
|
|
+ | otherwise = Just []
|
|
|
+ quoteScanner fs@(Quote:ss) c
|
|
|
+ | elem c "'\"" = Just ss
|
|
|
+ | c == e = Just (Escape:fs)
|
|
|
+ | elem c fq = Nothing
|
|
|
+ | otherwise = Just fs
|
|
|
+ quoteScanner (Escape:ss) _ = Just ss
|