Extras.hs 4.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167
  1. module Data.Attoparsec.ByteString.Char8.Extras (
  2. -- * Tools for character and Word8 tests
  3. isCHorizontalSpace,
  4. skipHorizontalSpace,
  5. asW8,
  6. -- * Parsing for printable types
  7. parseShow,
  8. parseShowCI,
  9. parsePrintable,
  10. parsePrintableCI,
  11. parseEnum,
  12. parseEnumCI,
  13. -- * Other parsing utilities
  14. failParser,
  15. decodeBase64,
  16. parserList,
  17. parserFold,
  18. quotedString
  19. ) where
  20. import Data.Word8 as W
  21. import qualified Data.Char as C
  22. import Data.Attoparsec.ByteString.Char8
  23. import qualified Data.Attoparsec.ByteString.Char8 as A
  24. import Data.ByteString (ByteString)
  25. import Control.Applicative ((<|>), (*>))
  26. import qualified Data.ByteString.Base64 as B64
  27. import qualified Codec.Binary.UTF8.String as UTF8
  28. import qualified Data.ByteString as BS
  29. utf8bs :: String -> ByteString
  30. utf8bs = BS.pack . UTF8.encode
  31. -- | A version of attoparsec's isHorizontalSpace that takes a Char as argument.
  32. isCHorizontalSpace :: Char -> Bool
  33. isCHorizontalSpace c = A.isHorizontalSpace . asW8 $ c
  34. -- | Skips through the characters that satisfy attoparsec's isHorizontalSpace.
  35. -- Never fails.
  36. skipHorizontalSpace :: Parser ()
  37. skipHorizontalSpace = A.skipWhile (A.isHorizontalSpace . asW8)
  38. -- | Converts a character into it's Word8 value, truncating any extra bytes.
  39. asW8 :: Char -> Word8
  40. asW8 = fromIntegral . C.ord
  41. -- | Makes a parser fail.
  42. failParser :: Parser a
  43. failParser = satisfy (\_ -> False) *> (return . error $ "Passed a parser that must always fail")
  44. {- |
  45. parserFold f x0
  46. Acquires functions by parsing the data with f, and applies it to the result of the previous function,
  47. until f fails.
  48. -}
  49. parserFold :: (Parser (a -> a)) -> a -> Parser a
  50. parserFold f a = (
  51. do
  52. r <- f
  53. let a' = r a
  54. parserFold f a'
  55. ) <|> return a
  56. {- |
  57. parserList parsers x0
  58. Applies the parsers in order, taking as parameter the returned value of
  59. the previous parser, starting with x0.
  60. That is, for example:
  61. parserList [p0, p1, p2] x0
  62. Will do:
  63. x1 <- p0 x0
  64. x2 <- p1 x1
  65. x3 <- p2 x2
  66. return x3
  67. -}
  68. parserList :: [(b -> Parser b)] -> b -> Parser b
  69. parserList [] e0 = return e0
  70. parserList (f:ff) e0 = do
  71. e1 <- f e0
  72. parserList ff e1
  73. {- |
  74. Given a list of options, returns the first one that is show like the input.
  75. Does not consume input on the case of failure.
  76. -}
  77. parseShow :: Show a => [a] -> Parser a
  78. parseShow [] = failParser
  79. parseShow (s:ss) = ((string . utf8bs . show $ s) *> return s) <|> parseShow ss
  80. -- | Like parseShow, but case insensitive.
  81. parseShowCI :: Show a => [a] -> Parser a
  82. parseShowCI [] = failParser
  83. parseShowCI (s:ss) = ((stringCI . utf8bs . show $ s) *> return s) <|> parseShow ss
  84. {- |
  85. Given a list of options and a printing function, returns the first option that is
  86. printed like the input.
  87. -}
  88. parsePrintable :: (a -> String) -> [a] -> Parser a
  89. parsePrintable _ [] = failParser
  90. parsePrintable f (s:ss) = ((stringCI . utf8bs . f $ s) *> return s) <|> parsePrintable f ss
  91. -- | Like parsePrintable, but case insensitive
  92. parsePrintableCI :: (a -> String) -> [a] -> Parser a
  93. parsePrintableCI _ [] = failParser
  94. parsePrintableCI f (s:ss) = ((stringCI . utf8bs . f $ s) *> return s) <|> parsePrintable f ss
  95. -- | Acts like parseShow, trying every possible value
  96. parseEnum :: (Enum a, Bounded a, Show a) => Parser a
  97. parseEnum = let
  98. l = [minBound .. maxBound]
  99. in parseShow l
  100. -- | Acts like parseShowCI, trying every possible value
  101. parseEnumCI :: (Enum a, Bounded a, Show a) => Parser a
  102. parseEnumCI = let
  103. l = [minBound .. maxBound]
  104. in parseShow l
  105. -- | Consumes base64 encoded text, returning its binary decoded value.
  106. decodeBase64 :: Parser ByteString
  107. decodeBase64 = do
  108. b64 <- A.takeWhile isBase64Char
  109. case B64.decode $ b64 of
  110. Left _ -> failParser
  111. Right v -> return v
  112. where
  113. isBase64Char :: Char -> Bool
  114. isBase64Char c = C.isAlphaNum c || c == '+' || c == '/'
  115. data QuoteScannerState = Quote | Escape
  116. {- |
  117. quotedStr escape forbiden_plain forbiden_quote
  118. Parses a possibly quoted string, where the characters in
  119. forbiden_plain are forbiden in unquoted text, and the
  120. characters in forbiden_quote are forbiden in quoted text.
  121. No forbiden characters are assumed (not even space). Thus,
  122. if no forbiden character is supplied, the parser will not
  123. terminate.
  124. Any character may be escaped with the escape character.
  125. -}
  126. quotedString :: C.Char -> [C.Char] -> [C.Char] -> A.Parser ByteString
  127. quotedString e fp fq = A.scan [] quoteScanner
  128. where
  129. quoteScanner :: [QuoteScannerState] -> Char -> Maybe [QuoteScannerState]
  130. quoteScanner [] c
  131. | elem c "'\"" = Just [Quote]
  132. | c == e = Just [Escape]
  133. | elem c fp = Nothing
  134. | otherwise = Just []
  135. quoteScanner fs@(Quote:ss) c
  136. | elem c "'\"" = Just ss
  137. | c == e = Just (Escape:fs)
  138. | elem c fq = Nothing
  139. | otherwise = Just fs
  140. quoteScanner (Escape:ss) _ = Just ss