Browse Source

Initial version

Marcos Dumay de Medeiros 9 years ago
commit
74cb8ad355
5 changed files with 238 additions and 0 deletions
  1. 7 0
      .gitignore
  2. 30 0
      LICENSE
  3. 2 0
      Setup.hs
  4. 167 0
      src/Data/Attoparsec/ByteString/Char8/Extras.hs
  5. 32 0
      tools-for-attoparsec.cabal

+ 7 - 0
.gitignore

@@ -0,0 +1,7 @@
+dist/
+.cabal-sandbox/
+cabal.sandbox.config
+*~
+**/*~
+
+

+ 30 - 0
LICENSE

@@ -0,0 +1,30 @@
+Copyright (c) 2016, Marcos Dumay de Medeiros
+
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+
+    * Redistributions in binary form must reproduce the above
+      copyright notice, this list of conditions and the following
+      disclaimer in the documentation and/or other materials provided
+      with the distribution.
+
+    * Neither the name of Marcos Dumay de Medeiros nor the names of other
+      contributors may be used to endorse or promote products derived
+      from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

+ 2 - 0
Setup.hs

@@ -0,0 +1,2 @@
+import Distribution.Simple
+main = defaultMain

+ 167 - 0
src/Data/Attoparsec/ByteString/Char8/Extras.hs

@@ -0,0 +1,167 @@
+module Data.Attoparsec.ByteString.Char8.Extras (
+  -- * Tools for character and Word8 tests
+  isCHorizontalSpace,
+  skipHorizontalSpace,
+  asW8,
+  -- * Parsing for printable types
+  parseShow,
+  parseShowCI,
+  parsePrintable,
+  parsePrintableCI,
+  parseEnum,
+  parseEnumCI,
+  -- * Other parsing utilities
+  failParser,
+  decodeBase64,
+  parserList,
+  parserFold,
+  quotedString
+  ) where
+
+import Data.Word8 as W
+import qualified Data.Char as C
+import Data.Attoparsec.ByteString.Char8
+import qualified Data.Attoparsec.ByteString.Char8 as A
+import Data.ByteString (ByteString)
+import Control.Applicative ((<|>), (*>))
+import qualified Data.ByteString.Base64 as B64
+import qualified Codec.Binary.UTF8.String as UTF8
+import qualified Data.ByteString as BS
+
+
+utf8bs :: String -> ByteString
+utf8bs = BS.pack . UTF8.encode
+
+-- | A version of attoparsec's isHorizontalSpace that takes a Char as argument.
+isCHorizontalSpace :: Char -> Bool
+isCHorizontalSpace c = A.isHorizontalSpace . asW8 $ c
+
+-- | Skips through the characters that satisfy attoparsec's isHorizontalSpace.
+--   Never fails.
+skipHorizontalSpace :: Parser ()
+skipHorizontalSpace = A.skipWhile (A.isHorizontalSpace . asW8)
+
+-- | Converts a character into it's Word8 value, truncating any extra bytes.
+asW8 :: Char -> Word8
+asW8 = fromIntegral . C.ord
+
+-- | Makes a parser fail.
+failParser :: Parser a
+failParser = satisfy (\_ -> False) *> (return . error $ "Passed a parser that must always fail")
+
+{- |
+parserFold f x0
+
+Acquires functions by parsing the data with f, and applies it to the result of the previous function,
+ until f fails.
+-}
+parserFold :: (Parser (a -> a)) -> a -> Parser a
+parserFold f a = (
+  do
+    r <- f
+    let a' = r a
+    parserFold f a'
+  ) <|> return a
+
+{- |
+parserList parsers x0
+
+Applies the parsers in order, taking as parameter the returned value of
+the previous parser, starting with x0.
+
+That is, for example:
+
+parserList [p0, p1, p2] x0
+
+Will do:
+    x1 <- p0 x0
+    x2 <- p1 x1
+    x3 <- p2 x2
+    return x3
+-}
+parserList :: [(b -> Parser b)] -> b -> Parser b
+parserList [] e0 = return e0
+parserList (f:ff) e0 = do
+  e1 <- f e0
+  parserList ff e1
+
+{- |
+Given a list of options, returns the first one that is show like the input.
+
+Does not consume input on the case of failure.
+-}
+parseShow :: Show a => [a] -> Parser a
+parseShow [] = failParser
+parseShow (s:ss) = ((string . utf8bs . show $ s) *> return s) <|> parseShow ss
+
+-- | Like parseShow, but case insensitive.
+parseShowCI :: Show a => [a] -> Parser a
+parseShowCI [] = failParser
+parseShowCI (s:ss) = ((stringCI . utf8bs . show $ s) *> return s) <|> parseShow ss
+
+{- |
+Given a list of options and a printing function, returns the first option that is
+printed like the input.
+-}
+parsePrintable :: (a -> String) -> [a] -> Parser a
+parsePrintable _ [] = failParser
+parsePrintable f (s:ss) = ((stringCI . utf8bs . f $ s) *> return s) <|> parsePrintable f ss
+
+-- | Like parsePrintable, but case insensitive
+parsePrintableCI :: (a -> String) -> [a] -> Parser a
+parsePrintableCI _ [] = failParser
+parsePrintableCI f (s:ss) = ((stringCI . utf8bs . f $ s) *> return s) <|> parsePrintable f ss
+
+-- | Acts like parseShow, trying every possible value
+parseEnum :: (Enum a, Bounded a, Show a) => Parser a
+parseEnum = let
+  l = [minBound .. maxBound]
+  in parseShow l
+
+-- | Acts like parseShowCI, trying every possible value
+parseEnumCI :: (Enum a, Bounded a, Show a) => Parser a
+parseEnumCI = let
+  l = [minBound .. maxBound]
+  in parseShow l
+
+-- | Consumes base64 encoded text, returning its binary decoded value.
+decodeBase64 :: Parser ByteString
+decodeBase64 = do
+  b64 <- A.takeWhile isBase64Char
+  case B64.decode $ b64 of
+    Left _ -> failParser
+    Right v -> return v
+  where
+    isBase64Char :: Char -> Bool
+    isBase64Char c = C.isAlphaNum c || c == '+' || c == '/'
+
+data QuoteScannerState = Quote | Escape
+
+{- |
+quotedStr escape forbiden_plain forbiden_quote
+
+Parses a possibly quoted string, where the characters in
+forbiden_plain are forbiden in unquoted text, and the
+characters in forbiden_quote are forbiden in quoted text.
+
+No forbiden characters are assumed (not even space). Thus,
+if no forbiden character is supplied, the parser will not
+terminate.
+
+Any character may be escaped with the escape character.
+-}
+quotedString :: C.Char -> [C.Char] -> [C.Char] -> A.Parser ByteString
+quotedString e fp fq = A.scan [] quoteScanner
+  where
+    quoteScanner :: [QuoteScannerState] -> Char -> Maybe [QuoteScannerState]
+    quoteScanner [] c
+      | elem c "'\"" = Just [Quote]
+      | c == e = Just [Escape]
+      | elem c fp = Nothing
+      | otherwise = Just []
+    quoteScanner fs@(Quote:ss) c
+      | elem c "'\"" = Just ss
+      | c == e = Just (Escape:fs)
+      | elem c fq = Nothing
+      | otherwise = Just fs
+    quoteScanner (Escape:ss) _ = Just ss

+ 32 - 0
tools-for-attoparsec.cabal

@@ -0,0 +1,32 @@
+-- Initial tools-for-attoparsec.cabal generated by cabal init.  For further
+--  documentation, see http://haskell.org/cabal/users-guide/
+
+name:                tools-for-attoparsec
+version:             0.1.0.0
+synopsis:            Extra tools for attoparsec ByteString.Char8 parsers
+-- description:         
+homepage:            https://sealgram.com/git/haskell/tools-for-attoparsec
+license:             BSD3
+license-file:        LICENSE
+author:              Marcos Dumay de Medeiros
+maintainer:          marcos@marcosdumay.com
+-- copyright:           
+-- category:            
+build-type:          Simple
+-- extra-source-files:  
+cabal-version:       >=1.10
+
+library
+  exposed-modules:     Data.Attoparsec.ByteString.Char8.Extras
+  -- other-modules:       
+  -- other-extensions:    
+  build-depends:
+    base >=4.7 && <4.8,
+    word8 >=0.1,
+    attoparsec >=0.11,
+    bytestring >=0.10,
+    base64-bytestring >= 1.0,
+    utf8-string -any
+  hs-source-dirs:      src
+  ghc-options: -Wall -fno-warn-unused-do-bind -fwarn-incomplete-patterns -threaded
+  default-language:    Haskell2010