{-# LANGUAGE FlexibleContexts #-} {-# LANGUAGE FlexibleInstances #-} {-# LANGUAGE StandaloneDeriving #-} {-# LANGUAGE TypeSynonymInstances #-} {-# OPTIONS_GHC -fno-warn-orphans #-} module Hcompta.Lib.Regex where import qualified Data.Array import Data.Data () import qualified Data.Char import qualified Data.List import qualified Text.Regex.TDFA as R import qualified Text.Regex.TDFA.Text () import Text.Regex.TDFA.Common as R import qualified Text.Regex.TDFA.IntArrTrieSet as R.IntArrTrieSet import Data.Typeable () import Data.Text (Text) -- * The 'Regex' type type Regex = R.Regex type Replacement = String -- * Constructors -- | Parse the given 'String' to a 'Regex'. of_String :: String -> R.Regex of_String = R.makeRegex instance Read R.Regex where readsPrec _ s = [(R.makeRegex s, "")] instance Show R.Regex where show _ = "Regex" -- instance Eq Regex where -- _x == _y = True deriving instance Eq a => Eq (R.IntArrTrieSet.TrieSet a) deriving instance Eq R.CompOption deriving instance Eq R.DFA deriving instance Eq R.DT deriving instance Eq R.ExecOption deriving instance Eq R.GroupInfo deriving instance Eq R.Instructions deriving instance Eq R.Transition instance Eq (R.Position -> R.OrbitTransformer) where _x == _y = True deriving instance Eq R.Regex -- | Parse the given 'String' to a 'R.Regex' (monadic version). of_StringM :: Monad m => String -> m R.Regex of_StringM = R.makeRegexM -- * Matchers match :: R.Regex -> Text -> Bool match = R.match -- | Synonym to Text.R.Regex.TDFA.'Text.R.Regex.TDFA.=~'. (=~) :: ( R.RegexMaker R.Regex R.CompOption R.ExecOption source , R.RegexContext R.Regex source1 target ) => source1 -> source -> target (=~) = (R.=~) -- | Synonym to Text.R.Regex.TDFA.'Text.R.Regex.TDFA.=~~'. (=~~) :: ( R.RegexMaker R.Regex R.CompOption R.ExecOption source , R.RegexContext R.Regex source1 target,Monad m ) => source1 -> source -> m target (=~~) = (R.=~~) -- * Replacers replace :: R.Regex -> Replacement -> String -> String replace re repl s = Data.List.foldl (replace_match repl) s (reverse $ R.match re s :: [R.MatchText String]) replace_match :: Replacement -> String -> R.MatchText String -> String replace_match replacement s match_groups = concat [prev, repl, next] where ((_, (ofs, len)):_) = Data.Array.elems match_groups -- NOTE: groups should have 0-based indexes, -- and there should always be at least one, -- since this is a match. (prev, next') = Data.List.splitAt ofs s next = Data.List.drop len next' repl = replace_all (of_String "\\\\[0-9]+") (replace_backref match_groups) replacement replace_backref :: R.MatchText String -> String -> String replace_backref match_groups ('\\':s@(_:_)) | Data.List.all Data.Char.isDigit s = case read s of n | Data.List.elem n $ Data.Array.indices match_groups -> fst ((Data.Array.!) match_groups n) _ -> error $ concat ["no match group exists for backreference \"\\", s, "\""] replace_backref _ s = error $ concat $ ["replace_backref called on non-numeric-backref \"", s, "\""] replace_all :: R.Regex -> (String -> String) -> String -> String replace_all re f s = concat (reverse $ remaining:done) where (_ind, remaining, done) = Data.List.foldl' go (0, s, []) $ (R.getAllMatches $ R.match re s::[(Int, Int)]) go (ind, prev_ok_next, repl) (ofs, len) = let (prev, ok_next) = Data.List.splitAt (ofs - ind) prev_ok_next (ok, next) = Data.List.splitAt len ok_next in ( ofs + len , next , f ok : prev : repl )