{-# LANGUAGE DeriveDataTypeable #-} {-# LANGUAGE FlexibleContexts #-} {-# LANGUAGE FlexibleInstances #-} {-# LANGUAGE StandaloneDeriving #-} {-# LANGUAGE TypeSynonymInstances #-} {-# OPTIONS_GHC -fno-warn-orphans #-} module Hcompta.Lib.Regex where import Data.Data () import Data.Typeable () import qualified Data.Array import qualified Data.Char import qualified Data.List import qualified Text.Regex.TDFA as R -- * The 'Regex' type type Regex = R.Regex type Replacement = String -- * Constructors -- | Parse the given 'String' to a 'Regex'. of_String :: String -> Regex of_String = R.makeRegex instance Read Regex where readsPrec _ s = [(R.makeRegex s, "")] instance Show Regex where show _ = "Regex" -- | Parse the given 'String' to a 'Regex' (monadic version). of_StringM :: Monad m => String -> m Regex of_StringM = R.makeRegexM -- * Matchers -- | Synonym to Text.Regex.TDFA.'Text.Regex.TDFA.=~'. (=~) :: ( R.RegexMaker Regex R.CompOption R.ExecOption source , R.RegexContext Regex source1 target ) => source1 -> source -> target (=~) = (R.=~) -- | Synonym to Text.Regex.TDFA.'Text.Regex.TDFA.=~~'. (=~~) :: ( R.RegexMaker Regex R.CompOption R.ExecOption source , R.RegexContext Regex source1 target,Monad m ) => source1 -> source -> m target (=~~) = (R.=~~) -- * Replacers replace :: Regex -> Replacement -> String -> String replace re repl s = Data.List.foldl (replace_match repl) s (reverse $ R.match re s :: [R.MatchText String]) replace_match :: Replacement -> String -> R.MatchText String -> String replace_match replacement s match_groups = concat [prev, repl, next] where ((_, (ofs, len)):_) = Data.Array.elems match_groups -- NOTE: groups should have 0-based indexes, -- and there should always be at least one, -- since this is a match. (prev, next') = Data.List.splitAt ofs s next = Data.List.drop len next' repl = replace_all (of_String "\\\\[0-9]+") (replace_backref match_groups) replacement replace_backref :: R.MatchText String -> String -> String replace_backref match_groups ('\\':s@(_:_)) | Data.List.all Data.Char.isDigit s = case read s of n | Data.List.elem n $ Data.Array.indices match_groups -> fst ((Data.Array.!) match_groups n) _ -> error $ concat ["no match group exists for backreference \"\\", s, "\""] replace_backref _ s = error $ concat $ ["replace_backref called on non-numeric-backref \"", s, "\""] replace_all :: Regex -> (String -> String) -> String -> String replace_all re f s = concat (reverse $ remaining:done) where (_ind, remaining, done) = Data.List.foldl' go (0, s, []) $ R.getAllMatches $ R.match re s go (ind, prev_ok_next, repl) (ofs, len) = let (prev, ok_next) = Data.List.splitAt (ofs - ind) prev_ok_next (ok, next) = Data.List.splitAt len ok_next in ( ofs + len , next , f ok : prev : repl )