]> Git — Sourcephile - comptalang.git/blob - lib/Hcompta/Lib/Regex.hs
WIP : Format.Ledger.Read : Model.Transaction.Posting
[comptalang.git] / lib / Hcompta / Lib / Regex.hs
1 {-# LANGUAGE DeriveDataTypeable #-}
2 {-# LANGUAGE FlexibleContexts #-}
3 {-# LANGUAGE FlexibleInstances #-}
4 {-# LANGUAGE StandaloneDeriving #-}
5 {-# LANGUAGE TypeSynonymInstances #-}
6 {-# OPTIONS_GHC -fno-warn-orphans #-}
7 module Hcompta.Lib.Regex where
8
9 import Data.Data
10 import Data.Typeable ()
11
12 import qualified Data.Array
13 import qualified Data.Char
14 import qualified Data.List
15 import qualified Text.Regex.TDFA as R
16
17 -- * The 'Regex' type
18
19 type Regex = R.Regex
20 type Replacement = String
21
22 -- * Constructors
23
24 -- | Parse the given 'String' to a 'Regex'.
25 of_String :: String -> Regex
26 of_String = R.makeRegex
27
28 instance Read Regex where
29 readsPrec _ s = [(R.makeRegex s, "")]
30 instance Show Regex where
31 show _ = "Regex"
32
33 -- | Parse the given 'String' to a 'Regex' (monadic version).
34 of_StringM :: Monad m => String -> m Regex
35 of_StringM = R.makeRegexM
36
37 -- * Matchers
38
39 -- | Synonym to Text.Regex.TDFA.'Text.Regex.TDFA.=~'.
40 (=~) :: ( R.RegexMaker Regex R.CompOption R.ExecOption source
41 , R.RegexContext Regex source1 target )
42 => source1 -> source -> target
43 (=~) = (R.=~)
44
45 -- | Synonym to Text.Regex.TDFA.'Text.Regex.TDFA.=~~'.
46 (=~~) :: ( R.RegexMaker Regex R.CompOption R.ExecOption source
47 , R.RegexContext Regex source1 target,Monad m )
48 => source1 -> source -> m target
49 (=~~) = (R.=~~)
50
51 -- * Replacers
52
53 replace :: Regex -> Replacement -> String -> String
54 replace re repl s =
55 Data.List.foldl (replace_match repl) s
56 (reverse $ R.match re s :: [R.MatchText String])
57
58 replace_match :: Replacement -> String -> R.MatchText String -> String
59 replace_match replacement s match_groups =
60 concat [prev, repl, next]
61 where
62 ((_, (ofs, len)):_) = Data.Array.elems match_groups
63 -- NOTE: groups should have 0-based indexes,
64 -- and there should always be at least one,
65 -- since this is a match.
66 (prev, next') = Data.List.splitAt ofs s
67 next = Data.List.drop len next'
68 repl = replace_all (of_String "\\\\[0-9]+") (replace_backref match_groups) replacement
69
70 replace_backref :: R.MatchText String -> String -> String
71 replace_backref match_groups ('\\':s@(_:_))
72 | Data.List.all Data.Char.isDigit s =
73 case read s of
74 n | Data.List.elem n $ Data.Array.indices match_groups -> fst ((Data.Array.!) match_groups n)
75 _ -> error $ concat ["no match group exists for backreference \"\\", s, "\""]
76 replace_backref _ s =
77 error $ concat $ ["replace_backref called on non-numeric-backref \"", s, "\""]
78
79 replace_all :: Regex -> (String -> String) -> String -> String
80 replace_all re f s =
81 concat (reverse $ remaining:done)
82 where
83 (_ind, remaining, done) = Data.List.foldl' go (0, s, []) $ R.getAllMatches $ R.match re s
84 go (ind, prev_ok_next, repl) (ofs, len) =
85 let (prev, ok_next) = Data.List.splitAt (ofs - ind) prev_ok_next
86 (ok, next) = Data.List.splitAt len ok_next in
87 ( ofs + len
88 , next
89 , f ok : prev : repl
90 )