]> Git — Sourcephile - comptalang.git/blob - lib/Hcompta/Lib/Regex.hs
Polissage : CLI.Command.*.
[comptalang.git] / lib / Hcompta / Lib / Regex.hs
1 {-# LANGUAGE DeriveDataTypeable #-}
2 {-# LANGUAGE FlexibleContexts #-}
3 {-# LANGUAGE FlexibleInstances #-}
4 {-# LANGUAGE StandaloneDeriving #-}
5 {-# LANGUAGE TypeSynonymInstances #-}
6 {-# OPTIONS_GHC -fno-warn-orphans #-}
7 module Hcompta.Lib.Regex where
8
9 import qualified Data.Array
10 import Data.Data ()
11 import qualified Data.Char
12 import qualified Data.List
13 import qualified Text.Regex.TDFA as R
14 import qualified Text.Regex.TDFA.Text ()
15 import Text.Regex.TDFA.Common as R
16 import qualified Text.Regex.TDFA.IntArrTrieSet as R.IntArrTrieSet
17 import Data.Typeable ()
18 import Data.Text (Text)
19
20 -- * The 'Regex' type
21
22 type Regex = R.Regex
23 type Replacement = String
24
25 -- * Constructors
26
27 -- | Parse the given 'String' to a 'Regex'.
28 of_String :: String -> R.Regex
29 of_String = R.makeRegex
30
31 instance Read R.Regex where
32 readsPrec _ s = [(R.makeRegex s, "")]
33 instance Show R.Regex where
34 show _ = "Regex"
35 -- instance Eq Regex where
36 -- _x == _y = True
37 deriving instance Eq a => Eq (R.IntArrTrieSet.TrieSet a)
38 deriving instance Eq R.CompOption
39 deriving instance Eq R.DFA
40 deriving instance Eq R.DT
41 deriving instance Eq R.ExecOption
42 deriving instance Eq R.GroupInfo
43 deriving instance Eq R.Instructions
44 deriving instance Eq R.Transition
45 instance Eq (R.Position -> R.OrbitTransformer) where
46 _x == _y = True
47 deriving instance Eq R.Regex
48
49 -- | Parse the given 'String' to a 'R.Regex' (monadic version).
50 of_StringM :: Monad m => String -> m R.Regex
51 of_StringM = R.makeRegexM
52
53 -- * Matchers
54
55 match :: R.Regex -> Text -> Bool
56 match = R.match
57
58 -- | Synonym to Text.R.Regex.TDFA.'Text.R.Regex.TDFA.=~'.
59 (=~) :: ( R.RegexMaker R.Regex R.CompOption R.ExecOption source
60 , R.RegexContext R.Regex source1 target )
61 => source1 -> source -> target
62 (=~) = (R.=~)
63
64 -- | Synonym to Text.R.Regex.TDFA.'Text.R.Regex.TDFA.=~~'.
65 (=~~) :: ( R.RegexMaker R.Regex R.CompOption R.ExecOption source
66 , R.RegexContext R.Regex source1 target,Monad m )
67 => source1 -> source -> m target
68 (=~~) = (R.=~~)
69
70 -- * Replacers
71
72 replace :: R.Regex -> Replacement -> String -> String
73 replace re repl s =
74 Data.List.foldl (replace_match repl) s
75 (reverse $ R.match re s :: [R.MatchText String])
76
77 replace_match :: Replacement -> String -> R.MatchText String -> String
78 replace_match replacement s match_groups =
79 concat [prev, repl, next]
80 where
81 ((_, (ofs, len)):_) = Data.Array.elems match_groups
82 -- NOTE: groups should have 0-based indexes,
83 -- and there should always be at least one,
84 -- since this is a match.
85 (prev, next') = Data.List.splitAt ofs s
86 next = Data.List.drop len next'
87 repl = replace_all (of_String "\\\\[0-9]+") (replace_backref match_groups) replacement
88
89 replace_backref :: R.MatchText String -> String -> String
90 replace_backref match_groups ('\\':s@(_:_))
91 | Data.List.all Data.Char.isDigit s =
92 case read s of
93 n | Data.List.elem n $ Data.Array.indices match_groups -> fst ((Data.Array.!) match_groups n)
94 _ -> error $ concat ["no match group exists for backreference \"\\", s, "\""]
95 replace_backref _ s =
96 error $ concat $ ["replace_backref called on non-numeric-backref \"", s, "\""]
97
98 replace_all :: R.Regex -> (String -> String) -> String -> String
99 replace_all re f s =
100 concat (reverse $ remaining:done)
101 where
102 (_ind, remaining, done) = Data.List.foldl' go (0, s, []) $
103 (R.getAllMatches $ R.match re s::[(Int, Int)])
104 go (ind, prev_ok_next, repl) (ofs, len) =
105 let (prev, ok_next) = Data.List.splitAt (ofs - ind) prev_ok_next
106 (ok, next) = Data.List.splitAt len ok_next in
107 ( ofs + len
108 , next
109 , f ok : prev : repl
110 )