]> Git — Sourcephile - comptalang.git/blob - lib/Hcompta/Lib/Regex.hs
Ajout : CLI.Lang : traductions.
[comptalang.git] / lib / Hcompta / Lib / Regex.hs
1 {-# LANGUAGE FlexibleContexts #-}
2 {-# LANGUAGE FlexibleInstances #-}
3 {-# LANGUAGE StandaloneDeriving #-}
4 {-# OPTIONS_GHC -fno-warn-orphans #-}
5 module Hcompta.Lib.Regex where
6
7 import Control.Monad (Monad(..))
8 import qualified Data.Array
9 import Data.Bool
10 import qualified Data.Char
11 import Data.Data ()
12 import Data.Eq (Eq(..))
13 import Data.Foldable (concat)
14 import qualified Data.List
15 import Data.String (String)
16 import Data.Text (Text)
17 import Data.Typeable ()
18 import Text.Read(Read(..), read)
19 import qualified Text.Regex.TDFA as R
20 import Text.Regex.TDFA.Common as R
21 import qualified Text.Regex.TDFA.IntArrTrieSet as R.IntArrTrieSet
22 import qualified Text.Regex.TDFA.Text ()
23 import Prelude ( ($)
24 , Int
25 , Num(..)
26 , Show(..)
27 , fst
28 , error
29 , reverse
30 )
31
32
33 -- * The 'Regex' type
34
35 type Regex = R.Regex
36 type Replacement = String
37
38 -- * Constructors
39
40 -- | Parse the given 'String' to a 'Regex'.
41 of_String :: String -> R.Regex
42 of_String = R.makeRegex
43
44 instance Read R.Regex where
45 readsPrec _ s = [(R.makeRegex s, "")]
46 instance Show R.Regex where
47 show _ = "Regex"
48 -- instance Eq Regex where
49 -- _x == _y = True
50 deriving instance Eq a => Eq (R.IntArrTrieSet.TrieSet a)
51 deriving instance Eq R.CompOption
52 deriving instance Eq R.DFA
53 deriving instance Eq R.DT
54 deriving instance Eq R.ExecOption
55 deriving instance Eq R.GroupInfo
56 deriving instance Eq R.Instructions
57 deriving instance Eq R.Transition
58 instance Eq (R.Position -> R.OrbitTransformer) where
59 _x == _y = True
60 deriving instance Eq R.Regex
61
62 -- | Parse the given 'String' to a 'R.Regex' (monadic version).
63 of_StringM :: Monad m => String -> m R.Regex
64 of_StringM = R.makeRegexM
65
66 -- * Matchers
67
68 match :: R.Regex -> Text -> Bool
69 match = R.match
70
71 -- | Synonym to Text.R.Regex.TDFA.'Text.R.Regex.TDFA.=~'.
72 (=~) :: ( R.RegexMaker R.Regex R.CompOption R.ExecOption source
73 , R.RegexContext R.Regex source1 target )
74 => source1 -> source -> target
75 (=~) = (R.=~)
76
77 -- | Synonym to Text.R.Regex.TDFA.'Text.R.Regex.TDFA.=~~'.
78 (=~~) :: ( R.RegexMaker R.Regex R.CompOption R.ExecOption source
79 , R.RegexContext R.Regex source1 target,Monad m )
80 => source1 -> source -> m target
81 (=~~) = (R.=~~)
82
83 -- * Replacers
84
85 replace :: R.Regex -> Replacement -> String -> String
86 replace re repl s =
87 Data.List.foldl (replace_match repl) s
88 (reverse $ R.match re s :: [R.MatchText String])
89
90 replace_match :: Replacement -> String -> R.MatchText String -> String
91 replace_match replacement s match_groups =
92 concat [prev, repl, next]
93 where
94 ((_, (ofs, len)):_) = Data.Array.elems match_groups
95 -- NOTE: groups should have 0-based indexes,
96 -- and there should always be at least one,
97 -- since this is a match.
98 (prev, next') = Data.List.splitAt ofs s
99 next = Data.List.drop len next'
100 repl = replace_all (of_String "\\\\[0-9]+") (replace_backref match_groups) replacement
101
102 replace_backref :: R.MatchText String -> String -> String
103 replace_backref match_groups ('\\':s@(_:_))
104 | Data.List.all Data.Char.isDigit s =
105 case read s of
106 n | Data.List.elem n $ Data.Array.indices match_groups -> fst ((Data.Array.!) match_groups n)
107 _ -> error $ concat ["no match group exists for backreference \"\\", s, "\""]
108 replace_backref _ s =
109 error $ concat $ ["replace_backref called on non-numeric-backref \"", s, "\""]
110
111 replace_all :: R.Regex -> (String -> String) -> String -> String
112 replace_all re f s =
113 concat (reverse $ remaining:done)
114 where
115 (_ind, remaining, done) = Data.List.foldl' go (0, s, []) $
116 (R.getAllMatches $ R.match re s::[(Int, Int)])
117 go (ind, prev_ok_next, repl) (ofs, len) =
118 let (prev, ok_next) = Data.List.splitAt (ofs - ind) prev_ok_next
119 (ok, next) = Data.List.splitAt len ok_next in
120 ( ofs + len
121 , next
122 , f ok : prev : repl
123 )