]> Git — Sourcephile - comptalang.git/blob - lib/Hcompta/Lib/Regex.hs
Déplace hcompta-calculus vers lol-calculus et lol-typing
[comptalang.git] / lib / Hcompta / Lib / Regex.hs
1 {-# LANGUAGE FlexibleContexts #-}
2 {-# LANGUAGE FlexibleInstances #-}
3 {-# LANGUAGE StandaloneDeriving #-}
4 {-# OPTIONS_GHC -fno-warn-orphans #-}
5 module Hcompta.Lib.Regex where
6
7 import Control.Monad (Monad(..))
8 import qualified Data.Array
9 import Data.Bool
10 import qualified Data.Char
11 import Data.Data ()
12 import Data.Eq (Eq(..))
13 import Data.Foldable (concat)
14 import qualified Data.List
15 import Data.String (String)
16 import Data.Text (Text)
17 import Data.Typeable ()
18 import Prelude (($), Int, Num(..), Show(..), fst, error, reverse)
19 import Text.Read(Read(..), read)
20 import qualified Text.Regex.TDFA as R
21 import Text.Regex.TDFA.Common as R
22 import qualified Text.Regex.TDFA.IntArrTrieSet as R.IntArrTrieSet
23 import qualified Text.Regex.TDFA.Text ()
24
25
26 -- * The 'Regex' type
27
28 type Regex = R.Regex
29 type Replacement = String
30
31 -- * Constructors
32
33 -- | Parse the given 'String' to a 'Regex'.
34 of_String :: String -> R.Regex
35 of_String = R.makeRegex
36
37 instance Read R.Regex where
38 readsPrec _ s = [(R.makeRegex s, "")]
39 instance Show R.Regex where
40 show _ = "Regex"
41 -- instance Eq Regex where
42 -- _x == _y = True
43 deriving instance Eq a => Eq (R.IntArrTrieSet.TrieSet a)
44 deriving instance Eq R.CompOption
45 deriving instance Eq R.DFA
46 deriving instance Eq R.DT
47 deriving instance Eq R.ExecOption
48 deriving instance Eq R.GroupInfo
49 deriving instance Eq R.Instructions
50 deriving instance Eq R.Transition
51 instance Eq (R.Position -> R.OrbitTransformer) where
52 _x == _y = True
53 deriving instance Eq R.Regex
54
55 -- | Parse the given 'String' to a 'R.Regex' (monadic version).
56 of_StringM :: Monad m => String -> m R.Regex
57 of_StringM = R.makeRegexM
58
59 -- * Matchers
60
61 match :: R.Regex -> Text -> Bool
62 match = R.match
63
64 -- | Synonym to Text.R.Regex.TDFA.'Text.R.Regex.TDFA.=~'.
65 (=~) :: ( R.RegexMaker R.Regex R.CompOption R.ExecOption source
66 , R.RegexContext R.Regex source1 target )
67 => source1 -> source -> target
68 (=~) = (R.=~)
69
70 -- | Synonym to Text.R.Regex.TDFA.'Text.R.Regex.TDFA.=~~'.
71 (=~~) :: ( R.RegexMaker R.Regex R.CompOption R.ExecOption source
72 , R.RegexContext R.Regex source1 target,Monad m )
73 => source1 -> source -> m target
74 (=~~) = (R.=~~)
75
76 -- * Replacers
77
78 replace :: R.Regex -> Replacement -> String -> String
79 replace re repl s =
80 Data.List.foldl (replace_match repl) s
81 (reverse $ R.match re s :: [R.MatchText String])
82
83 replace_match :: Replacement -> String -> R.MatchText String -> String
84 replace_match replacement s match_groups =
85 concat [prev, repl, next]
86 where
87 ((_, (ofs, len)):_) = Data.Array.elems match_groups
88 -- NOTE: groups should have 0-based indexes,
89 -- and there should always be at least one,
90 -- since this is a match.
91 (prev, next') = Data.List.splitAt ofs s
92 next = Data.List.drop len next'
93 repl = replace_all (of_String "\\\\[0-9]+") (replace_backref match_groups) replacement
94
95 replace_backref :: R.MatchText String -> String -> String
96 replace_backref match_groups ('\\':s@(_:_))
97 | Data.List.all Data.Char.isDigit s =
98 case read s of
99 n | Data.List.elem n $ Data.Array.indices match_groups -> fst ((Data.Array.!) match_groups n)
100 _ -> error $ concat ["no match group exists for backreference \"\\", s, "\""]
101 replace_backref _ s =
102 error $ concat $ ["replace_backref called on non-numeric-backref \"", s, "\""]
103
104 replace_all :: R.Regex -> (String -> String) -> String -> String
105 replace_all re f s =
106 concat (reverse $ remaining:done)
107 where
108 (_ind, remaining, done) = Data.List.foldl' go (0, s, []) $
109 (R.getAllMatches $ R.match re s::[(Int, Int)])
110 go (ind, prev_ok_next, repl) (ofs, len) =
111 let (prev, ok_next) = Data.List.splitAt (ofs - ind) prev_ok_next
112 (ok, next) = Data.List.splitAt len ok_next in
113 ( ofs + len
114 , next
115 , f ok : prev : repl
116 )