]> Git — Sourcephile - haskell/symantic-parser.git/blob - src/Symantic/Parser/Machine/Input.hs
impl: add attoparsec's Text.Buffer
[haskell/symantic-parser.git] / src / Symantic / Parser / Machine / Input.hs
1 {-# LANGUAGE AllowAmbiguousTypes #-}
2 {-# LANGUAGE MagicHash #-}
3 {-# LANGUAGE TemplateHaskell #-}
4 {-# LANGUAGE UnboxedTuples #-}
5 {-# LANGUAGE PolyKinds #-}
6 module Symantic.Parser.Machine.Input where
7
8 import Data.Array.Base (UArray(..), listArray)
9 -- import Data.Array.Unboxed (UArray)
10 import Data.Bool
11 import Data.ByteString.Internal (ByteString(..))
12 import Data.Char (Char)
13 import Data.Word (Word8)
14 import Data.Eq (Eq(..))
15 import Data.Function (on)
16 import Data.Int (Int)
17 import Data.Kind (Type)
18 import Data.Ord (Ord(..), Ordering)
19 import Data.String (String)
20 import Data.Text ()
21 import Data.Text.Array ({-aBA, empty-})
22 import Data.Text.Internal (Text(..))
23 import Data.Text.Unsafe (iter, Iter(..), iter_, reverseIter_)
24 import Text.Show (Show(..))
25 import GHC.Exts (Int(..), Char(..) {-, RuntimeRep(..)-}, TYPE)
26 import GHC.Word (Word8(..))
27 import GHC.ForeignPtr (ForeignPtr(..), ForeignPtrContents)
28 import GHC.Prim ({-Int#,-} Addr#, nullAddr#, indexWideCharArray#, {-indexWord16Array#,-} readWord8OffAddr#, word2Int#, chr#, touch#, realWorld#, plusAddr#, (+#))
29 import Language.Haskell.TH (CodeQ)
30 import Prelude ((+), (-), error)
31 import qualified Data.ByteString.Lazy.Internal as BSL
32 import qualified Data.List as List
33
34 -- * Class 'Cursorable'
35 class Show cur => Cursorable cur where
36 offset :: cur -> Int
37 compareOffset :: CodeQ (cur -> cur -> Ordering)
38 compareOffset = [|| compare `on` offset ||]
39 lowerOffset :: CodeQ (cur -> cur -> Bool)
40 sameOffset :: CodeQ (cur -> cur -> Bool)
41 shiftRight :: CodeQ (Int -> cur -> cur)
42 instance Cursorable Int where
43 offset = \inp -> inp
44 compareOffset = [|| compare @Int ||]
45 lowerOffset = [|| (<) @Int ||]
46 sameOffset = [|| (==) @Int ||]
47 shiftRight = [|| (+) @Int ||]
48 instance Cursorable Text where
49 offset = \(Text _ i _) -> i
50 lowerOffset = [|| \(Text _ i _) (Text _ j _) -> i < j ||]
51 sameOffset = [|| \(Text _ i _) (Text _ j _) -> i == j ||]
52 shiftRight = [||shiftRightText||]
53
54 shiftRightText :: Int -> Text -> Text
55 shiftRightText i (Text arr off unconsumed) = go i off unconsumed
56 where
57 go 0 off' unconsumed' = Text arr off' unconsumed'
58 go n off' unconsumed'
59 | unconsumed' > 0
60 , !d <- iter_ (Text arr off' unconsumed') 0
61 = go (n-1) (off'+d) (unconsumed'-d)
62 | otherwise = Text arr off' unconsumed'
63
64 shiftLeftText :: Int -> Text -> Text
65 shiftLeftText i (Text arr off unconsumed) = go i off unconsumed
66 where
67 go 0 off' unconsumed' = Text arr off' unconsumed'
68 go n off' unconsumed'
69 | off' > 0
70 , !d <- reverseIter_ (Text arr off' unconsumed') 0
71 = go (n-1) (off'+d) (unconsumed'-d)
72 | otherwise = Text arr off' unconsumed'
73
74 instance Cursorable UnpackedLazyByteString where
75 offset = \(UnpackedLazyByteString i _ _ _ _ _) -> i
76 lowerOffset = [||\(UnpackedLazyByteString i _ _ _ _ _) (UnpackedLazyByteString j _ _ _ _ _) -> i <= j||]
77 sameOffset = [||\(UnpackedLazyByteString i _ _ _ _ _) (UnpackedLazyByteString j _ _ _ _ _) -> i == j||]
78 shiftRight = [||shiftRightByteString||]
79
80 shiftRightByteString :: Int -> UnpackedLazyByteString -> UnpackedLazyByteString
81 shiftRightByteString j !(UnpackedLazyByteString i addr# final off size cs)
82 | j < size = UnpackedLazyByteString (i + j) addr# final (off + j) (size - j) cs
83 | otherwise = case cs of
84 BSL.Chunk (PS (ForeignPtr addr'# final') off' size') cs' ->
85 shiftRightByteString (j - size)
86 (UnpackedLazyByteString (i + size) addr'# final' off' size' cs')
87 BSL.Empty -> emptyUnpackedLazyByteString (i + size)
88
89 shiftLeftByteString :: Int -> UnpackedLazyByteString -> UnpackedLazyByteString
90 shiftLeftByteString j !(UnpackedLazyByteString i addr# final off size cs) =
91 UnpackedLazyByteString (i - d) addr# final (off - d) (size + d) cs
92 where d = min off j
93
94 offWith :: CodeQ (ts -> OffWith ts)
95 offWith = [|| OffWith 0 ||]
96
97 -- ** Type 'Text16'
98 newtype Text16 = Text16 Text
99 --newtype CacheText = CacheText Text
100 -- ** Type 'CharList'
101 newtype CharList = CharList String
102 -- ** Type 'Stream'
103 data Stream = {-# UNPACK #-} !Char :> Stream
104 nomore :: Stream
105 nomore = '\0' :> nomore
106 {-
107 instance Cursorable (OffWith Stream) where
108 lowerOffset = [|| \(OffWith i _) (OffWith j _) -> i < j ||]
109 sameOffset = [|| \(OffWith i _) (OffWith j _) -> i == j ||]
110 shiftRight = [|| \i (OffWith o ts) -> OffWith (o + i) (dropStream i ts) ||]
111 where
112 dropStream :: Int -> Stream -> Stream
113 dropStream 0 cs = cs
114 dropStream n (_ :> cs) = dropStream (n-1) cs
115 -}
116
117 -- ** Type 'OffWith'
118 data OffWith ts = OffWith {-# UNPACK #-} !Int ts
119 deriving (Show)
120
121 instance Cursorable (OffWith String) where
122 offset = \(OffWith i _) -> i
123 lowerOffset = [|| \(OffWith i _) (OffWith j _) -> i < j ||]
124 sameOffset = [|| \(OffWith i _) (OffWith j _) -> i == j ||]
125 shiftRight = [|| \i (OffWith o ts) -> OffWith (o + i) (List.drop i ts) ||]
126
127 -- ** Type 'OffWithStreamAnd'
128 data OffWithStreamAnd ts = OffWithStreamAnd {-# UNPACK #-} !Int !Stream ts
129 -- ** Type 'UnpackedLazyByteString'
130 data UnpackedLazyByteString = UnpackedLazyByteString
131 {-# UNPACK #-} !Int
132 !Addr#
133 ForeignPtrContents
134 {-# UNPACK #-} !Int
135 {-# UNPACK #-} !Int
136 BSL.ByteString
137 instance Show UnpackedLazyByteString where
138 show (UnpackedLazyByteString _i _addr _p _off _size _cs) = "UnpackedLazyByteString" -- FIXME
139
140 {-# INLINE emptyUnpackedLazyByteString #-}
141 emptyUnpackedLazyByteString :: Int -> UnpackedLazyByteString
142 emptyUnpackedLazyByteString i =
143 UnpackedLazyByteString i nullAddr#
144 (error "nullForeignPtr") 0 0 BSL.Empty
145
146 -- * Class 'Inputable'
147 class Cursorable (Cursor inp) => Inputable inp where
148 type Cursor inp :: Type
149 type InputToken inp :: Type
150 cursorOf :: CodeQ inp -> CodeQ (CursorOps inp)
151
152 type CursorOps (inp :: TYPE r) =
153 (# {-init-} Cursor inp
154 , {-more-} Cursor inp -> Bool
155 , {-next-} Cursor inp -> (# InputToken inp, Cursor inp #)
156 #)
157
158 instance Inputable String where
159 type Cursor String = Int
160 type InputToken String = Char
161 cursorOf input = cursorOf @(UArray Int Char)
162 [|| listArray (0, List.length $$input-1) $$input ||]
163 instance Inputable (UArray Int Char) where
164 type Cursor (UArray Int Char) = Int
165 type InputToken (UArray Int Char) = Char
166 cursorOf qinput = [||
167 -- Pattern bindings containing unlifted types should use an outermost bang pattern.
168 let !(UArray _ _ size input#) = $$qinput
169 next (I# i#) =
170 (# C# (indexWideCharArray# input# i#)
171 , I# (i# +# 1#)
172 #)
173 in (# 0, (< size), next #)
174 ||]
175 instance Inputable Text where
176 type Cursor Text = Text
177 type InputToken Text = Char
178 cursorOf inp = [||
179 let _ = "cursorOf" in
180 let next t@(Text arr off unconsumed) =
181 let !(Iter c d) = iter t 0 in
182 (# c, Text arr (off+d) (unconsumed-d) #)
183 more (Text _ _ unconsumed) = unconsumed > 0
184 in (# $$inp, more, next #)
185 ||]
186 instance Inputable ByteString where
187 type Cursor ByteString = Int
188 type InputToken ByteString = Word8
189 cursorOf qinput = [||
190 -- Pattern bindings containing unlifted types should use an outermost bang pattern.
191 let !(PS (ForeignPtr addr# final) off size) = $$qinput
192 next i@(I# i#) =
193 case readWord8OffAddr# (addr# `plusAddr#` i#) 0# realWorld# of
194 (# s', x #) -> case touch# final s' of
195 _ -> (# W8# (x), i + 1 #)
196 in (# off, (< size), next #)
197 ||]
198 instance Inputable BSL.ByteString where
199 type Cursor BSL.ByteString = UnpackedLazyByteString
200 type InputToken BSL.ByteString = Char
201 cursorOf qinput = [||
202 let next (UnpackedLazyByteString i addr# final off@(I# off#) size cs) =
203 case readWord8OffAddr# addr# off# realWorld# of
204 (# s', x #) -> case touch# final s' of
205 _ ->
206 (# C# (chr# (word2Int# x))
207 , if size /= 1 then UnpackedLazyByteString (i+1) addr# final (off+1) (size-1) cs
208 else case cs of
209 BSL.Chunk (PS (ForeignPtr addr'# final') off' size') cs' ->
210 UnpackedLazyByteString (i+1) addr'# final' off' size' cs'
211 BSL.Empty -> emptyUnpackedLazyByteString (i+1)
212 #)
213 more (UnpackedLazyByteString _ _ _ _ 0 _) = False
214 more _ = True
215 init = case $$qinput of
216 BSL.Chunk (PS (ForeignPtr addr# final) off size) cs ->
217 UnpackedLazyByteString 0 addr# final off size cs
218 BSL.Empty -> emptyUnpackedLazyByteString 0
219 in (# init, more, next #)
220 ||]
221 {-
222 instance Inputable Text16 where
223 type Cursor Text16 = Int
224 cursorOf qinput = [||
225 let Text16 (Text arr off size) = $$qinput
226 arr# = aBA arr
227 next (I# i#) =
228 (# C# (chr# (word2Int# (indexWord16Array# arr# i#)))
229 , I# (i# +# 1#) #)
230 in (# off, (< size), next #)
231 ||]
232 instance Inputable CharList where
233 type Cursor CharList = OffWith String
234 cursorOf qinput = [||
235 let CharList input = $$qinput
236 next (OffWith i (c:cs)) = (# c, OffWith (i+1) cs #)
237 size = List.length input
238 more (OffWith i _) = i < size
239 --more (OffWith _ []) = False
240 --more _ = True
241 in (# $$offWith input, more, next #)
242 ||]
243 instance Inputable Stream where
244 type Cursor Stream = OffWith Stream
245 cursorOf qinput = [||
246 let next (OffWith o (c :> cs)) = (# c, OffWith (o + 1) cs #)
247 in (# $$offWith $$qinput, const True, next #)
248 ||]
249 -}
250 {-
251 -- type instance Cursor CacheText = (Text, Stream)
252 -- type instance Cursor BSL.ByteString = OffWith BSL.ByteString
253 -}
254