]> Git — Sourcephile - haskell/symantic-parser.git/blob - src/Symantic/Parser/Machine/Input/Text/Buffer.hs
build: nix: update inputs
[haskell/symantic-parser.git] / src / Symantic / Parser / Machine / Input / Text / Buffer.hs
1 {-# LANGUAGE BangPatterns, CPP, MagicHash, RankNTypes, RecordWildCards,
2 UnboxedTuples #-}
3
4 -- |
5 -- Module : Data.Attoparsec.Text.Buffer
6 -- Copyright : Bryan O'Sullivan 2007-2015
7 -- License : BSD3
8 --
9 -- Maintainer : bos@serpentine.com
10 -- Stability : experimental
11 -- Portability : GHC
12 --
13 -- An immutable buffer that supports cheap appends.
14
15 -- A Buffer is divided into an immutable read-only zone, followed by a
16 -- mutable area that we've preallocated, but not yet written to.
17 --
18 -- We overallocate at the end of a Buffer so that we can cheaply
19 -- append. Since a user of an existing Buffer cannot see past the end
20 -- of its immutable zone into the data that will change during an
21 -- append, this is safe.
22 --
23 -- Once we run out of space at the end of a Buffer, we do the usual
24 -- doubling of the buffer size.
25
26 module Symantic.Parser.Machine.Input.Text.Buffer
27 (
28 Buffer
29 , buffer
30 , unbuffer
31 , unbufferAt
32 , length
33 , pappend
34 , iter
35 , iter_
36 , substring
37 , lengthCodeUnits
38 , dropCodeUnits
39 ) where
40
41 import Control.Exception (assert)
42 import Data.List (foldl1')
43 import Data.Monoid as Mon (Monoid(..))
44 import Data.Semigroup (Semigroup(..))
45 import Data.Text ()
46 import Data.Text.Internal (Text(..))
47 #if MIN_VERSION_text(2,0,0)
48 import Data.Text.Internal.Encoding.Utf8 (utf8LengthByLeader)
49 import Data.Text.Unsafe (iterArray, lengthWord8)
50 #else
51 import Data.Bits (shiftR)
52 import Data.Text.Internal.Encoding.Utf16 (chr2)
53 import Data.Text.Internal.Unsafe.Char (unsafeChr)
54 import Data.Text.Unsafe (lengthWord16)
55 #endif
56 import Data.Text.Unsafe (Iter(..))
57 import Foreign.Storable (sizeOf)
58 import GHC.Exts (Int(..), indexIntArray#, unsafeCoerce#, writeIntArray#)
59 import GHC.ST (ST(..), runST)
60 import Prelude hiding (length)
61 import qualified Data.Text.Array as A
62
63 -- If _cap is zero, this buffer is empty.
64 data Buffer = Buf {
65 _arr :: {-# UNPACK #-} !A.Array
66 , _off :: {-# UNPACK #-} !Int
67 , _len :: {-# UNPACK #-} !Int
68 , _cap :: {-# UNPACK #-} !Int
69 , _gen :: {-# UNPACK #-} !Int
70 }
71
72 instance Show Buffer where
73 showsPrec p = showsPrec p . unbuffer
74
75 -- | The initial 'Buffer' has no mutable zone, so we can avoid all
76 -- copies in the (hopefully) common case of no further input being fed
77 -- to us.
78 buffer :: Text -> Buffer
79 buffer (Text arr off len) = Buf arr off len len 0
80
81 unbuffer :: Buffer -> Text
82 unbuffer (Buf arr off len _ _) = Text arr off len
83
84 unbufferAt :: Int -> Buffer -> Text
85 unbufferAt s (Buf arr off len _ _) =
86 assert (s >= 0 && s <= len) $
87 Text arr (off+s) (len-s)
88
89 instance Semigroup Buffer where
90 (Buf _ _ _ 0 _) <> b = b
91 a <> (Buf _ _ _ 0 _) = a
92 buf <> (Buf arr off len _ _) = append buf arr off len
93 {-# INLINE (<>) #-}
94
95 instance Monoid Buffer where
96 mempty = Buf A.empty 0 0 0 0
97 {-# INLINE mempty #-}
98
99 mappend = (<>)
100
101 mconcat [] = Mon.mempty
102 mconcat xs = foldl1' (<>) xs
103
104 pappend :: Buffer -> Text -> Buffer
105 pappend (Buf _ _ _ 0 _) t = buffer t
106 pappend buf (Text arr off len) = append buf arr off len
107
108 append :: Buffer -> A.Array -> Int -> Int -> Buffer
109 append (Buf arr0 off0 len0 cap0 gen0) !arr1 !off1 !len1 = runST $ do
110 #if MIN_VERSION_text(2,0,0)
111 let woff = sizeOf (0::Int)
112 #else
113 let woff = sizeOf (0::Int) `shiftR` 1
114 #endif
115 newlen = len0 + len1
116 !gen = if gen0 == 0 then 0 else readGen arr0
117 if gen == gen0 && newlen <= cap0
118 then do
119 let newgen = gen + 1
120 marr <- unsafeThaw arr0
121 writeGen marr newgen
122 #if MIN_VERSION_text(2,0,0)
123 A.copyI len1 marr (off0+len0) arr1 off1
124 #else
125 A.copyI marr (off0+len0) arr1 off1 (off0+newlen)
126 #endif
127 arr2 <- A.unsafeFreeze marr
128 return (Buf arr2 off0 newlen cap0 newgen)
129 else do
130 let newcap = newlen * 2
131 newgen = 1
132 marr <- A.new (newcap + woff)
133 writeGen marr newgen
134 #if MIN_VERSION_text(2,0,0)
135 A.copyI len0 marr woff arr0 off0
136 A.copyI len1 marr (woff+len0) arr1 off1
137 #else
138 A.copyI marr woff arr0 off0 (woff+len0)
139 A.copyI marr (woff+len0) arr1 off1 (woff+newlen)
140 #endif
141 arr2 <- A.unsafeFreeze marr
142 return (Buf arr2 woff newlen newcap newgen)
143
144 length :: Buffer -> Int
145 length (Buf _ _ len _ _) = len
146 {-# INLINE length #-}
147
148 substring :: Int -> Int -> Buffer -> Text
149 substring s l (Buf arr off len _ _) =
150 assert (s >= 0 && s <= len) .
151 assert (l >= 0 && l <= len-s) $
152 Text arr (off+s) l
153 {-# INLINE substring #-}
154
155 #if MIN_VERSION_text(2,0,0)
156
157 lengthCodeUnits :: Text -> Int
158 lengthCodeUnits = lengthWord8
159
160 dropCodeUnits :: Int -> Buffer -> Text
161 dropCodeUnits s (Buf arr off len _ _) =
162 assert (s >= 0 && s <= len) $
163 Text arr (off+s) (len-s)
164 {-# INLINE dropCodeUnits #-}
165
166 -- | /O(1)/ Iterate (unsafely) one step forwards through a UTF-8
167 -- array, returning the current character and the delta to add to give
168 -- the next offset to iterate at.
169 iter :: Buffer -> Int -> Iter
170 iter (Buf arr off _ _ _) i = iterArray arr (off + i)
171 {-# INLINE iter #-}
172
173 -- | /O(1)/ Iterate one step through a UTF-8 array, returning the
174 -- delta to add to give the next offset to iterate at.
175 iter_ :: Buffer -> Int -> Int
176 iter_ (Buf arr off _ _ _) i = utf8LengthByLeader $ A.unsafeIndex arr (off+i)
177 {-# INLINE iter_ #-}
178
179 unsafeThaw :: A.Array -> ST s (A.MArray s)
180 unsafeThaw (A.ByteArray a) = ST $ \s# ->
181 (# s#, A.MutableByteArray (unsafeCoerce# a) #)
182
183 readGen :: A.Array -> Int
184 readGen (A.ByteArray a) = case indexIntArray# a 0# of r# -> I# r#
185
186 writeGen :: A.MArray s -> Int -> ST s ()
187 writeGen (A.MutableByteArray a) (I# gen#) = ST $ \s0# ->
188 case writeIntArray# a 0# gen# s0# of
189 s1# -> (# s1#, () #)
190
191 #else
192
193 lengthCodeUnits :: Text -> Int
194 lengthCodeUnits = lengthWord16
195
196 dropCodeUnits :: Int -> Buffer -> Text
197 dropCodeUnits s (Buf arr off len _ _) =
198 assert (s >= 0 && s <= len) $
199 Text arr (off+s) (len-s)
200 {-# INLINE dropCodeUnits #-}
201
202 -- | /O(1)/ Iterate (unsafely) one step forwards through a UTF-16
203 -- array, returning the current character and the delta to add to give
204 -- the next offset to iterate at.
205 iter :: Buffer -> Int -> Iter
206 iter (Buf arr off _ _ _) i
207 | m < 0xD800 || m > 0xDBFF = Iter (unsafeChr m) 1
208 | otherwise = Iter (chr2 m n) 2
209 where m = A.unsafeIndex arr j
210 n = A.unsafeIndex arr k
211 j = off + i
212 k = j + 1
213 {-# INLINE iter #-}
214
215 -- | /O(1)/ Iterate one step through a UTF-16 array, returning the
216 -- delta to add to give the next offset to iterate at.
217 iter_ :: Buffer -> Int -> Int
218 iter_ (Buf arr off _ _ _) i | m < 0xD800 || m > 0xDBFF = 1
219 | otherwise = 2
220 where m = A.unsafeIndex arr (off+i)
221 {-# INLINE iter_ #-}
222
223 unsafeThaw :: A.Array -> ST s (A.MArray s)
224 unsafeThaw A.Array{..} = ST $ \s# ->
225 (# s#, A.MArray (unsafeCoerce# aBA) #)
226
227 readGen :: A.Array -> Int
228 readGen a = case indexIntArray# (A.aBA a) 0# of r# -> I# r#
229
230 writeGen :: A.MArray s -> Int -> ST s ()
231 writeGen a (I# gen#) = ST $ \s0# ->
232 case writeIntArray# (A.maBA a) 0# gen# s0# of
233 s1# -> (# s1#, () #)
234
235 #endif