1 {-# LANGUAGE BangPatterns, CPP, MagicHash, RankNTypes, RecordWildCards,
5 -- Module : Data.Attoparsec.Text.Buffer
6 -- Copyright : Bryan O'Sullivan 2007-2015
9 -- Maintainer : bos@serpentine.com
10 -- Stability : experimental
13 -- An immutable buffer that supports cheap appends.
15 -- A Buffer is divided into an immutable read-only zone, followed by a
16 -- mutable area that we've preallocated, but not yet written to.
18 -- We overallocate at the end of a Buffer so that we can cheaply
19 -- append. Since a user of an existing Buffer cannot see past the end
20 -- of its immutable zone into the data that will change during an
21 -- append, this is safe.
23 -- Once we run out of space at the end of a Buffer, we do the usual
24 -- doubling of the buffer size.
26 module Symantic.Parser.Machine.Input.Text.Buffer
41 import Control.Exception (assert)
42 import Data.List (foldl1')
43 import Data.Monoid as Mon (Monoid(..))
44 import Data.Semigroup (Semigroup(..))
46 import Data.Text.Internal (Text(..))
47 #if MIN_VERSION_text(2,0,0)
48 import Data.Text.Internal.Encoding.Utf8 (utf8LengthByLeader)
49 import Data.Text.Unsafe (iterArray, lengthWord8)
51 import Data.Bits (shiftR)
52 import Data.Text.Internal.Encoding.Utf16 (chr2)
53 import Data.Text.Internal.Unsafe.Char (unsafeChr)
54 import Data.Text.Unsafe (lengthWord16)
56 import Data.Text.Unsafe (Iter(..))
57 import Foreign.Storable (sizeOf)
58 import GHC.Exts (Int(..), indexIntArray#, unsafeCoerce#, writeIntArray#)
59 import GHC.ST (ST(..), runST)
60 import Prelude hiding (length)
61 import qualified Data.Text.Array as A
63 -- If _cap is zero, this buffer is empty.
65 _arr :: {-# UNPACK #-} !A.Array
66 , _off :: {-# UNPACK #-} !Int
67 , _len :: {-# UNPACK #-} !Int
68 , _cap :: {-# UNPACK #-} !Int
69 , _gen :: {-# UNPACK #-} !Int
72 instance Show Buffer where
73 showsPrec p = showsPrec p . unbuffer
75 -- | The initial 'Buffer' has no mutable zone, so we can avoid all
76 -- copies in the (hopefully) common case of no further input being fed
78 buffer :: Text -> Buffer
79 buffer (Text arr off len) = Buf arr off len len 0
81 unbuffer :: Buffer -> Text
82 unbuffer (Buf arr off len _ _) = Text arr off len
84 unbufferAt :: Int -> Buffer -> Text
85 unbufferAt s (Buf arr off len _ _) =
86 assert (s >= 0 && s <= len) $
87 Text arr (off+s) (len-s)
89 instance Semigroup Buffer where
90 (Buf _ _ _ 0 _) <> b = b
91 a <> (Buf _ _ _ 0 _) = a
92 buf <> (Buf arr off len _ _) = append buf arr off len
95 instance Monoid Buffer where
96 mempty = Buf A.empty 0 0 0 0
101 mconcat [] = Mon.mempty
102 mconcat xs = foldl1' (<>) xs
104 pappend :: Buffer -> Text -> Buffer
105 pappend (Buf _ _ _ 0 _) t = buffer t
106 pappend buf (Text arr off len) = append buf arr off len
108 append :: Buffer -> A.Array -> Int -> Int -> Buffer
109 append (Buf arr0 off0 len0 cap0 gen0) !arr1 !off1 !len1 = runST $ do
110 #if MIN_VERSION_text(2,0,0)
111 let woff = sizeOf (0::Int)
113 let woff = sizeOf (0::Int) `shiftR` 1
116 !gen = if gen0 == 0 then 0 else readGen arr0
117 if gen == gen0 && newlen <= cap0
120 marr <- unsafeThaw arr0
122 #if MIN_VERSION_text(2,0,0)
123 A.copyI len1 marr (off0+len0) arr1 off1
125 A.copyI marr (off0+len0) arr1 off1 (off0+newlen)
127 arr2 <- A.unsafeFreeze marr
128 return (Buf arr2 off0 newlen cap0 newgen)
130 let newcap = newlen * 2
132 marr <- A.new (newcap + woff)
134 #if MIN_VERSION_text(2,0,0)
135 A.copyI len0 marr woff arr0 off0
136 A.copyI len1 marr (woff+len0) arr1 off1
138 A.copyI marr woff arr0 off0 (woff+len0)
139 A.copyI marr (woff+len0) arr1 off1 (woff+newlen)
141 arr2 <- A.unsafeFreeze marr
142 return (Buf arr2 woff newlen newcap newgen)
144 length :: Buffer -> Int
145 length (Buf _ _ len _ _) = len
146 {-# INLINE length #-}
148 substring :: Int -> Int -> Buffer -> Text
149 substring s l (Buf arr off len _ _) =
150 assert (s >= 0 && s <= len) .
151 assert (l >= 0 && l <= len-s) $
153 {-# INLINE substring #-}
155 #if MIN_VERSION_text(2,0,0)
157 lengthCodeUnits :: Text -> Int
158 lengthCodeUnits = lengthWord8
160 dropCodeUnits :: Int -> Buffer -> Text
161 dropCodeUnits s (Buf arr off len _ _) =
162 assert (s >= 0 && s <= len) $
163 Text arr (off+s) (len-s)
164 {-# INLINE dropCodeUnits #-}
166 -- | /O(1)/ Iterate (unsafely) one step forwards through a UTF-8
167 -- array, returning the current character and the delta to add to give
168 -- the next offset to iterate at.
169 iter :: Buffer -> Int -> Iter
170 iter (Buf arr off _ _ _) i = iterArray arr (off + i)
173 -- | /O(1)/ Iterate one step through a UTF-8 array, returning the
174 -- delta to add to give the next offset to iterate at.
175 iter_ :: Buffer -> Int -> Int
176 iter_ (Buf arr off _ _ _) i = utf8LengthByLeader $ A.unsafeIndex arr (off+i)
179 unsafeThaw :: A.Array -> ST s (A.MArray s)
180 unsafeThaw (A.ByteArray a) = ST $ \s# ->
181 (# s#, A.MutableByteArray (unsafeCoerce# a) #)
183 readGen :: A.Array -> Int
184 readGen (A.ByteArray a) = case indexIntArray# a 0# of r# -> I# r#
186 writeGen :: A.MArray s -> Int -> ST s ()
187 writeGen (A.MutableByteArray a) (I# gen#) = ST $ \s0# ->
188 case writeIntArray# a 0# gen# s0# of
193 lengthCodeUnits :: Text -> Int
194 lengthCodeUnits = lengthWord16
196 dropCodeUnits :: Int -> Buffer -> Text
197 dropCodeUnits s (Buf arr off len _ _) =
198 assert (s >= 0 && s <= len) $
199 Text arr (off+s) (len-s)
200 {-# INLINE dropCodeUnits #-}
202 -- | /O(1)/ Iterate (unsafely) one step forwards through a UTF-16
203 -- array, returning the current character and the delta to add to give
204 -- the next offset to iterate at.
205 iter :: Buffer -> Int -> Iter
206 iter (Buf arr off _ _ _) i
207 | m < 0xD800 || m > 0xDBFF = Iter (unsafeChr m) 1
208 | otherwise = Iter (chr2 m n) 2
209 where m = A.unsafeIndex arr j
210 n = A.unsafeIndex arr k
215 -- | /O(1)/ Iterate one step through a UTF-16 array, returning the
216 -- delta to add to give the next offset to iterate at.
217 iter_ :: Buffer -> Int -> Int
218 iter_ (Buf arr off _ _ _) i | m < 0xD800 || m > 0xDBFF = 1
220 where m = A.unsafeIndex arr (off+i)
223 unsafeThaw :: A.Array -> ST s (A.MArray s)
224 unsafeThaw A.Array{..} = ST $ \s# ->
225 (# s#, A.MArray (unsafeCoerce# aBA) #)
227 readGen :: A.Array -> Int
228 readGen a = case indexIntArray# (A.aBA a) 0# of r# -> I# r#
230 writeGen :: A.MArray s -> Int -> ST s ()
231 writeGen a (I# gen#) = ST $ \s0# ->
232 case writeIntArray# (A.maBA a) 0# gen# s0# of