1 {-# LANGUAGE BangPatterns, CPP, MagicHash, RankNTypes, RecordWildCards,
4 -- Module : Data.Attoparsec.Text.Buffer
5 -- SPDX-FileCopyrightText: 2007-2015 Bryan O'Sullivan <bob@serpentine.com>
6 -- SPDX-License-Identifier: BSD-3-Clause
8 -- Maintainer : bos@serpentine.com
9 -- Stability : experimental
12 -- An immutable buffer that supports cheap appends.
14 -- A Buffer is divided into an immutable read-only zone, followed by a
15 -- mutable area that we've preallocated, but not yet written to.
17 -- We overallocate at the end of a Buffer so that we can cheaply
18 -- append. Since a user of an existing Buffer cannot see past the end
19 -- of its immutable zone into the data that will change during an
20 -- append, this is safe.
22 -- Once we run out of space at the end of a Buffer, we do the usual
23 -- doubling of the buffer size.
25 module Data.Attoparsec.Text.Buffer
39 import Control.Exception (assert)
40 import Data.Bits (shiftR)
41 import Data.List (foldl1')
42 import Data.Monoid as Mon (Monoid(..))
43 import Data.Semigroup (Semigroup(..))
45 import Data.Text.Internal (Text(..))
46 import Data.Text.Internal.Encoding.Utf16 (chr2)
47 import Data.Text.Internal.Unsafe.Char (unsafeChr)
48 import Data.Text.Unsafe (Iter(..))
49 import Foreign.Storable (sizeOf)
50 import GHC.Exts (Int(..), indexIntArray#, unsafeCoerce#, writeIntArray#)
51 import GHC.ST (ST(..), runST)
52 import Prelude hiding (length)
53 import qualified Data.Text.Array as A
55 -- If _cap is zero, this buffer is empty.
57 _arr :: {-# UNPACK #-} !A.Array
58 , _off :: {-# UNPACK #-} !Int
59 , _len :: {-# UNPACK #-} !Int
60 , _cap :: {-# UNPACK #-} !Int
61 , _gen :: {-# UNPACK #-} !Int
64 instance Show Buffer where
65 showsPrec p = showsPrec p . unbuffer
67 -- | The initial 'Buffer' has no mutable zone, so we can avoid all
68 -- copies in the (hopefully) common case of no further input being fed
70 buffer :: Text -> Buffer
71 buffer (Text arr off len) = Buf arr off len len 0
73 unbuffer :: Buffer -> Text
74 unbuffer (Buf arr off len _ _) = Text arr off len
76 unbufferAt :: Int -> Buffer -> Text
77 unbufferAt s (Buf arr off len _ _) =
78 assert (s >= 0 && s <= len) $
79 Text arr (off+s) (len-s)
81 instance Semigroup Buffer where
82 (Buf _ _ _ 0 _) <> b = b
83 a <> (Buf _ _ _ 0 _) = a
84 buf <> (Buf arr off len _ _) = append buf arr off len
87 instance Monoid Buffer where
88 mempty = Buf A.empty 0 0 0 0
93 mconcat [] = Mon.mempty
94 mconcat xs = foldl1' (<>) xs
96 pappend :: Buffer -> Text -> Buffer
97 pappend (Buf _ _ _ 0 _) t = buffer t
98 pappend buf (Text arr off len) = append buf arr off len
100 append :: Buffer -> A.Array -> Int -> Int -> Buffer
101 append (Buf arr0 off0 len0 cap0 gen0) !arr1 !off1 !len1 = runST $ do
102 let woff = sizeOf (0::Int) `shiftR` 1
104 !gen = if gen0 == 0 then 0 else readGen arr0
105 if gen == gen0 && newlen <= cap0
108 marr <- unsafeThaw arr0
110 A.copyI marr (off0+len0) arr1 off1 (off0+newlen)
111 arr2 <- A.unsafeFreeze marr
112 return (Buf arr2 off0 newlen cap0 newgen)
114 let newcap = newlen * 2
116 marr <- A.new (newcap + woff)
118 A.copyI marr woff arr0 off0 (woff+len0)
119 A.copyI marr (woff+len0) arr1 off1 (woff+newlen)
120 arr2 <- A.unsafeFreeze marr
121 return (Buf arr2 woff newlen newcap newgen)
123 length :: Buffer -> Int
124 length (Buf _ _ len _ _) = len
125 {-# INLINE length #-}
127 substring :: Int -> Int -> Buffer -> Text
128 substring s l (Buf arr off len _ _) =
129 assert (s >= 0 && s <= len) .
130 assert (l >= 0 && l <= len-s) $
132 {-# INLINE substring #-}
134 dropWord16 :: Int -> Buffer -> Text
135 dropWord16 s (Buf arr off len _ _) =
136 assert (s >= 0 && s <= len) $
137 Text arr (off+s) (len-s)
138 {-# INLINE dropWord16 #-}
140 -- | /O(1)/ Iterate (unsafely) one step forwards through a UTF-16
141 -- array, returning the current character and the delta to add to give
142 -- the next offset to iterate at.
143 iter :: Buffer -> Int -> Iter
144 iter (Buf arr off _ _ _) i
145 | m < 0xD800 || m > 0xDBFF = Iter (unsafeChr m) 1
146 | otherwise = Iter (chr2 m n) 2
147 where m = A.unsafeIndex arr j
148 n = A.unsafeIndex arr k
153 -- | /O(1)/ Iterate one step through a UTF-16 array, returning the
154 -- delta to add to give the next offset to iterate at.
155 iter_ :: Buffer -> Int -> Int
156 iter_ (Buf arr off _ _ _) i | m < 0xD800 || m > 0xDBFF = 1
158 where m = A.unsafeIndex arr (off+i)
161 unsafeThaw :: A.Array -> ST s (A.MArray s)
162 unsafeThaw A.Array{..} = ST $ \s# ->
163 (# s#, A.MArray (unsafeCoerce# aBA) #)
165 readGen :: A.Array -> Int
166 readGen a = case indexIntArray# (A.aBA a) 0# of r# -> I# r#
168 writeGen :: A.MArray s -> Int -> ST s ()
169 writeGen a (I# gen#) = ST $ \s0# ->
170 case writeIntArray# (A.maBA a) 0# gen# s0# of