]> Git — Sourcephile - haskell/symantic-parser.git/blob - src/Symantic/Parser/Machine/Input/Text/Buffer.hs
wip
[haskell/symantic-parser.git] / src / Symantic / Parser / Machine / Input / Text / Buffer.hs
1 {-# LANGUAGE BangPatterns, CPP, MagicHash, RankNTypes, RecordWildCards,
2 UnboxedTuples #-}
3 -- |
4 -- Module : Data.Attoparsec.Text.Buffer
5 -- SPDX-FileCopyrightText: 2007-2015 Bryan O'Sullivan <bob@serpentine.com>
6 -- SPDX-License-Identifier: BSD-3-Clause
7 --
8 -- Maintainer : bos@serpentine.com
9 -- Stability : experimental
10 -- Portability : GHC
11 --
12 -- An immutable buffer that supports cheap appends.
13
14 -- A Buffer is divided into an immutable read-only zone, followed by a
15 -- mutable area that we've preallocated, but not yet written to.
16 --
17 -- We overallocate at the end of a Buffer so that we can cheaply
18 -- append. Since a user of an existing Buffer cannot see past the end
19 -- of its immutable zone into the data that will change during an
20 -- append, this is safe.
21 --
22 -- Once we run out of space at the end of a Buffer, we do the usual
23 -- doubling of the buffer size.
24
25 module Symantic.Parser.Machine.Input.Text.Buffer
26 (
27 Buffer
28 , buffer
29 , unbuffer
30 , unbufferAt
31 , length
32 , pappend
33 , iter
34 , iter_
35 , substring
36 , dropWord16
37 ) where
38
39 import Control.Exception (assert)
40 import Data.Bits (shiftR)
41 import Data.List (foldl1')
42 import Data.Monoid as Mon (Monoid(..))
43 import Data.Text ()
44 import Data.Text.Internal (Text(..))
45 import Data.Text.Internal.Encoding.Utf16 (chr2)
46 import Data.Text.Internal.Unsafe.Char (unsafeChr)
47 import Data.Text.Unsafe (Iter(..))
48 import Foreign.Storable (sizeOf)
49 import GHC.Exts (Int(..), indexIntArray#, unsafeCoerce#, writeIntArray#)
50 import GHC.ST (ST(..), runST)
51 import Prelude hiding (length)
52 import qualified Data.Text.Array as A
53
54 -- If _cap is zero, this buffer is empty.
55 data Buffer = Buf {
56 _arr :: {-# UNPACK #-} !A.Array
57 , _off :: {-# UNPACK #-} !Int
58 , _len :: {-# UNPACK #-} !Int
59 , _cap :: {-# UNPACK #-} !Int
60 , _gen :: {-# UNPACK #-} !Int
61 }
62
63 instance Show Buffer where
64 showsPrec p = showsPrec p . unbuffer
65
66 -- | The initial 'Buffer' has no mutable zone, so we can avoid all
67 -- copies in the (hopefully) common case of no further input being fed
68 -- to us.
69 buffer :: Text -> Buffer
70 buffer (Text arr off len) = Buf arr off len len 0
71
72 unbuffer :: Buffer -> Text
73 unbuffer (Buf arr off len _ _) = Text arr off len
74
75 unbufferAt :: Int -> Buffer -> Text
76 unbufferAt s (Buf arr off len _ _) =
77 assert (s >= 0 && s <= len) $
78 Text arr (off+s) (len-s)
79
80 instance Semigroup Buffer where
81 (Buf _ _ _ 0 _) <> b = b
82 a <> (Buf _ _ _ 0 _) = a
83 buf <> (Buf arr off len _ _) = append buf arr off len
84 {-# INLINE (<>) #-}
85
86 instance Monoid Buffer where
87 mempty = Buf A.empty 0 0 0 0
88 {-# INLINE mempty #-}
89
90 mappend = (<>)
91
92 mconcat [] = Mon.mempty
93 mconcat xs = foldl1' (<>) xs
94
95 pappend :: Buffer -> Text -> Buffer
96 pappend (Buf _ _ _ 0 _) t = buffer t
97 pappend buf (Text arr off len) = append buf arr off len
98
99 append :: Buffer -> A.Array -> Int -> Int -> Buffer
100 append (Buf arr0 off0 len0 cap0 gen0) !arr1 !off1 !len1 = runST $ do
101 let woff = sizeOf (0::Int) `shiftR` 1
102 newlen = len0 + len1
103 !gen = if gen0 == 0 then 0 else readGen arr0
104 if gen == gen0 && newlen <= cap0
105 then do
106 let newgen = gen + 1
107 marr <- unsafeThaw arr0
108 writeGen marr newgen
109 A.copyI marr (off0+len0) arr1 off1 (off0+newlen)
110 arr2 <- A.unsafeFreeze marr
111 return (Buf arr2 off0 newlen cap0 newgen)
112 else do
113 let newcap = newlen * 2
114 newgen = 1
115 marr <- A.new (newcap + woff)
116 writeGen marr newgen
117 A.copyI marr woff arr0 off0 (woff+len0)
118 A.copyI marr (woff+len0) arr1 off1 (woff+newlen)
119 arr2 <- A.unsafeFreeze marr
120 return (Buf arr2 woff newlen newcap newgen)
121
122 length :: Buffer -> Int
123 length (Buf _ _ len _ _) = len
124 {-# INLINE length #-}
125
126 substring :: Int -> Int -> Buffer -> Text
127 substring s l (Buf arr off len _ _) =
128 assert (s >= 0 && s <= len) .
129 assert (l >= 0 && l <= len-s) $
130 Text arr (off+s) l
131 {-# INLINE substring #-}
132
133 dropWord16 :: Int -> Buffer -> Text
134 dropWord16 s (Buf arr off len _ _) =
135 assert (s >= 0 && s <= len) $
136 Text arr (off+s) (len-s)
137 {-# INLINE dropWord16 #-}
138
139 -- | /O(1)/ Iterate (unsafely) one step forwards through a UTF-16
140 -- array, returning the current character and the delta to add to give
141 -- the next offset to iterate at.
142 iter :: Buffer -> Int -> Iter
143 iter (Buf arr off _ _ _) i
144 | m < 0xD800 || m > 0xDBFF = Iter (unsafeChr m) 1
145 | otherwise = Iter (chr2 m n) 2
146 where m = A.unsafeIndex arr j
147 n = A.unsafeIndex arr k
148 j = off + i
149 k = j + 1
150 {-# INLINE iter #-}
151
152 -- | /O(1)/ Iterate one step through a UTF-16 array, returning the
153 -- delta to add to give the next offset to iterate at.
154 iter_ :: Buffer -> Int -> Int
155 iter_ (Buf arr off _ _ _) i | m < 0xD800 || m > 0xDBFF = 1
156 | otherwise = 2
157 where m = A.unsafeIndex arr (off+i)
158 {-# INLINE iter_ #-}
159
160 unsafeThaw :: A.Array -> ST s (A.MArray s)
161 unsafeThaw A.Array{..} = ST $ \s# ->
162 (# s#, A.MArray (unsafeCoerce# aBA) #)
163
164 readGen :: A.Array -> Int
165 readGen a = case indexIntArray# (A.aBA a) 0# of r# -> I# r#
166
167 writeGen :: A.MArray s -> Int -> ST s ()
168 writeGen a (I# gen#) = ST $ \s0# ->
169 case writeIntArray# (A.maBA a) 0# gen# s0# of
170 s1# -> (# s1#, () #)