]> Git — Sourcephile - haskell/symantic-parser.git/blob - src/Symantic/Parser/Machine/Input/Text/Buffer.hs
legal: add license `BSD-3-Clause`
[haskell/symantic-parser.git] / src / Symantic / Parser / Machine / Input / Text / Buffer.hs
1 {-# LANGUAGE BangPatterns, CPP, MagicHash, RankNTypes, RecordWildCards,
2 UnboxedTuples #-}
3 -- |
4 -- Module : Data.Attoparsec.Text.Buffer
5 -- SPDX-FileCopyrightText: 2007-2015 Bryan O'Sullivan <bob@serpentine.com>
6 -- SPDX-License-Identifier: BSD-3-Clause
7 --
8 -- Maintainer : bos@serpentine.com
9 -- Stability : experimental
10 -- Portability : GHC
11 --
12 -- An immutable buffer that supports cheap appends.
13
14 -- A Buffer is divided into an immutable read-only zone, followed by a
15 -- mutable area that we've preallocated, but not yet written to.
16 --
17 -- We overallocate at the end of a Buffer so that we can cheaply
18 -- append. Since a user of an existing Buffer cannot see past the end
19 -- of its immutable zone into the data that will change during an
20 -- append, this is safe.
21 --
22 -- Once we run out of space at the end of a Buffer, we do the usual
23 -- doubling of the buffer size.
24
25 module Data.Attoparsec.Text.Buffer
26 (
27 Buffer
28 , buffer
29 , unbuffer
30 , unbufferAt
31 , length
32 , pappend
33 , iter
34 , iter_
35 , substring
36 , dropWord16
37 ) where
38
39 import Control.Exception (assert)
40 import Data.Bits (shiftR)
41 import Data.List (foldl1')
42 import Data.Monoid as Mon (Monoid(..))
43 import Data.Semigroup (Semigroup(..))
44 import Data.Text ()
45 import Data.Text.Internal (Text(..))
46 import Data.Text.Internal.Encoding.Utf16 (chr2)
47 import Data.Text.Internal.Unsafe.Char (unsafeChr)
48 import Data.Text.Unsafe (Iter(..))
49 import Foreign.Storable (sizeOf)
50 import GHC.Exts (Int(..), indexIntArray#, unsafeCoerce#, writeIntArray#)
51 import GHC.ST (ST(..), runST)
52 import Prelude hiding (length)
53 import qualified Data.Text.Array as A
54
55 -- If _cap is zero, this buffer is empty.
56 data Buffer = Buf {
57 _arr :: {-# UNPACK #-} !A.Array
58 , _off :: {-# UNPACK #-} !Int
59 , _len :: {-# UNPACK #-} !Int
60 , _cap :: {-# UNPACK #-} !Int
61 , _gen :: {-# UNPACK #-} !Int
62 }
63
64 instance Show Buffer where
65 showsPrec p = showsPrec p . unbuffer
66
67 -- | The initial 'Buffer' has no mutable zone, so we can avoid all
68 -- copies in the (hopefully) common case of no further input being fed
69 -- to us.
70 buffer :: Text -> Buffer
71 buffer (Text arr off len) = Buf arr off len len 0
72
73 unbuffer :: Buffer -> Text
74 unbuffer (Buf arr off len _ _) = Text arr off len
75
76 unbufferAt :: Int -> Buffer -> Text
77 unbufferAt s (Buf arr off len _ _) =
78 assert (s >= 0 && s <= len) $
79 Text arr (off+s) (len-s)
80
81 instance Semigroup Buffer where
82 (Buf _ _ _ 0 _) <> b = b
83 a <> (Buf _ _ _ 0 _) = a
84 buf <> (Buf arr off len _ _) = append buf arr off len
85 {-# INLINE (<>) #-}
86
87 instance Monoid Buffer where
88 mempty = Buf A.empty 0 0 0 0
89 {-# INLINE mempty #-}
90
91 mappend = (<>)
92
93 mconcat [] = Mon.mempty
94 mconcat xs = foldl1' (<>) xs
95
96 pappend :: Buffer -> Text -> Buffer
97 pappend (Buf _ _ _ 0 _) t = buffer t
98 pappend buf (Text arr off len) = append buf arr off len
99
100 append :: Buffer -> A.Array -> Int -> Int -> Buffer
101 append (Buf arr0 off0 len0 cap0 gen0) !arr1 !off1 !len1 = runST $ do
102 let woff = sizeOf (0::Int) `shiftR` 1
103 newlen = len0 + len1
104 !gen = if gen0 == 0 then 0 else readGen arr0
105 if gen == gen0 && newlen <= cap0
106 then do
107 let newgen = gen + 1
108 marr <- unsafeThaw arr0
109 writeGen marr newgen
110 A.copyI marr (off0+len0) arr1 off1 (off0+newlen)
111 arr2 <- A.unsafeFreeze marr
112 return (Buf arr2 off0 newlen cap0 newgen)
113 else do
114 let newcap = newlen * 2
115 newgen = 1
116 marr <- A.new (newcap + woff)
117 writeGen marr newgen
118 A.copyI marr woff arr0 off0 (woff+len0)
119 A.copyI marr (woff+len0) arr1 off1 (woff+newlen)
120 arr2 <- A.unsafeFreeze marr
121 return (Buf arr2 woff newlen newcap newgen)
122
123 length :: Buffer -> Int
124 length (Buf _ _ len _ _) = len
125 {-# INLINE length #-}
126
127 substring :: Int -> Int -> Buffer -> Text
128 substring s l (Buf arr off len _ _) =
129 assert (s >= 0 && s <= len) .
130 assert (l >= 0 && l <= len-s) $
131 Text arr (off+s) l
132 {-# INLINE substring #-}
133
134 dropWord16 :: Int -> Buffer -> Text
135 dropWord16 s (Buf arr off len _ _) =
136 assert (s >= 0 && s <= len) $
137 Text arr (off+s) (len-s)
138 {-# INLINE dropWord16 #-}
139
140 -- | /O(1)/ Iterate (unsafely) one step forwards through a UTF-16
141 -- array, returning the current character and the delta to add to give
142 -- the next offset to iterate at.
143 iter :: Buffer -> Int -> Iter
144 iter (Buf arr off _ _ _) i
145 | m < 0xD800 || m > 0xDBFF = Iter (unsafeChr m) 1
146 | otherwise = Iter (chr2 m n) 2
147 where m = A.unsafeIndex arr j
148 n = A.unsafeIndex arr k
149 j = off + i
150 k = j + 1
151 {-# INLINE iter #-}
152
153 -- | /O(1)/ Iterate one step through a UTF-16 array, returning the
154 -- delta to add to give the next offset to iterate at.
155 iter_ :: Buffer -> Int -> Int
156 iter_ (Buf arr off _ _ _) i | m < 0xD800 || m > 0xDBFF = 1
157 | otherwise = 2
158 where m = A.unsafeIndex arr (off+i)
159 {-# INLINE iter_ #-}
160
161 unsafeThaw :: A.Array -> ST s (A.MArray s)
162 unsafeThaw A.Array{..} = ST $ \s# ->
163 (# s#, A.MArray (unsafeCoerce# aBA) #)
164
165 readGen :: A.Array -> Int
166 readGen a = case indexIntArray# (A.aBA a) 0# of r# -> I# r#
167
168 writeGen :: A.MArray s -> Int -> ST s ()
169 writeGen a (I# gen#) = ST $ \s0# ->
170 case writeIntArray# (A.maBA a) 0# gen# s0# of
171 s1# -> (# s1#, () #)