]> Git — Sourcephile - haskell/symantic-parser.git/blob - src/Symantic/Parser/Machine/Input/Text/Buffer.hs
impl: add attoparsec's Text.Buffer
[haskell/symantic-parser.git] / src / Symantic / Parser / Machine / Input / Text / Buffer.hs
1 {-# LANGUAGE BangPatterns, CPP, MagicHash, RankNTypes, RecordWildCards,
2 UnboxedTuples #-}
3
4 -- |
5 -- Module : Data.Attoparsec.Text.Buffer
6 -- Copyright : Bryan O'Sullivan 2007-2015
7 -- License : BSD3
8 --
9 -- Maintainer : bos@serpentine.com
10 -- Stability : experimental
11 -- Portability : GHC
12 --
13 -- An immutable buffer that supports cheap appends.
14
15 -- A Buffer is divided into an immutable read-only zone, followed by a
16 -- mutable area that we've preallocated, but not yet written to.
17 --
18 -- We overallocate at the end of a Buffer so that we can cheaply
19 -- append. Since a user of an existing Buffer cannot see past the end
20 -- of its immutable zone into the data that will change during an
21 -- append, this is safe.
22 --
23 -- Once we run out of space at the end of a Buffer, we do the usual
24 -- doubling of the buffer size.
25
26 module Data.Attoparsec.Text.Buffer
27 (
28 Buffer
29 , buffer
30 , unbuffer
31 , unbufferAt
32 , length
33 , pappend
34 , iter
35 , iter_
36 , substring
37 , dropWord16
38 ) where
39
40 import Control.Exception (assert)
41 import Data.Bits (shiftR)
42 import Data.List (foldl1')
43 import Data.Monoid as Mon (Monoid(..))
44 import Data.Semigroup (Semigroup(..))
45 import Data.Text ()
46 import Data.Text.Internal (Text(..))
47 import Data.Text.Internal.Encoding.Utf16 (chr2)
48 import Data.Text.Internal.Unsafe.Char (unsafeChr)
49 import Data.Text.Unsafe (Iter(..))
50 import Foreign.Storable (sizeOf)
51 import GHC.Exts (Int(..), indexIntArray#, unsafeCoerce#, writeIntArray#)
52 import GHC.ST (ST(..), runST)
53 import Prelude hiding (length)
54 import qualified Data.Text.Array as A
55
56 -- If _cap is zero, this buffer is empty.
57 data Buffer = Buf {
58 _arr :: {-# UNPACK #-} !A.Array
59 , _off :: {-# UNPACK #-} !Int
60 , _len :: {-# UNPACK #-} !Int
61 , _cap :: {-# UNPACK #-} !Int
62 , _gen :: {-# UNPACK #-} !Int
63 }
64
65 instance Show Buffer where
66 showsPrec p = showsPrec p . unbuffer
67
68 -- | The initial 'Buffer' has no mutable zone, so we can avoid all
69 -- copies in the (hopefully) common case of no further input being fed
70 -- to us.
71 buffer :: Text -> Buffer
72 buffer (Text arr off len) = Buf arr off len len 0
73
74 unbuffer :: Buffer -> Text
75 unbuffer (Buf arr off len _ _) = Text arr off len
76
77 unbufferAt :: Int -> Buffer -> Text
78 unbufferAt s (Buf arr off len _ _) =
79 assert (s >= 0 && s <= len) $
80 Text arr (off+s) (len-s)
81
82 instance Semigroup Buffer where
83 (Buf _ _ _ 0 _) <> b = b
84 a <> (Buf _ _ _ 0 _) = a
85 buf <> (Buf arr off len _ _) = append buf arr off len
86 {-# INLINE (<>) #-}
87
88 instance Monoid Buffer where
89 mempty = Buf A.empty 0 0 0 0
90 {-# INLINE mempty #-}
91
92 mappend = (<>)
93
94 mconcat [] = Mon.mempty
95 mconcat xs = foldl1' (<>) xs
96
97 pappend :: Buffer -> Text -> Buffer
98 pappend (Buf _ _ _ 0 _) t = buffer t
99 pappend buf (Text arr off len) = append buf arr off len
100
101 append :: Buffer -> A.Array -> Int -> Int -> Buffer
102 append (Buf arr0 off0 len0 cap0 gen0) !arr1 !off1 !len1 = runST $ do
103 let woff = sizeOf (0::Int) `shiftR` 1
104 newlen = len0 + len1
105 !gen = if gen0 == 0 then 0 else readGen arr0
106 if gen == gen0 && newlen <= cap0
107 then do
108 let newgen = gen + 1
109 marr <- unsafeThaw arr0
110 writeGen marr newgen
111 A.copyI marr (off0+len0) arr1 off1 (off0+newlen)
112 arr2 <- A.unsafeFreeze marr
113 return (Buf arr2 off0 newlen cap0 newgen)
114 else do
115 let newcap = newlen * 2
116 newgen = 1
117 marr <- A.new (newcap + woff)
118 writeGen marr newgen
119 A.copyI marr woff arr0 off0 (woff+len0)
120 A.copyI marr (woff+len0) arr1 off1 (woff+newlen)
121 arr2 <- A.unsafeFreeze marr
122 return (Buf arr2 woff newlen newcap newgen)
123
124 length :: Buffer -> Int
125 length (Buf _ _ len _ _) = len
126 {-# INLINE length #-}
127
128 substring :: Int -> Int -> Buffer -> Text
129 substring s l (Buf arr off len _ _) =
130 assert (s >= 0 && s <= len) .
131 assert (l >= 0 && l <= len-s) $
132 Text arr (off+s) l
133 {-# INLINE substring #-}
134
135 dropWord16 :: Int -> Buffer -> Text
136 dropWord16 s (Buf arr off len _ _) =
137 assert (s >= 0 && s <= len) $
138 Text arr (off+s) (len-s)
139 {-# INLINE dropWord16 #-}
140
141 -- | /O(1)/ Iterate (unsafely) one step forwards through a UTF-16
142 -- array, returning the current character and the delta to add to give
143 -- the next offset to iterate at.
144 iter :: Buffer -> Int -> Iter
145 iter (Buf arr off _ _ _) i
146 | m < 0xD800 || m > 0xDBFF = Iter (unsafeChr m) 1
147 | otherwise = Iter (chr2 m n) 2
148 where m = A.unsafeIndex arr j
149 n = A.unsafeIndex arr k
150 j = off + i
151 k = j + 1
152 {-# INLINE iter #-}
153
154 -- | /O(1)/ Iterate one step through a UTF-16 array, returning the
155 -- delta to add to give the next offset to iterate at.
156 iter_ :: Buffer -> Int -> Int
157 iter_ (Buf arr off _ _ _) i | m < 0xD800 || m > 0xDBFF = 1
158 | otherwise = 2
159 where m = A.unsafeIndex arr (off+i)
160 {-# INLINE iter_ #-}
161
162 unsafeThaw :: A.Array -> ST s (A.MArray s)
163 unsafeThaw A.Array{..} = ST $ \s# ->
164 (# s#, A.MArray (unsafeCoerce# aBA) #)
165
166 readGen :: A.Array -> Int
167 readGen a = case indexIntArray# (A.aBA a) 0# of r# -> I# r#
168
169 writeGen :: A.MArray s -> Int -> ST s ()
170 writeGen a (I# gen#) = ST $ \s0# ->
171 case writeIntArray# (A.maBA a) 0# gen# s0# of
172 s1# -> (# s1#, () #)