]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Database/Schema/Ngrams.hs
Merge branch 'dev' into 141-dev-node-stories-db-optimization
[gargantext.git] / src / Gargantext / Database / Schema / Ngrams.hs
1 {-|
2 Module : Gargantext.Database.Schema.NgramsPostag
3 Description : Ngram connection to the Database
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 Ngrams connection to the Database.
11
12 -}
13
14 {-# OPTIONS_GHC -fno-warn-orphans #-}
15 {-# LANGUAGE Arrows #-}
16 {-# LANGUAGE FunctionalDependencies #-}
17 {-# LANGUAGE QuasiQuotes #-}
18 {-# LANGUAGE TemplateHaskell #-}
19
20 module Gargantext.Database.Schema.Ngrams
21 where
22
23 import Codec.Serialise (Serialise())
24 import Control.Lens (over)
25 import Control.Monad (mzero)
26 import Data.Maybe (fromMaybe)
27 import Data.HashMap.Strict (HashMap)
28 import Data.Hashable (Hashable)
29 import Data.Aeson
30 import Data.Aeson.Types (toJSONKeyText)
31 import Data.Map (fromList, lookup)
32 import Database.PostgreSQL.Simple.FromField (returnError, ResultError(..))
33 import Data.Text (Text, splitOn, pack, strip)
34 import Gargantext.Core.Types (TODO(..), Typed(..))
35 import Gargantext.Prelude
36 import Servant (FromHttpApiData(..), Proxy(..), ToHttpApiData(..))
37 import Gargantext.Core (HasDBid(..))
38 import Gargantext.Database.Types
39 import Gargantext.Database.Schema.Prelude
40 import Text.Read (read)
41 import qualified Data.ByteString.Char8 as B
42 import qualified Data.HashMap.Strict as HashMap
43 import qualified Database.PostgreSQL.Simple as PGS
44
45
46 type NgramsId = Int
47 type Size = Int
48
49 data NgramsPoly id terms n = NgramsDB { _ngrams_id :: !id
50 , _ngrams_terms :: !terms
51 , _ngrams_n :: !n
52 } deriving (Show)
53
54 type NgramsWrite = NgramsPoly (Maybe (Column SqlInt4))
55 (Column SqlText)
56 (Column SqlInt4)
57
58 type NgramsRead = NgramsPoly (Column SqlInt4)
59 (Column SqlText)
60 (Column SqlInt4)
61
62 type NgramsReadNull = NgramsPoly (Column (Nullable SqlInt4))
63 (Column (Nullable SqlText))
64 (Column (Nullable SqlInt4))
65
66 type NgramsDB = NgramsPoly Int Text Int
67
68 $(makeAdaptorAndInstance "pNgramsDb" ''NgramsPoly)
69 makeLenses ''NgramsPoly
70
71
72 ngramsTable :: Table NgramsWrite NgramsRead
73 ngramsTable = Table "ngrams" (pNgramsDb NgramsDB { _ngrams_id = optionalTableField "id"
74 , _ngrams_terms = requiredTableField "terms"
75 , _ngrams_n = requiredTableField "n"
76 }
77 )
78
79 -- | Main Ngrams Types
80 -- | Typed Ngrams
81 -- Typed Ngrams localize the context of the ngrams
82 -- ngrams in source field of document has Sources Type
83 -- ngrams in authors field of document has Authors Type
84 -- ngrams in text fields of documents has Terms Type (i.e. either title or abstract)
85 data NgramsType = Authors | Institutes | Sources | NgramsTerms
86 deriving (Eq, Show, Read, Ord, Enum, Bounded, Generic)
87 instance Serialise NgramsType
88 instance FromJSON NgramsType
89 instance FromJSONKey NgramsType where
90 fromJSONKey = FromJSONKeyTextParser (parseJSON . String)
91 instance ToJSON NgramsType
92 instance ToJSONKey NgramsType where
93 toJSONKey = toJSONKeyText (pack . show)
94 instance FromHttpApiData NgramsType where
95 parseUrlPiece n = pure $ (read . cs) n
96 instance ToHttpApiData NgramsType where
97 toUrlPiece = pack . show
98 instance ToParamSchema NgramsType where
99 toParamSchema _ = toParamSchema (Proxy :: Proxy TODO)
100 -- map NgramsType to its assigned id
101 instance FromField NgramsType where
102 fromField fld mdata =
103 case B.unpack `fmap` mdata of
104 Nothing -> returnError UnexpectedNull fld ""
105 Just dat -> do
106 n <- fromField fld mdata
107 if (n :: Int) > 0 then
108 case fromNgramsTypeId (NgramsTypeId n) of
109 Nothing -> returnError ConversionFailed fld dat
110 Just nt -> pure nt
111 else
112 returnError ConversionFailed fld dat
113 instance ToField NgramsType where
114 toField nt = toField $ ngramsTypeId nt
115
116
117 ngramsTypes :: [NgramsType]
118 ngramsTypes = [minBound..]
119
120 instance ToSchema NgramsType
121 {- where
122 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_nre_")
123 --}
124
125 newtype NgramsTypeId = NgramsTypeId Int
126 deriving (Eq, Show, Ord, Num)
127 instance ToField NgramsTypeId where
128 toField (NgramsTypeId n) = toField n
129 instance FromField NgramsTypeId where
130 fromField fld mdata = do
131 n <- fromField fld mdata
132 if (n :: Int) > 0 then return $ NgramsTypeId n
133 else mzero
134 instance DefaultFromField (Nullable SqlInt4) NgramsTypeId
135 where
136 defaultFromField = fromPGSFromField
137
138 pgNgramsType :: NgramsType -> Column SqlInt4
139 pgNgramsType = pgNgramsTypeId . ngramsTypeId
140
141 pgNgramsTypeId :: NgramsTypeId -> Column SqlInt4
142 pgNgramsTypeId (NgramsTypeId n) = sqlInt4 n
143
144 ngramsTypeId :: NgramsType -> NgramsTypeId
145 ngramsTypeId Authors = 1
146 ngramsTypeId Institutes = 2
147 ngramsTypeId Sources = 3
148 ngramsTypeId NgramsTerms = 4
149
150 fromNgramsTypeId :: NgramsTypeId -> Maybe NgramsType
151 fromNgramsTypeId id = lookup id
152 $ fromList [ (ngramsTypeId nt,nt)
153 | nt <- [minBound .. maxBound] :: [NgramsType]
154 ]
155
156 unNgramsTypeId :: NgramsTypeId -> Int
157 unNgramsTypeId (NgramsTypeId i) = i
158
159 toNgramsTypeId :: Int -> NgramsTypeId
160 toNgramsTypeId i = NgramsTypeId i
161
162 instance HasDBid NgramsType where
163 toDBid = unNgramsTypeId . ngramsTypeId
164 fromDBid = fromMaybe (panic "NgramsType id not indexed") . fromNgramsTypeId . toNgramsTypeId
165
166 ------------------------------------------------------------------------
167 ------------------------------------------------------------------------
168 -- | TODO put it in Gargantext.Core.Text.Ngrams
169 data Ngrams = UnsafeNgrams { _ngramsTerms :: Text
170 , _ngramsSize :: Int
171 }
172 deriving (Generic, Show, Eq, Ord)
173
174 instance Hashable Ngrams
175
176 makeLenses ''Ngrams
177 instance PGS.ToRow Ngrams where
178 toRow (UnsafeNgrams t s) = [toField t, toField s]
179
180 instance FromField Ngrams where
181 fromField fld mdata = do
182 x <- fromField fld mdata
183 pure $ text2ngrams x
184
185 instance PGS.ToRow Text where
186 toRow t = [toField t]
187
188 text2ngrams :: Text -> Ngrams
189 text2ngrams txt = UnsafeNgrams txt' $ length $ splitOn " " txt'
190 where
191 txt' = strip txt
192
193
194 ------------------------------------------------------------------------
195 -------------------------------------------------------------------------
196 -- | TODO put it in Gargantext.Core.Text.Ngrams
197 -- Named entity are typed ngrams of Terms Ngrams
198 data NgramsT a =
199 NgramsT { _ngramsType :: NgramsType
200 , _ngramsT :: a
201 } deriving (Generic, Show, Eq, Ord)
202
203 makeLenses ''NgramsT
204
205 instance Functor NgramsT where
206 fmap = over ngramsT
207
208 -----------------------------------------------------------------------
209 withMap :: HashMap Text NgramsId -> Text -> NgramsId
210 withMap m n = maybe (panic $ "[G.D.S.Ngrams.withMap] Should not happen" <> (cs $ show n))
211 identity (HashMap.lookup n m)
212
213 indexNgramsT :: HashMap Text NgramsId -> NgramsT Ngrams -> NgramsT (Indexed Int Ngrams)
214 indexNgramsT = fmap . indexNgramsWith . withMap
215
216 -- | TODO replace NgramsT whith Typed NgramsType Ngrams
217 indexTypedNgrams :: HashMap Text NgramsId
218 -> Typed NgramsType Ngrams
219 -> Typed NgramsType (Indexed Int Ngrams)
220 indexTypedNgrams = fmap . indexNgramsWith . withMap
221
222 indexNgrams :: HashMap Text NgramsId -> Ngrams -> Indexed Int Ngrams
223 indexNgrams = indexNgramsWith . withMap
224
225 indexNgramsWith :: (Text -> NgramsId) -> Ngrams -> Indexed Int Ngrams
226 indexNgramsWith f n = Indexed (f $ _ngramsTerms n) n