]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Database/Schema/Ngrams.hs
[DB/FACT] Schema NodeNgramsRepo -> Query (with warnings)
[gargantext.git] / src / Gargantext / Database / Schema / Ngrams.hs
1 {-|
2 Module : Gargantext.Database.Schema.Ngrams
3 Description : Ngram connection to the Database
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 Ngrams connection to the Database.
11
12 -}
13
14 {-# LANGUAGE Arrows #-}
15 {-# LANGUAGE DeriveGeneric #-}
16 {-# LANGUAGE FlexibleContexts #-}
17 {-# LANGUAGE FlexibleInstances #-}
18 {-# LANGUAGE FunctionalDependencies #-}
19 {-# LANGUAGE GeneralizedNewtypeDeriving #-}
20 {-# LANGUAGE MultiParamTypeClasses #-}
21 {-# LANGUAGE NoImplicitPrelude #-}
22 {-# LANGUAGE OverloadedStrings #-}
23 {-# LANGUAGE QuasiQuotes #-}
24 {-# LANGUAGE RankNTypes #-}
25 {-# LANGUAGE TemplateHaskell #-}
26
27 module Gargantext.Database.Schema.Ngrams
28 where
29
30 import Control.Lens (makeLenses, over)
31 import Control.Monad (mzero)
32 import Data.Aeson
33 import Data.Aeson.Types (toJSONKeyText)
34 import Data.ByteString.Internal (ByteString)
35 import Data.Map (Map, fromList, lookup)
36 import Data.Profunctor.Product.TH (makeAdaptorAndInstance)
37 import Data.Swagger (ToParamSchema, toParamSchema, ToSchema)
38 import Data.Text (Text, splitOn, pack)
39 import GHC.Generics (Generic)
40 import Gargantext.Core.Types (TODO(..))
41 import Gargantext.Prelude
42 import Prelude (Enum, Bounded, minBound, maxBound, Functor)
43 import Servant (FromHttpApiData, parseUrlPiece, Proxy(..))
44 import Text.Read (read)
45 import Gargantext.Database.Schema.Prelude
46 import qualified Database.PostgreSQL.Simple as PGS
47
48
49 type NgramsId = Int
50 type NgramsTerms = Text
51 type Size = Int
52
53 data NgramsPoly id terms n = NgramsDb { _ngrams_id :: !id
54 , _ngrams_terms :: !terms
55 , _ngrams_n :: !n
56 } deriving (Show)
57
58 type NgramsWrite = NgramsPoly (Maybe (Column PGInt4))
59 (Column PGText)
60 (Column PGInt4)
61
62 type NgramsRead = NgramsPoly (Column PGInt4)
63 (Column PGText)
64 (Column PGInt4)
65
66 type NgramsReadNull = NgramsPoly (Column (Nullable PGInt4))
67 (Column (Nullable PGText))
68 (Column (Nullable PGInt4))
69
70 type NgramsDb = NgramsPoly Int Text Int
71
72 $(makeAdaptorAndInstance "pNgramsDb" ''NgramsPoly)
73 makeLenses ''NgramsPoly
74
75
76 ngramsTable :: Table NgramsWrite NgramsRead
77 ngramsTable = Table "ngrams" (pNgramsDb NgramsDb { _ngrams_id = optional "id"
78 , _ngrams_terms = required "terms"
79 , _ngrams_n = required "n"
80 }
81 )
82
83
84
85 -- | Main Ngrams Types
86 -- | Typed Ngrams
87 -- Typed Ngrams localize the context of the ngrams
88 -- ngrams in source field of document has Sources Type
89 -- ngrams in authors field of document has Authors Type
90 -- ngrams in text (title or abstract) of documents has Terms Type
91 data NgramsType = Authors | Institutes | Sources | NgramsTerms
92 deriving (Eq, Show, Read, Ord, Enum, Bounded, Generic)
93
94 ngramsTypes :: [NgramsType]
95 ngramsTypes = [minBound..]
96
97 instance ToSchema NgramsType
98 {- where
99 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_nre_")
100 --}
101
102 newtype NgramsTypeId = NgramsTypeId Int
103 deriving (Eq, Show, Ord, Num)
104
105 instance ToField NgramsTypeId where
106 toField (NgramsTypeId n) = toField n
107
108 instance FromField NgramsTypeId where
109 fromField fld mdata = do
110 n <- fromField fld mdata
111 if (n :: Int) > 0 then return $ NgramsTypeId n
112 else mzero
113
114 instance FromJSON NgramsType
115 instance FromJSONKey NgramsType where
116 fromJSONKey = FromJSONKeyTextParser (parseJSON . String)
117 instance ToJSON NgramsType
118 instance ToJSONKey NgramsType where
119 toJSONKey = toJSONKeyText (pack . show)
120
121 instance FromHttpApiData NgramsType where
122 parseUrlPiece n = pure $ (read . cs) n
123
124 instance ToParamSchema NgramsType where
125 toParamSchema _ = toParamSchema (Proxy :: Proxy TODO)
126
127
128 instance QueryRunnerColumnDefault (Nullable PGInt4) NgramsTypeId
129 where
130 queryRunnerColumnDefault = fieldQueryRunnerColumn
131
132 pgNgramsType :: NgramsType -> Column PGInt4
133 pgNgramsType = pgNgramsTypeId . ngramsTypeId
134
135 pgNgramsTypeId :: NgramsTypeId -> Column PGInt4
136 pgNgramsTypeId (NgramsTypeId n) = pgInt4 n
137
138 ngramsTypeId :: NgramsType -> NgramsTypeId
139 ngramsTypeId Authors = 1
140 ngramsTypeId Institutes = 2
141 ngramsTypeId Sources = 3
142 ngramsTypeId NgramsTerms = 4
143
144 fromNgramsTypeId :: NgramsTypeId -> Maybe NgramsType
145 fromNgramsTypeId id = lookup id
146 $ fromList [ (ngramsTypeId nt,nt)
147 | nt <- [minBound .. maxBound] :: [NgramsType]
148 ]
149
150 ------------------------------------------------------------------------
151 -- | TODO put it in Gargantext.Text.Ngrams
152 data Ngrams = Ngrams { _ngramsTerms :: Text
153 , _ngramsSize :: Int
154 } deriving (Generic, Show, Eq, Ord)
155
156 makeLenses ''Ngrams
157 instance PGS.ToRow Ngrams where
158 toRow (Ngrams t s) = [toField t, toField s]
159
160 text2ngrams :: Text -> Ngrams
161 text2ngrams txt = Ngrams txt $ length $ splitOn " " txt
162
163 -------------------------------------------------------------------------
164 -- | TODO put it in Gargantext.Text.Ngrams
165 -- Named entity are typed ngrams of Terms Ngrams
166 data NgramsT a =
167 NgramsT { _ngramsType :: NgramsType
168 , _ngramsT :: a
169 } deriving (Generic, Show, Eq, Ord)
170
171 makeLenses ''NgramsT
172
173 instance Functor NgramsT where
174 fmap = over ngramsT
175 -----------------------------------------------------------------------
176 data NgramsIndexed =
177 NgramsIndexed
178 { _ngrams :: Ngrams
179 , _ngramsId :: NgramsId
180 } deriving (Show, Generic, Eq, Ord)
181
182 makeLenses ''NgramsIndexed
183 ------------------------------------------------------------------------
184 data NgramIds =
185 NgramIds
186 { ngramId :: Int
187 , ngramTerms :: Text
188 } deriving (Show, Generic, Eq, Ord)
189
190 instance PGS.FromRow NgramIds where
191 fromRow = NgramIds <$> field <*> field
192
193 ----------------------
194 withMap :: Map NgramsTerms NgramsId -> NgramsTerms -> NgramsId
195 withMap m n = maybe (panic "withMap: should not happen") identity (lookup n m)
196
197 indexNgramsT :: Map NgramsTerms NgramsId -> NgramsT Ngrams -> NgramsT NgramsIndexed
198 indexNgramsT = fmap . indexNgramsWith . withMap
199
200 indexNgrams :: Map NgramsTerms NgramsId -> Ngrams -> NgramsIndexed
201 indexNgrams = indexNgramsWith . withMap
202
203 -- NP: not sure we need it anymore
204 indexNgramsTWith :: (NgramsTerms -> NgramsId) -> NgramsT Ngrams -> NgramsT NgramsIndexed
205 indexNgramsTWith = fmap . indexNgramsWith
206
207 indexNgramsWith :: (NgramsTerms -> NgramsId) -> Ngrams -> NgramsIndexed
208 indexNgramsWith f n = NgramsIndexed n (f $ _ngramsTerms n)
209
210
211
212