]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Database/Schema/Ngrams.hs
[REPL sugar] runCmdReplEasy
[gargantext.git] / src / Gargantext / Database / Schema / Ngrams.hs
1 {-|
2 Module : Gargantext.Database.Schema.Ngrams
3 Description : Ngram connection to the Database
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 Ngrams connection to the Database.
11
12 -}
13
14 {-# LANGUAGE Arrows #-}
15 {-# LANGUAGE DeriveGeneric #-}
16 {-# LANGUAGE FlexibleContexts #-}
17 {-# LANGUAGE FlexibleInstances #-}
18 {-# LANGUAGE FunctionalDependencies #-}
19 {-# LANGUAGE GeneralizedNewtypeDeriving #-}
20 {-# LANGUAGE MultiParamTypeClasses #-}
21 {-# LANGUAGE NoImplicitPrelude #-}
22 {-# LANGUAGE OverloadedStrings #-}
23 {-# LANGUAGE QuasiQuotes #-}
24 {-# LANGUAGE RankNTypes #-}
25 {-# LANGUAGE TemplateHaskell #-}
26
27 module Gargantext.Database.Schema.Ngrams
28 where
29
30 import Codec.Serialise (Serialise())
31 import Control.Lens (makeLenses, over)
32 import Control.Monad (mzero)
33 import Data.Aeson
34 import Data.Aeson.Types (toJSONKeyText)
35 import Data.Map (Map, fromList, lookup)
36 import Data.Profunctor.Product.TH (makeAdaptorAndInstance)
37 import Data.Swagger (ToParamSchema, toParamSchema, ToSchema)
38 import Data.Text (Text, splitOn, pack)
39 import GHC.Generics (Generic)
40 import Gargantext.Core.Types (TODO(..))
41 import Gargantext.Prelude
42 import Prelude (Enum, Bounded, minBound, maxBound, Functor)
43 import Servant (FromHttpApiData, parseUrlPiece, Proxy(..))
44 import Text.Read (read)
45 import Gargantext.Database.Schema.Prelude
46 import qualified Database.PostgreSQL.Simple as PGS
47
48
49 type NgramsId = Int
50 type NgramsTerms = Text
51 type Size = Int
52
53 data NgramsPoly id terms n = NgramsDb { _ngrams_id :: !id
54 , _ngrams_terms :: !terms
55 , _ngrams_n :: !n
56 } deriving (Show)
57
58 type NgramsWrite = NgramsPoly (Maybe (Column PGInt4))
59 (Column PGText)
60 (Column PGInt4)
61
62 type NgramsRead = NgramsPoly (Column PGInt4)
63 (Column PGText)
64 (Column PGInt4)
65
66 type NgramsReadNull = NgramsPoly (Column (Nullable PGInt4))
67 (Column (Nullable PGText))
68 (Column (Nullable PGInt4))
69
70 type NgramsDb = NgramsPoly Int Text Int
71
72 $(makeAdaptorAndInstance "pNgramsDb" ''NgramsPoly)
73 makeLenses ''NgramsPoly
74
75
76 ngramsTable :: Table NgramsWrite NgramsRead
77 ngramsTable = Table "ngrams" (pNgramsDb NgramsDb { _ngrams_id = optional "id"
78 , _ngrams_terms = required "terms"
79 , _ngrams_n = required "n"
80 }
81 )
82
83
84
85 -- | Main Ngrams Types
86 -- | Typed Ngrams
87 -- Typed Ngrams localize the context of the ngrams
88 -- ngrams in source field of document has Sources Type
89 -- ngrams in authors field of document has Authors Type
90 -- ngrams in text (title or abstract) of documents has Terms Type
91 data NgramsType = Authors | Institutes | Sources | NgramsTerms
92 deriving (Eq, Show, Read, Ord, Enum, Bounded, Generic)
93
94 instance Serialise NgramsType
95
96 ngramsTypes :: [NgramsType]
97 ngramsTypes = [minBound..]
98
99 instance ToSchema NgramsType
100 {- where
101 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_nre_")
102 --}
103
104 newtype NgramsTypeId = NgramsTypeId Int
105 deriving (Eq, Show, Ord, Num)
106
107 instance ToField NgramsTypeId where
108 toField (NgramsTypeId n) = toField n
109
110 instance FromField NgramsTypeId where
111 fromField fld mdata = do
112 n <- fromField fld mdata
113 if (n :: Int) > 0 then return $ NgramsTypeId n
114 else mzero
115
116 instance FromJSON NgramsType
117 instance FromJSONKey NgramsType where
118 fromJSONKey = FromJSONKeyTextParser (parseJSON . String)
119 instance ToJSON NgramsType
120 instance ToJSONKey NgramsType where
121 toJSONKey = toJSONKeyText (pack . show)
122
123 instance FromHttpApiData NgramsType where
124 parseUrlPiece n = pure $ (read . cs) n
125
126 instance ToParamSchema NgramsType where
127 toParamSchema _ = toParamSchema (Proxy :: Proxy TODO)
128
129
130 instance QueryRunnerColumnDefault (Nullable PGInt4) NgramsTypeId
131 where
132 queryRunnerColumnDefault = fieldQueryRunnerColumn
133
134 pgNgramsType :: NgramsType -> Column PGInt4
135 pgNgramsType = pgNgramsTypeId . ngramsTypeId
136
137 pgNgramsTypeId :: NgramsTypeId -> Column PGInt4
138 pgNgramsTypeId (NgramsTypeId n) = pgInt4 n
139
140 ngramsTypeId :: NgramsType -> NgramsTypeId
141 ngramsTypeId Authors = 1
142 ngramsTypeId Institutes = 2
143 ngramsTypeId Sources = 3
144 ngramsTypeId NgramsTerms = 4
145
146 fromNgramsTypeId :: NgramsTypeId -> Maybe NgramsType
147 fromNgramsTypeId id = lookup id
148 $ fromList [ (ngramsTypeId nt,nt)
149 | nt <- [minBound .. maxBound] :: [NgramsType]
150 ]
151
152 ------------------------------------------------------------------------
153 -- | TODO put it in Gargantext.Text.Ngrams
154 data Ngrams = Ngrams { _ngramsTerms :: Text
155 , _ngramsSize :: Int
156 } deriving (Generic, Show, Eq, Ord)
157
158 makeLenses ''Ngrams
159 instance PGS.ToRow Ngrams where
160 toRow (Ngrams t s) = [toField t, toField s]
161
162 text2ngrams :: Text -> Ngrams
163 text2ngrams txt = Ngrams txt $ length $ splitOn " " txt
164
165 -------------------------------------------------------------------------
166 -- | TODO put it in Gargantext.Text.Ngrams
167 -- Named entity are typed ngrams of Terms Ngrams
168 data NgramsT a =
169 NgramsT { _ngramsType :: NgramsType
170 , _ngramsT :: a
171 } deriving (Generic, Show, Eq, Ord)
172
173 makeLenses ''NgramsT
174
175 instance Functor NgramsT where
176 fmap = over ngramsT
177 -----------------------------------------------------------------------
178 data NgramsIndexed =
179 NgramsIndexed
180 { _ngrams :: Ngrams
181 , _ngramsId :: NgramsId
182 } deriving (Show, Generic, Eq, Ord)
183
184 makeLenses ''NgramsIndexed
185 ------------------------------------------------------------------------
186 data NgramIds =
187 NgramIds
188 { ngramId :: Int
189 , ngramTerms :: Text
190 } deriving (Show, Generic, Eq, Ord)
191
192 instance PGS.FromRow NgramIds where
193 fromRow = NgramIds <$> field <*> field
194
195 ----------------------
196 withMap :: Map NgramsTerms NgramsId -> NgramsTerms -> NgramsId
197 withMap m n = maybe (panic "withMap: should not happen") identity (lookup n m)
198
199 indexNgramsT :: Map NgramsTerms NgramsId -> NgramsT Ngrams -> NgramsT NgramsIndexed
200 indexNgramsT = fmap . indexNgramsWith . withMap
201
202 indexNgrams :: Map NgramsTerms NgramsId -> Ngrams -> NgramsIndexed
203 indexNgrams = indexNgramsWith . withMap
204
205 -- NP: not sure we need it anymore
206 indexNgramsTWith :: (NgramsTerms -> NgramsId) -> NgramsT Ngrams -> NgramsT NgramsIndexed
207 indexNgramsTWith = fmap . indexNgramsWith
208
209 indexNgramsWith :: (NgramsTerms -> NgramsId) -> Ngrams -> NgramsIndexed
210 indexNgramsWith f n = NgramsIndexed n (f $ _ngramsTerms n)
211
212
213
214