]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Database/Schema/Ngrams.hs
[API] refactoring / split API with Routes.
[gargantext.git] / src / Gargantext / Database / Schema / Ngrams.hs
1 {-|
2 Module : Gargantext.Database.Schema.Ngrams
3 Description : Ngram connection to the Database
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 Ngrams connection to the Database.
11
12 -}
13
14 {-# LANGUAGE Arrows #-}
15 {-# LANGUAGE DeriveGeneric #-}
16 {-# LANGUAGE FlexibleContexts #-}
17 {-# LANGUAGE FlexibleInstances #-}
18 {-# LANGUAGE FunctionalDependencies #-}
19 {-# LANGUAGE GeneralizedNewtypeDeriving #-}
20 {-# LANGUAGE MultiParamTypeClasses #-}
21 {-# LANGUAGE NoImplicitPrelude #-}
22 {-# LANGUAGE OverloadedStrings #-}
23 {-# LANGUAGE QuasiQuotes #-}
24 {-# LANGUAGE RankNTypes #-}
25 {-# LANGUAGE TemplateHaskell #-}
26
27 module Gargantext.Database.Schema.Ngrams
28 where
29
30 import Control.Lens (makeLenses, over)
31 import Control.Monad (mzero)
32 import Data.Aeson
33 import Data.Aeson.Types (toJSONKeyText)
34 import Data.Map (Map, fromList, lookup)
35 import Data.Profunctor.Product.TH (makeAdaptorAndInstance)
36 import Data.Swagger (ToParamSchema, toParamSchema, ToSchema)
37 import Data.Text (Text, splitOn, pack)
38 import GHC.Generics (Generic)
39 import Gargantext.Core.Types (TODO(..))
40 import Gargantext.Prelude
41 import Prelude (Enum, Bounded, minBound, maxBound, Functor)
42 import Servant (FromHttpApiData, parseUrlPiece, Proxy(..))
43 import Text.Read (read)
44 import Gargantext.Database.Schema.Prelude
45 import qualified Database.PostgreSQL.Simple as PGS
46
47
48 type NgramsId = Int
49 type NgramsTerms = Text
50 type Size = Int
51
52 data NgramsPoly id terms n = NgramsDb { _ngrams_id :: !id
53 , _ngrams_terms :: !terms
54 , _ngrams_n :: !n
55 } deriving (Show)
56
57 type NgramsWrite = NgramsPoly (Maybe (Column PGInt4))
58 (Column PGText)
59 (Column PGInt4)
60
61 type NgramsRead = NgramsPoly (Column PGInt4)
62 (Column PGText)
63 (Column PGInt4)
64
65 type NgramsReadNull = NgramsPoly (Column (Nullable PGInt4))
66 (Column (Nullable PGText))
67 (Column (Nullable PGInt4))
68
69 type NgramsDb = NgramsPoly Int Text Int
70
71 $(makeAdaptorAndInstance "pNgramsDb" ''NgramsPoly)
72 makeLenses ''NgramsPoly
73
74
75 ngramsTable :: Table NgramsWrite NgramsRead
76 ngramsTable = Table "ngrams" (pNgramsDb NgramsDb { _ngrams_id = optional "id"
77 , _ngrams_terms = required "terms"
78 , _ngrams_n = required "n"
79 }
80 )
81
82
83
84 -- | Main Ngrams Types
85 -- | Typed Ngrams
86 -- Typed Ngrams localize the context of the ngrams
87 -- ngrams in source field of document has Sources Type
88 -- ngrams in authors field of document has Authors Type
89 -- ngrams in text (title or abstract) of documents has Terms Type
90 data NgramsType = Authors | Institutes | Sources | NgramsTerms
91 deriving (Eq, Show, Read, Ord, Enum, Bounded, Generic)
92
93 ngramsTypes :: [NgramsType]
94 ngramsTypes = [minBound..]
95
96 instance ToSchema NgramsType
97 {- where
98 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_nre_")
99 --}
100
101 newtype NgramsTypeId = NgramsTypeId Int
102 deriving (Eq, Show, Ord, Num)
103
104 instance ToField NgramsTypeId where
105 toField (NgramsTypeId n) = toField n
106
107 instance FromField NgramsTypeId where
108 fromField fld mdata = do
109 n <- fromField fld mdata
110 if (n :: Int) > 0 then return $ NgramsTypeId n
111 else mzero
112
113 instance FromJSON NgramsType
114 instance FromJSONKey NgramsType where
115 fromJSONKey = FromJSONKeyTextParser (parseJSON . String)
116 instance ToJSON NgramsType
117 instance ToJSONKey NgramsType where
118 toJSONKey = toJSONKeyText (pack . show)
119
120 instance FromHttpApiData NgramsType where
121 parseUrlPiece n = pure $ (read . cs) n
122
123 instance ToParamSchema NgramsType where
124 toParamSchema _ = toParamSchema (Proxy :: Proxy TODO)
125
126
127 instance QueryRunnerColumnDefault (Nullable PGInt4) NgramsTypeId
128 where
129 queryRunnerColumnDefault = fieldQueryRunnerColumn
130
131 pgNgramsType :: NgramsType -> Column PGInt4
132 pgNgramsType = pgNgramsTypeId . ngramsTypeId
133
134 pgNgramsTypeId :: NgramsTypeId -> Column PGInt4
135 pgNgramsTypeId (NgramsTypeId n) = pgInt4 n
136
137 ngramsTypeId :: NgramsType -> NgramsTypeId
138 ngramsTypeId Authors = 1
139 ngramsTypeId Institutes = 2
140 ngramsTypeId Sources = 3
141 ngramsTypeId NgramsTerms = 4
142
143 fromNgramsTypeId :: NgramsTypeId -> Maybe NgramsType
144 fromNgramsTypeId id = lookup id
145 $ fromList [ (ngramsTypeId nt,nt)
146 | nt <- [minBound .. maxBound] :: [NgramsType]
147 ]
148
149 ------------------------------------------------------------------------
150 -- | TODO put it in Gargantext.Text.Ngrams
151 data Ngrams = Ngrams { _ngramsTerms :: Text
152 , _ngramsSize :: Int
153 } deriving (Generic, Show, Eq, Ord)
154
155 makeLenses ''Ngrams
156 instance PGS.ToRow Ngrams where
157 toRow (Ngrams t s) = [toField t, toField s]
158
159 text2ngrams :: Text -> Ngrams
160 text2ngrams txt = Ngrams txt $ length $ splitOn " " txt
161
162 -------------------------------------------------------------------------
163 -- | TODO put it in Gargantext.Text.Ngrams
164 -- Named entity are typed ngrams of Terms Ngrams
165 data NgramsT a =
166 NgramsT { _ngramsType :: NgramsType
167 , _ngramsT :: a
168 } deriving (Generic, Show, Eq, Ord)
169
170 makeLenses ''NgramsT
171
172 instance Functor NgramsT where
173 fmap = over ngramsT
174 -----------------------------------------------------------------------
175 data NgramsIndexed =
176 NgramsIndexed
177 { _ngrams :: Ngrams
178 , _ngramsId :: NgramsId
179 } deriving (Show, Generic, Eq, Ord)
180
181 makeLenses ''NgramsIndexed
182 ------------------------------------------------------------------------
183 data NgramIds =
184 NgramIds
185 { ngramId :: Int
186 , ngramTerms :: Text
187 } deriving (Show, Generic, Eq, Ord)
188
189 instance PGS.FromRow NgramIds where
190 fromRow = NgramIds <$> field <*> field
191
192 ----------------------
193 withMap :: Map NgramsTerms NgramsId -> NgramsTerms -> NgramsId
194 withMap m n = maybe (panic "withMap: should not happen") identity (lookup n m)
195
196 indexNgramsT :: Map NgramsTerms NgramsId -> NgramsT Ngrams -> NgramsT NgramsIndexed
197 indexNgramsT = fmap . indexNgramsWith . withMap
198
199 indexNgrams :: Map NgramsTerms NgramsId -> Ngrams -> NgramsIndexed
200 indexNgrams = indexNgramsWith . withMap
201
202 -- NP: not sure we need it anymore
203 indexNgramsTWith :: (NgramsTerms -> NgramsId) -> NgramsT Ngrams -> NgramsT NgramsIndexed
204 indexNgramsTWith = fmap . indexNgramsWith
205
206 indexNgramsWith :: (NgramsTerms -> NgramsId) -> Ngrams -> NgramsIndexed
207 indexNgramsWith f n = NgramsIndexed n (f $ _ngramsTerms n)
208
209
210
211