]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Database/Schema/Ngrams.hs
Merge branch 'dev-lts-16.26-upgrade' into dev-tree-reload
[gargantext.git] / src / Gargantext / Database / Schema / Ngrams.hs
1 {-|
2 Module : Gargantext.Database.Schema.Ngrams
3 Description : Ngram connection to the Database
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 Ngrams connection to the Database.
11
12 -}
13
14 {-# LANGUAGE Arrows #-}
15 {-# LANGUAGE FunctionalDependencies #-}
16 {-# LANGUAGE QuasiQuotes #-}
17 {-# LANGUAGE TemplateHaskell #-}
18
19 module Gargantext.Database.Schema.Ngrams
20 where
21
22 import Data.Hashable (Hashable)
23 import Codec.Serialise (Serialise())
24 import Control.Lens (over)
25 import Control.Monad (mzero)
26 import Data.Aeson
27 import Data.Aeson.Types (toJSONKeyText)
28 import Data.Map (Map, fromList, lookup)
29 import Data.Text (Text, splitOn, pack, strip)
30 import Gargantext.Core.Types (TODO(..))
31 import Gargantext.Prelude
32 import Prelude (Functor)
33 import Servant (FromHttpApiData, parseUrlPiece, Proxy(..))
34 import Text.Read (read)
35 import Gargantext.Database.Schema.Prelude
36 import qualified Database.PostgreSQL.Simple as PGS
37
38
39 type NgramsId = Int
40 type NgramsTerms = Text
41 type Size = Int
42
43 data NgramsPoly id terms n = NgramsDB { _ngrams_id :: !id
44 , _ngrams_terms :: !terms
45 , _ngrams_n :: !n
46 } deriving (Show)
47
48 type NgramsWrite = NgramsPoly (Maybe (Column PGInt4))
49 (Column PGText)
50 (Column PGInt4)
51
52 type NgramsRead = NgramsPoly (Column PGInt4)
53 (Column PGText)
54 (Column PGInt4)
55
56 type NgramsReadNull = NgramsPoly (Column (Nullable PGInt4))
57 (Column (Nullable PGText))
58 (Column (Nullable PGInt4))
59
60 type NgramsDB = NgramsPoly Int Text Int
61
62 $(makeAdaptorAndInstance "pNgramsDb" ''NgramsPoly)
63 makeLenses ''NgramsPoly
64
65
66 ngramsTable :: Table NgramsWrite NgramsRead
67 ngramsTable = Table "ngrams" (pNgramsDb NgramsDB { _ngrams_id = optional "id"
68 , _ngrams_terms = required "terms"
69 , _ngrams_n = required "n"
70 }
71 )
72
73
74
75 -- | Main Ngrams Types
76 -- | Typed Ngrams
77 -- Typed Ngrams localize the context of the ngrams
78 -- ngrams in source field of document has Sources Type
79 -- ngrams in authors field of document has Authors Type
80 -- ngrams in text (title or abstract) of documents has Terms Type
81 data NgramsType = Authors | Institutes | Sources | NgramsTerms
82 deriving (Eq, Show, Read, Ord, Enum, Bounded, Generic)
83
84 instance Serialise NgramsType
85 instance Hashable NgramsType
86
87 ngramsTypes :: [NgramsType]
88 ngramsTypes = [minBound..]
89
90 instance ToSchema NgramsType
91 {- where
92 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_nre_")
93 --}
94
95 newtype NgramsTypeId = NgramsTypeId Int
96 deriving (Eq, Show, Ord, Num)
97
98 instance ToField NgramsTypeId where
99 toField (NgramsTypeId n) = toField n
100
101 instance FromField NgramsTypeId where
102 fromField fld mdata = do
103 n <- fromField fld mdata
104 if (n :: Int) > 0 then return $ NgramsTypeId n
105 else mzero
106
107 instance FromJSON NgramsType
108 instance FromJSONKey NgramsType where
109 fromJSONKey = FromJSONKeyTextParser (parseJSON . String)
110 instance ToJSON NgramsType
111 instance ToJSONKey NgramsType where
112 toJSONKey = toJSONKeyText (pack . show)
113
114 instance FromHttpApiData NgramsType where
115 parseUrlPiece n = pure $ (read . cs) n
116
117 instance ToParamSchema NgramsType where
118 toParamSchema _ = toParamSchema (Proxy :: Proxy TODO)
119
120
121 instance QueryRunnerColumnDefault (Nullable PGInt4) NgramsTypeId
122 where
123 queryRunnerColumnDefault = fieldQueryRunnerColumn
124
125 pgNgramsType :: NgramsType -> Column PGInt4
126 pgNgramsType = pgNgramsTypeId . ngramsTypeId
127
128 pgNgramsTypeId :: NgramsTypeId -> Column PGInt4
129 pgNgramsTypeId (NgramsTypeId n) = pgInt4 n
130
131 ngramsTypeId :: NgramsType -> NgramsTypeId
132 ngramsTypeId Authors = 1
133 ngramsTypeId Institutes = 2
134 ngramsTypeId Sources = 3
135 ngramsTypeId NgramsTerms = 4
136
137 fromNgramsTypeId :: NgramsTypeId -> Maybe NgramsType
138 fromNgramsTypeId id = lookup id
139 $ fromList [ (ngramsTypeId nt,nt)
140 | nt <- [minBound .. maxBound] :: [NgramsType]
141 ]
142
143 ------------------------------------------------------------------------
144 -- | TODO put it in Gargantext.Core.Text.Ngrams
145 data Ngrams = UnsafeNgrams { _ngramsTerms :: Text
146 , _ngramsSize :: Int
147 } deriving (Generic, Show, Eq, Ord)
148
149 makeLenses ''Ngrams
150 instance PGS.ToRow Ngrams where
151 toRow (UnsafeNgrams t s) = [toField t, toField s]
152
153 text2ngrams :: Text -> Ngrams
154 text2ngrams txt = UnsafeNgrams txt' $ length $ splitOn " " txt'
155 where
156 txt' = strip txt
157
158
159 -------------------------------------------------------------------------
160 -- | TODO put it in Gargantext.Core.Text.Ngrams
161 -- Named entity are typed ngrams of Terms Ngrams
162 data NgramsT a =
163 NgramsT { _ngramsType :: NgramsType
164 , _ngramsT :: a
165 } deriving (Generic, Show, Eq, Ord)
166
167 makeLenses ''NgramsT
168
169 instance Functor NgramsT where
170 fmap = over ngramsT
171 -----------------------------------------------------------------------
172 data NgramsIndexed =
173 NgramsIndexed
174 { _ngrams :: Ngrams
175 , _ngramsId :: NgramsId
176 } deriving (Show, Generic, Eq, Ord)
177
178 makeLenses ''NgramsIndexed
179 ------------------------------------------------------------------------
180 data NgramIds =
181 NgramIds
182 { ngramId :: Int
183 , ngramTerms :: Text
184 } deriving (Show, Generic, Eq, Ord)
185
186 instance PGS.FromRow NgramIds where
187 fromRow = NgramIds <$> field <*> field
188
189 ----------------------
190 withMap :: Map NgramsTerms NgramsId -> NgramsTerms -> NgramsId
191 withMap m n = maybe (panic "withMap: should not happen") identity (lookup n m)
192
193 indexNgramsT :: Map NgramsTerms NgramsId -> NgramsT Ngrams -> NgramsT NgramsIndexed
194 indexNgramsT = fmap . indexNgramsWith . withMap
195
196 indexNgrams :: Map NgramsTerms NgramsId -> Ngrams -> NgramsIndexed
197 indexNgrams = indexNgramsWith . withMap
198
199 -- NP: not sure we need it anymore
200 indexNgramsTWith :: (NgramsTerms -> NgramsId) -> NgramsT Ngrams -> NgramsT NgramsIndexed
201 indexNgramsTWith = fmap . indexNgramsWith
202
203 indexNgramsWith :: (NgramsTerms -> NgramsId) -> Ngrams -> NgramsIndexed
204 indexNgramsWith f n = NgramsIndexed n (f $ _ngramsTerms n)
205
206
207
208