]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Database/Flow.hs
[FIX] API post with param.
[gargantext.git] / src / Gargantext / Database / Flow.hs
1 {-|
2 Module : Gargantext.Database.Flow
3 Description : Database Flow
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10
11 -- TODO-ACCESS:
12 -- check userId CanFillUserCorpus userCorpusId
13 -- check masterUserId CanFillMasterCorpus masterCorpusId
14
15 -- TODO-ACCESS: check uId CanInsertDoc pId && checkDocType nodeType
16 -- TODO-EVENTS: InsertedNodes
17
18
19 -}
20
21 {-# LANGUAGE ConstraintKinds #-}
22 {-# LANGUAGE DeriveGeneric #-}
23 {-# LANGUAGE FlexibleContexts #-}
24 {-# LANGUAGE InstanceSigs #-}
25 {-# LANGUAGE NoImplicitPrelude #-}
26 {-# LANGUAGE OverloadedStrings #-}
27 {-# LANGUAGE RankNTypes #-}
28 {-# LANGUAGE ConstrainedClassMethods #-}
29
30 module Gargantext.Database.Flow -- (flowDatabase, ngrams2list)
31 where
32
33 --import Debug.Trace (trace)
34 import Control.Lens ((^.), view, Lens', _Just)
35 import Control.Monad (mapM_)
36 import Control.Monad.IO.Class (liftIO)
37 import Data.List (concat)
38 import Data.Map (Map, lookup, toList)
39 import Data.Maybe (Maybe(..), catMaybes)
40 import Data.Monoid
41 import Data.Text (Text, splitOn, intercalate)
42 import GHC.Show (Show)
43 import Gargantext.API.Ngrams (HasRepoVar)
44 import Gargantext.API.Ngrams (NgramsElement(..), putListNgrams, RepoCmdM)
45 import Gargantext.Core (Lang(..))
46 import Gargantext.Core.Types (NodePoly(..), Terms(..))
47 import Gargantext.Core.Types.Individu (Username)
48 import Gargantext.Core.Types.Main
49 import Gargantext.Database.Config (userMaster, corpusMasterName)
50 import Gargantext.Database.Flow.Utils (insertDocNgrams)
51 import Gargantext.Database.Node.Contact -- (HyperdataContact(..), ContactWho(..))
52 import Gargantext.Database.Node.Document.Insert -- (insertDocuments, ReturnId(..), addUniqIdsDoc, addUniqIdsContact, ToDbData(..))
53 import Gargantext.Database.Root (getRoot)
54 import Gargantext.Database.Schema.Ngrams -- (insertNgrams, Ngrams(..), NgramsIndexed(..), indexNgrams, NgramsType(..), text2ngrams, ngramsTypeId)
55 import Gargantext.Database.Schema.Node -- (mkRoot, mkCorpus, getOrMkList, mkGraph, mkDashboard, mkAnnuaire, getCorporaWithParentId, HasNodeError, NodeError(..), nodeError)
56 import Gargantext.Database.Schema.User (getUser, UserLight(..))
57 import Gargantext.Database.TextSearch (searchInDatabase)
58 import Gargantext.Database.Types.Node -- (HyperdataDocument(..), NodeType(..), NodeId, UserId, ListId, CorpusId, RootId, MasterCorpusId, MasterUserId)
59 import Gargantext.Database.Utils (Cmd, CmdM)
60 import Gargantext.Ext.IMT (toSchoolName)
61 import Gargantext.Ext.IMTUser (deserialiseImtUsersFromFile)
62 import Gargantext.Prelude
63 import Gargantext.Text.Terms.Eleve (buildTries, toToken)
64 import Gargantext.Text.List (buildNgramsLists,StopSize(..))
65 import Gargantext.Text.Parsers (parseFile, FileFormat)
66 import Gargantext.Text.Terms (TermType(..), tt_lang, extractTerms, uniText)
67 import Gargantext.Text.Terms.Mono.Stem.En (stemIt)
68 import Servant (ServantErr)
69 import System.FilePath (FilePath)
70 import qualified Data.List as List
71 import qualified Data.Map as Map
72 import qualified Data.Text as Text
73 import qualified Gargantext.Database.Node.Document.Add as Doc (add)
74 import qualified Gargantext.Text.Parsers.GrandDebat as GD
75
76 type FlowCmdM env err m =
77 ( CmdM env err m
78 , RepoCmdM env err m
79 , HasNodeError err
80 , HasRepoVar env
81 )
82
83 type FlowCorpus a = ( AddUniqId a
84 , UniqId a
85 , InsertDb a
86 , ExtractNgramsT a
87 , HasText a
88 )
89
90 ------------------------------------------------------------------------
91
92 flowAnnuaire :: FlowCmdM env ServantErr m
93 => Username -> CorpusName -> (TermType Lang) -> FilePath -> m AnnuaireId
94 flowAnnuaire u n l filePath = do
95 docs <- liftIO $ (( splitEvery 500 <$> deserialiseImtUsersFromFile filePath) :: IO [[HyperdataContact]])
96 flow (Nothing :: Maybe HyperdataAnnuaire) u n l docs
97
98
99 flowCorpusDebat :: FlowCmdM env ServantErr m
100 => Username -> CorpusName
101 -> Limit -> FilePath
102 -> m CorpusId
103 flowCorpusDebat u n l fp = do
104 docs <- liftIO ( splitEvery 500
105 <$> take l
106 <$> GD.readFile fp
107 :: IO [[GD.GrandDebatReference ]]
108 )
109 flowCorpus u n (Multi FR) (map (map toHyperdataDocument) docs)
110
111
112 flowCorpusFile :: FlowCmdM env ServantErr m
113 => Username -> CorpusName
114 -> Limit -- Limit the number of docs (for dev purpose)
115 -> TermType Lang -> FileFormat -> FilePath
116 -> m CorpusId
117 flowCorpusFile u n l la ff fp = do
118 docs <- liftIO ( splitEvery 500
119 <$> take l
120 <$> parseFile ff fp
121 )
122 flowCorpus u n la (map (map toHyperdataDocument) docs)
123
124 -- TODO query with complex query
125 flowCorpusSearchInDatabase :: FlowCmdM env err m
126 => Username -> Lang -> Text -> m CorpusId
127 flowCorpusSearchInDatabase u la q = do
128 (_masterUserId, _masterRootId, cId) <- getOrMkRootWithCorpus userMaster "" (Nothing :: Maybe HyperdataCorpus)
129 ids <- map fst <$> searchInDatabase cId (stemIt q)
130 flowCorpusUser la u q (Nothing :: Maybe HyperdataCorpus) ids
131
132
133 flowCorpusSearchInDatabase' :: FlowCmdM env ServantErr m
134 => Username -> Lang -> Text -> m CorpusId
135 flowCorpusSearchInDatabase' u la q = do
136 (_masterUserId, _masterRootId, cId) <- getOrMkRootWithCorpus userMaster "" (Nothing :: Maybe HyperdataCorpus)
137 ids <- map fst <$> searchInDatabase cId (stemIt q)
138 flowCorpusUser la u q (Nothing :: Maybe HyperdataCorpus) ids
139
140 ------------------------------------------------------------------------
141
142 flow :: (FlowCmdM env ServantErr m, FlowCorpus a, MkCorpus c)
143 => Maybe c -> Username -> CorpusName -> TermType Lang -> [[a]] -> m CorpusId
144 flow c u cn la docs = do
145 ids <- mapM (insertMasterDocs c la ) docs
146 flowCorpusUser (la ^. tt_lang) u cn c (concat ids)
147
148 flowCorpus :: (FlowCmdM env ServantErr m, FlowCorpus a)
149 => Username -> CorpusName -> TermType Lang -> [[a]] -> m CorpusId
150 flowCorpus = flow (Nothing :: Maybe HyperdataCorpus)
151
152
153 flowCorpusUser :: (FlowCmdM env err m, MkCorpus c)
154 => Lang -> Username -> CorpusName -> Maybe c -> [NodeId] -> m CorpusId
155 flowCorpusUser l userName corpusName ctype ids = do
156 -- User Flow
157 (userId, _rootId, userCorpusId) <- getOrMkRootWithCorpus userName corpusName ctype
158 -- TODO: check if present already, ignore
159 _ <- Doc.add userCorpusId ids
160
161 -- User List Flow
162 --{-
163 (_masterUserId, _masterRootId, masterCorpusId) <- getOrMkRootWithCorpus userMaster "" ctype
164 ngs <- buildNgramsLists l 2 3 (StopSize 3) userCorpusId masterCorpusId
165 userListId <- flowList userId userCorpusId ngs
166 printDebug "userListId" userListId
167 -- User Graph Flow
168 _ <- mkGraph userCorpusId userId
169 --}
170
171 -- User Dashboard Flow
172 _ <- mkDashboard userCorpusId userId
173
174 -- Annuaire Flow
175 -- _ <- mkAnnuaire rootUserId userId
176 pure userCorpusId
177
178
179 insertMasterDocs :: ( FlowCmdM env ServantErr m
180 , FlowCorpus a
181 , MkCorpus c
182 )
183 => Maybe c -> TermType Lang -> [a] -> m [DocId]
184 insertMasterDocs c lang hs = do
185 (masterUserId, _, masterCorpusId) <- getOrMkRootWithCorpus userMaster corpusMasterName c
186
187 -- TODO Type NodeDocumentUnicised
188 let hs' = map addUniqId hs
189 ids <- insertDb masterUserId masterCorpusId hs'
190 let documentsWithId = mergeData (toInserted ids) (Map.fromList $ map viewUniqId' hs')
191
192 let
193 fixLang (Unsupervised l n s m) = Unsupervised l n s m'
194 where
195 m' = case m of
196 Nothing -> Just $ buildTries n (fmap toToken $ uniText $ Text.intercalate " " $ List.concat $ map hasText documentsWithId)
197 m'' -> m''
198 fixLang l = l
199
200 lang' = fixLang lang
201 -- maps :: IO Map Ngrams (Map NgramsType (Map NodeId Int))
202 maps <- mapNodeIdNgrams <$> documentIdWithNgrams (extractNgramsT lang') documentsWithId
203 terms2id <- insertNgrams $ Map.keys maps
204 let indexedNgrams = Map.mapKeys (indexNgrams terms2id) maps
205
206 lId <- getOrMkList masterCorpusId masterUserId
207 _ <- insertDocNgrams lId indexedNgrams
208 pure $ map reId ids
209
210
211
212 type CorpusName = Text
213
214 getOrMkRootWithCorpus :: (HasNodeError err, MkCorpus a)
215 => Username -> CorpusName -> Maybe a
216 -> Cmd err (UserId, RootId, CorpusId)
217 getOrMkRootWithCorpus username cName c = do
218 maybeUserId <- getUser username
219 userId <- case maybeUserId of
220 Nothing -> nodeError NoUserFound
221 Just user -> pure $ userLight_id user
222
223 rootId' <- map _node_id <$> getRoot username
224
225 rootId'' <- case rootId' of
226 [] -> mkRoot username userId
227 n -> case length n >= 2 of
228 True -> nodeError ManyNodeUsers
229 False -> pure rootId'
230
231 rootId <- maybe (nodeError NoRootFound) pure (head rootId'')
232
233 corpusId'' <- if username == userMaster
234 then do
235 ns <- getCorporaWithParentId rootId
236 pure $ map _node_id ns
237 else
238 pure []
239
240 corpusId' <- if corpusId'' /= []
241 then pure corpusId''
242 else mk (Just cName) c rootId userId
243
244 corpusId <- maybe (nodeError NoCorpusFound) pure (head corpusId')
245
246 pure (userId, rootId, corpusId)
247
248
249 ------------------------------------------------------------------------
250
251
252 class UniqId a
253 where
254 uniqId :: Lens' a (Maybe HashId)
255
256
257 instance UniqId HyperdataDocument
258 where
259 uniqId = hyperdataDocument_uniqId
260
261 instance UniqId HyperdataContact
262 where
263 uniqId = hc_uniqId
264
265 viewUniqId' :: UniqId a => a -> (HashId, a)
266 viewUniqId' d = maybe err (\h -> (h,d)) (view uniqId d)
267 where
268 err = panic "[ERROR] Database.Flow.toInsert"
269
270
271 toInserted :: [ReturnId] -> Map HashId ReturnId
272 toInserted = Map.fromList . map (\r -> (reUniqId r, r) )
273 . filter (\r -> reInserted r == True)
274
275 data DocumentWithId a = DocumentWithId
276 { documentId :: !NodeId
277 , documentData :: !a
278 } deriving (Show)
279
280 instance HasText a => HasText (DocumentWithId a)
281 where
282 hasText (DocumentWithId _ a) = hasText a
283
284 mergeData :: Map HashId ReturnId
285 -> Map HashId a
286 -> [DocumentWithId a]
287 mergeData rs = catMaybes . map toDocumentWithId . Map.toList
288 where
289 toDocumentWithId (hash,hpd) =
290 DocumentWithId <$> fmap reId (lookup hash rs)
291 <*> Just hpd
292
293 ------------------------------------------------------------------------
294 data DocumentIdWithNgrams a = DocumentIdWithNgrams
295 { documentWithId :: !(DocumentWithId a)
296 , document_ngrams :: !(Map Ngrams (Map NgramsType Int))
297 } deriving (Show)
298
299
300 class ExtractNgramsT h
301 where
302 extractNgramsT :: HasText h => TermType Lang -> h -> Cmd err (Map Ngrams (Map NgramsType Int))
303
304 class HasText h
305 where
306 hasText :: h -> [Text]
307
308 instance HasText HyperdataContact
309 where
310 hasText = undefined
311
312 instance ExtractNgramsT HyperdataContact
313 where
314 extractNgramsT l hc = filterNgramsT 255 <$> extract l hc
315 where
316 extract :: TermType Lang -> HyperdataContact
317 -> Cmd err (Map Ngrams (Map NgramsType Int))
318 extract _l hc' = do
319 let authors = map text2ngrams
320 $ maybe ["Nothing"] (\a -> [a])
321 $ view (hc_who . _Just . cw_lastName) hc'
322
323 pure $ Map.fromList $ [(a', Map.singleton Authors 1) | a' <- authors ]
324
325 instance HasText HyperdataDocument
326 where
327 hasText h = catMaybes [ _hyperdataDocument_title h
328 , _hyperdataDocument_abstract h
329 ]
330
331 instance ExtractNgramsT HyperdataDocument
332 where
333 extractNgramsT :: TermType Lang -> HyperdataDocument -> Cmd err (Map Ngrams (Map NgramsType Int))
334 extractNgramsT lang hd = filterNgramsT 255 <$> extractNgramsT' lang hd
335 where
336 extractNgramsT' :: TermType Lang -> HyperdataDocument
337 -> Cmd err (Map Ngrams (Map NgramsType Int))
338 extractNgramsT' lang' doc = do
339 let source = text2ngrams
340 $ maybe "Nothing" identity
341 $ _hyperdataDocument_source doc
342
343 institutes = map text2ngrams
344 $ maybe ["Nothing"] (map toSchoolName . (splitOn ", "))
345 $ _hyperdataDocument_institutes doc
346
347 authors = map text2ngrams
348 $ maybe ["Nothing"] (splitOn ", ")
349 $ _hyperdataDocument_authors doc
350
351 terms' <- map text2ngrams
352 <$> map (intercalate " " . _terms_label)
353 <$> concat
354 <$> liftIO (extractTerms lang' $ hasText doc)
355
356 pure $ Map.fromList $ [(source, Map.singleton Sources 1)]
357 <> [(i', Map.singleton Institutes 1) | i' <- institutes ]
358 <> [(a', Map.singleton Authors 1) | a' <- authors ]
359 <> [(t', Map.singleton NgramsTerms 1) | t' <- terms' ]
360
361
362 filterNgramsT :: Int -> Map Ngrams (Map NgramsType Int)
363 -> Map Ngrams (Map NgramsType Int)
364 filterNgramsT s ms = Map.fromList $ map (\a -> filter' s a) $ Map.toList ms
365 where
366 filter' s' (ng@(Ngrams t n),y) = case (Text.length t) < s' of
367 True -> (ng,y)
368 False -> (Ngrams (Text.take s' t) n , y)
369
370
371 documentIdWithNgrams :: HasNodeError err
372 => (a
373 -> Cmd err (Map Ngrams (Map NgramsType Int)))
374 -> [DocumentWithId a]
375 -> Cmd err [DocumentIdWithNgrams a]
376 documentIdWithNgrams f = mapM toDocumentIdWithNgrams
377 where
378 toDocumentIdWithNgrams d = do
379 e <- f $ documentData d
380 pure $ DocumentIdWithNgrams d e
381
382
383 -- FLOW LIST
384 -- | TODO check optimization
385 mapNodeIdNgrams :: [DocumentIdWithNgrams a]
386 -> Map Ngrams (Map NgramsType (Map NodeId Int))
387 mapNodeIdNgrams = Map.unionsWith (Map.unionWith (Map.unionWith (+))) . fmap f
388 where
389 f :: DocumentIdWithNgrams a
390 -> Map Ngrams (Map NgramsType (Map NodeId Int))
391 f d = fmap (fmap (Map.singleton nId)) $ document_ngrams d
392 where
393 nId = documentId $ documentWithId d
394
395 ------------------------------------------------------------------------
396 listInsert :: FlowCmdM env err m
397 => ListId -> Map NgramsType [NgramsElement]
398 -> m ()
399 listInsert lId ngs = mapM_ (\(typeList, ngElmts)
400 -> putListNgrams lId typeList ngElmts
401 ) $ toList ngs
402
403 flowList :: FlowCmdM env err m => UserId -> CorpusId
404 -> Map NgramsType [NgramsElement]
405 -> m ListId
406 flowList uId cId ngs = do
407 lId <- getOrMkList cId uId
408 printDebug "listId flowList" lId
409 listInsert lId ngs
410 --trace (show $ List.filter (\n -> _ne_ngrams n == "versatile") $ List.concat $ Map.elems ngs) $ listInsert lId ngs
411 pure lId
412