2 Module : Gargantext.Database.Flow
3 Description : Database Flow
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
11 Map (NgramsId, NodeId) -> insert
12 data NgramsType = Sources | Authors | Terms
13 nodes_ngrams : column type, column list
21 {-# LANGUAGE DeriveGeneric #-}
22 {-# LANGUAGE NoImplicitPrelude #-}
23 {-# LANGUAGE OverloadedStrings #-}
25 module Gargantext.Database.Flow
27 import System.FilePath (FilePath)
28 import Data.Maybe (Maybe(..))
29 import Data.Text (Text, unpack)
31 import qualified Data.Map as DM
32 import GHC.Generics (Generic)
34 import Gargantext.Core.Types (NodePoly(..))
35 import Gargantext.Prelude
36 import Gargantext.Database.Bashql (runCmd', del)
37 import Gargantext.Database.Types.Node (Node(..), HyperdataDocument(..))
38 import Gargantext.Database.Node (getRoot, mkRoot, mkCorpus, Cmd(..), mkList)
39 import Gargantext.Database.User (getUser, UserLight(..), Username)
40 import Gargantext.Database.Node.Document.Insert (insertDocuments, ReturnId(..), addUniqIds)
41 import Gargantext.Database.Node.Document.Add (add)
42 import Gargantext.Database.NodeNgram (NodeNgramPoly(..), insertNodeNgrams)
43 import Gargantext.Text.Parsers (parseDocs, FileFormat(WOS))
44 import Gargantext.Database.Ngram (insertNgrams, Ngrams(..), NgramsT(..), NgramsIndexed(..), indexNgramsT, ngramsTypeId)
50 flow :: FilePath -> IO Int
53 (masterUserId, _, corpusId) <- subFlow "gargantua"
55 docs <- map addUniqIds <$> parseDocs WOS fp
56 ids <- runCmd' $ insertDocuments masterUserId corpusId docs
57 printDebug "Docs IDs : " ids
59 idsRepeat <- runCmd' $ insertDocuments masterUserId corpusId docs
60 printDebug "Docs IDs : " idsRepeat
62 (_, _, corpusId2) <- subFlow "alexandre"
64 inserted <- runCmd' $ add corpusId2 (map reId ids)
65 printDebug "Inserted : " inserted
67 runCmd' $ del [corpusId2, corpusId]
70 subFlow :: Username -> IO (UserId, RootId, CorpusId)
72 maybeUserId <- runCmd' (getUser username)
74 let userId = case maybeUserId of
75 Nothing -> panic "Error: User does not exist (yet)"
76 -- mk NodeUser gargantua_id "Node Gargantua"
77 Just user -> userLight_id user
79 rootId' <- map _node_id <$> runCmd' (getRoot userId)
81 rootId'' <- case rootId' of
82 [] -> runCmd' (mkRoot userId)
83 un -> case length un >= 2 of
84 True -> panic "Error: more than 1 userNode / user"
86 let rootId = maybe (panic "error rootId") identity (head rootId'')
88 corpusId' <- runCmd' $ mkCorpus (Just "Corpus WOS") Nothing rootId userId
89 let corpusId = maybe (panic "error corpusId") identity (head corpusId')
91 printDebug "(username, userId, rootId, corpusId"
92 (username, userId, rootId, corpusId)
93 pure (userId, rootId, corpusId)
95 ----------------------------------------------------------------
99 type ToInsert = Map HashId HyperdataDocument
100 type Inserted = Map HashId ReturnId
102 toInsert :: [HyperdataDocument] -> Map HashId HyperdataDocument
103 toInsert = DM.fromList . map (\d -> (hash (_hyperdataDocument_uniqIdBdd d), d))
105 hash = maybe "Error" identity
107 toInserted :: [ReturnId] -> Map HashId ReturnId
108 toInserted rs = DM.fromList $ map (\r -> (reUniqId r, r) )
109 $ filter (\r -> reInserted r == True) rs
111 data DocumentWithId = DocumentWithId { documentId :: NodeId
112 , documentData :: HyperdataDocument
116 mergeData :: Map HashId ReturnId -> Map HashId HyperdataDocument -> [DocumentWithId]
117 mergeData rs hs = map (\(hash,r) -> DocumentWithId (reId r) (lookup' hash hs)) $ DM.toList rs
119 lookup' h xs = maybe (panic $ "Error with " <> h) identity (DM.lookup h xs)
121 data DocumentIdWithNgrams = DocumentIdWithNgrams { documentWithId :: DocumentWithId
122 , document_ngrams :: Map (NgramsT Ngrams)Int
126 documentIdWithNgrams :: (HyperdataDocument -> Map (NgramsT Ngrams) Int)
127 -> [DocumentWithId] -> [DocumentIdWithNgrams]
128 documentIdWithNgrams f = map (\d -> DocumentIdWithNgrams d ((f . documentData) d))
130 -- | TODO check optimization
131 mapNodeIdNgrams :: [DocumentIdWithNgrams] -> Map (NgramsT Ngrams) (Map NodeId Int)
132 mapNodeIdNgrams ds = DM.map (DM.fromListWith (+)) $ DM.fromListWith (<>) xs
134 xs = [(ng, [(nId, i)]) | (nId, n2i') <- n2i ds, (ng, i) <- DM.toList n2i']
135 n2i = map (\d -> ((documentId . documentWithId) d, document_ngrams d))
137 indexNgrams :: Map (NgramsT Ngrams ) (Map NodeId Int)
138 -> Cmd (Map (NgramsT NgramsIndexed) (Map NodeId Int))
139 indexNgrams ng2nId = do
140 terms2id <- insertNgrams (map _ngramsT $ DM.keys ng2nId)
141 pure $ DM.mapKeys (indexNgramsT terms2id) ng2nId
144 insertToNodeNgrams :: Map (NgramsT NgramsIndexed) (Map NodeId Int) -> Cmd Int
145 insertToNodeNgrams m = insertNodeNgrams $ [ NodeNgram Nothing nId ((_ngramsId . _ngramsT ) ng)
146 (fromIntegral n) ((ngramsTypeId . _ngramsType) ng)
148 | (ng, nId2int) <- DM.toList m
149 , (nId, n) <- DM.toList nId2int
152 listFlow :: UserId -> CorpusId -> Map (NgramsT NgramsIndexed) (Map NodeId Int) -> Cmd [ListId]
153 listFlow uId cId ng = do
154 lId <- mkList cId uId
156 -- insertGroups = NodeNgramsNgrams
161 -- compute Candidate / Map
162 -- ALTER TABLE nodes_nodes_ngrams ADD COLUMN typelist int;
163 -- insertLists = NodeNodeNgram
168 -- | TODO ask on meeting
169 -- get data of NgramsTable
170 -- post :: update NodeNodeNgrams