]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Database/Flow.hs
[FLOW][DB][LIST] creating subflow for list
[gargantext.git] / src / Gargantext / Database / Flow.hs
1 {-|
2 Module : Gargantext.Database.Flow
3 Description : Database Flow
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10
11 Map (NgramsId, NodeId) -> insert
12 data NgramsType = Sources | Authors | Terms
13 nodes_ngrams : column type, column list
14
15 documents
16 sources
17 authors
18
19 -}
20
21 {-# LANGUAGE DeriveGeneric #-}
22 {-# LANGUAGE NoImplicitPrelude #-}
23 {-# LANGUAGE OverloadedStrings #-}
24
25 module Gargantext.Database.Flow
26 where
27 import System.FilePath (FilePath)
28 import Data.Maybe (Maybe(..))
29 import Data.Text (Text, unpack)
30 import Data.Map (Map)
31 import qualified Data.Map as DM
32 import GHC.Generics (Generic)
33
34 import Gargantext.Core.Types (NodePoly(..))
35 import Gargantext.Prelude
36 import Gargantext.Database.Bashql (runCmd', del)
37 import Gargantext.Database.Types.Node (Node(..), HyperdataDocument(..))
38 import Gargantext.Database.Node (getRoot, mkRoot, mkCorpus, Cmd(..), mkList)
39 import Gargantext.Database.User (getUser, UserLight(..), Username)
40 import Gargantext.Database.Node.Document.Insert (insertDocuments, ReturnId(..), addUniqIds)
41 import Gargantext.Database.Node.Document.Add (add)
42 import Gargantext.Database.NodeNgram (NodeNgramPoly(..), insertNodeNgrams)
43 import Gargantext.Text.Parsers (parseDocs, FileFormat(WOS))
44 import Gargantext.Database.Ngram (insertNgrams, Ngrams(..), NgramsT(..), NgramsIndexed(..), indexNgramsT, ngramsTypeId)
45
46 type UserId = Int
47 type RootId = Int
48 type CorpusId = Int
49
50 flow :: FilePath -> IO Int
51 flow fp = do
52
53 (masterUserId, _, corpusId) <- subFlow "gargantua"
54
55 docs <- map addUniqIds <$> parseDocs WOS fp
56 ids <- runCmd' $ insertDocuments masterUserId corpusId docs
57 printDebug "Docs IDs : " ids
58
59 idsRepeat <- runCmd' $ insertDocuments masterUserId corpusId docs
60 printDebug "Docs IDs : " idsRepeat
61
62 (_, _, corpusId2) <- subFlow "alexandre"
63
64 inserted <- runCmd' $ add corpusId2 (map reId ids)
65 printDebug "Inserted : " inserted
66
67 runCmd' $ del [corpusId2, corpusId]
68
69
70 subFlow :: Username -> IO (UserId, RootId, CorpusId)
71 subFlow username = do
72 maybeUserId <- runCmd' (getUser username)
73
74 let userId = case maybeUserId of
75 Nothing -> panic "Error: User does not exist (yet)"
76 -- mk NodeUser gargantua_id "Node Gargantua"
77 Just user -> userLight_id user
78
79 rootId' <- map _node_id <$> runCmd' (getRoot userId)
80
81 rootId'' <- case rootId' of
82 [] -> runCmd' (mkRoot userId)
83 un -> case length un >= 2 of
84 True -> panic "Error: more than 1 userNode / user"
85 False -> pure rootId'
86 let rootId = maybe (panic "error rootId") identity (head rootId'')
87
88 corpusId' <- runCmd' $ mkCorpus (Just "Corpus WOS") Nothing rootId userId
89 let corpusId = maybe (panic "error corpusId") identity (head corpusId')
90
91 printDebug "(username, userId, rootId, corpusId"
92 (username, userId, rootId, corpusId)
93 pure (userId, rootId, corpusId)
94
95 ----------------------------------------------------------------
96 type HashId = Text
97 type NodeId = Int
98 type ListId = Int
99 type ToInsert = Map HashId HyperdataDocument
100 type Inserted = Map HashId ReturnId
101
102 toInsert :: [HyperdataDocument] -> Map HashId HyperdataDocument
103 toInsert = DM.fromList . map (\d -> (hash (_hyperdataDocument_uniqIdBdd d), d))
104 where
105 hash = maybe "Error" identity
106
107 toInserted :: [ReturnId] -> Map HashId ReturnId
108 toInserted rs = DM.fromList $ map (\r -> (reUniqId r, r) )
109 $ filter (\r -> reInserted r == True) rs
110
111 data DocumentWithId = DocumentWithId { documentId :: NodeId
112 , documentData :: HyperdataDocument
113 }
114
115
116 mergeData :: Map HashId ReturnId -> Map HashId HyperdataDocument -> [DocumentWithId]
117 mergeData rs hs = map (\(hash,r) -> DocumentWithId (reId r) (lookup' hash hs)) $ DM.toList rs
118 where
119 lookup' h xs = maybe (panic $ "Error with " <> h) identity (DM.lookup h xs)
120
121 data DocumentIdWithNgrams = DocumentIdWithNgrams { documentWithId :: DocumentWithId
122 , document_ngrams :: Map (NgramsT Ngrams)Int
123 }
124
125
126 documentIdWithNgrams :: (HyperdataDocument -> Map (NgramsT Ngrams) Int)
127 -> [DocumentWithId] -> [DocumentIdWithNgrams]
128 documentIdWithNgrams f = map (\d -> DocumentIdWithNgrams d ((f . documentData) d))
129
130 -- | TODO check optimization
131 mapNodeIdNgrams :: [DocumentIdWithNgrams] -> Map (NgramsT Ngrams) (Map NodeId Int)
132 mapNodeIdNgrams ds = DM.map (DM.fromListWith (+)) $ DM.fromListWith (<>) xs
133 where
134 xs = [(ng, [(nId, i)]) | (nId, n2i') <- n2i ds, (ng, i) <- DM.toList n2i']
135 n2i = map (\d -> ((documentId . documentWithId) d, document_ngrams d))
136
137 indexNgrams :: Map (NgramsT Ngrams ) (Map NodeId Int)
138 -> Cmd (Map (NgramsT NgramsIndexed) (Map NodeId Int))
139 indexNgrams ng2nId = do
140 terms2id <- insertNgrams (map _ngramsT $ DM.keys ng2nId)
141 pure $ DM.mapKeys (indexNgramsT terms2id) ng2nId
142
143
144 insertToNodeNgrams :: Map (NgramsT NgramsIndexed) (Map NodeId Int) -> Cmd Int
145 insertToNodeNgrams m = insertNodeNgrams $ [ NodeNgram Nothing nId ((_ngramsId . _ngramsT ) ng)
146 (fromIntegral n) ((ngramsTypeId . _ngramsType) ng)
147
148 | (ng, nId2int) <- DM.toList m
149 , (nId, n) <- DM.toList nId2int
150 ]
151
152 listFlow :: UserId -> CorpusId -> Map (NgramsT NgramsIndexed) (Map NodeId Int) -> Cmd [ListId]
153 listFlow uId cId ng = do
154 lId <- mkList cId uId
155 -- groupNgramsBy fun
156 -- insertGroups = NodeNgramsNgrams
157
158 pure lId
159
160
161 -- compute Candidate / Map
162 -- ALTER TABLE nodes_nodes_ngrams ADD COLUMN typelist int;
163 -- insertLists = NodeNodeNgram
164
165
166
167
168 -- | TODO ask on meeting
169 -- get data of NgramsTable
170 -- post :: update NodeNodeNgrams
171 -- group ngrams
172