2 Module : Gargantext.Database.Flow
3 Description : Database Flow
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
12 add :: Corpus -> [Documents] -> IO Int
13 if new id -> extractNgrams + extract Authors + extract Sources
15 insert Ngrams -> NgramsId
16 Map (NgramsId, NodeId) -> insert
18 data NgramsType = Sources | Authors | Terms
20 nodes_ngrams : column type, column list
29 {-# LANGUAGE NoImplicitPrelude #-}
30 {-# LANGUAGE OverloadedStrings #-}
32 module Gargantext.Database.Flow
35 import System.FilePath (FilePath)
36 import Data.Maybe (Maybe(..))
37 import Gargantext.Core.Types (NodePoly(..))
38 import Gargantext.Prelude
39 import Gargantext.Database.Bashql (runCmd', del)
40 import Gargantext.Database.Node (getRoot, mkRoot, mkCorpus)
41 import Gargantext.Database.User (getUser, UserLight(..))
42 import Gargantext.Database.Node.Document.Import (insertDocuments)
43 import Gargantext.Text.Parsers (parseDocs, FileFormat(WOS))
-- | Run the document flow for one input file: resolve the "gargantua" user
-- and its root node, create a "Corpus WOS" corpus under that root, parse the
-- file as WOS, insert the parsed documents twice, then delete the corpus.
-- Every database command is executed through runCmd'.
--
-- NOTE(review): `fp` is used further down, but no equation head binding it
-- (something like `flow fp = do`) is visible in this listing -- confirm
-- against the original file.
45 flow :: FilePath -> IO ()
-- Look up the master user by its hard-coded name.
47 masterUser <- runCmd' (getUser "gargantua")
-- Abort via panic when the user is missing; otherwise keep only its id.
49 let masterUserId = case masterUser of
50 Nothing -> panic "Error: User does not exist (yet)" -- mk NodeUser gargantua_id "Node Gargantua"
51 Just user -> userLight_id user
-- Ids of the root node(s) returned by getRoot for this user.
53 rootId' <- map _node_id <$> runCmd' (getRoot masterUserId)
-- No root yet: create one with mkRoot. Two or more roots: abort via panic.
-- NOTE(review): only the True alternative of the inner case is visible here;
-- the alternative for exactly one root is not shown in this listing --
-- confirm it exists in the original file.
55 rootId'' <- case rootId' of
56 [] -> runCmd' (mkRoot masterUserId)
57 un -> case length un >= 2 of
58 True -> panic "Error: more than 1 userNode / user"
-- `head` is consumed with `maybe` here, so a Nothing result ends in the panic.
60 let rootId = maybe (panic "error rootId") identity (head rootId'')
61 printDebug "Root ID : " rootId
-- Create the corpus under the root; same head-or-panic extraction as above.
63 corpusId' <- runCmd' $ mkCorpus (Just "Corpus WOS") Nothing rootId masterUserId
64 let corpusId = maybe (panic "error corpusId") identity (head corpusId')
65 printDebug "Corpus ID : " corpusId
-- Parse the input file in WOS format and insert the documents into the corpus.
67 docs <- parseDocs WOS fp
68 ids <- runCmd' $ insertDocuments masterUserId corpusId docs
69 printDebug "Docs IDs : " ids
-- Insert the very same documents a second time and print the returned ids.
-- NOTE(review): presumably a check of how duplicates are handled -- confirm.
71 idsRepeat <- runCmd' $ insertDocuments masterUserId corpusId docs
72 printDebug "Docs IDs : " idsRepeat
-- Delete the corpus created above; the result of del is discarded.
74 _ <- runCmd' (del [corpusId])
78 --folderId <- mk Folder parentId (Name "Data") (Descr "All corpora DATA here")
79 folderId <- mk Folder rootId "Data"
80 corpusId <- mk Corpus folderId (Name "WOS") (Descr "WOS database description")
86 docs <- parseDocuments WOS "doc/.."
87 ids <- add (Documents corpusId) docs
89 user_id <- runCmd' (get RootUser "alexandre")
90 rootUser_id <- runCmd' (getRootUser $ userLight_id user_id)