]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/API/Node/Corpus/Export.hs
Merge branch 'dev' into 97-dev-istex-search
[gargantext.git] / src / Gargantext / API / Node / Corpus / Export.hs
1 {-|
2 Module : Gargantext.API.Node.Corpus.Export
3 Description : Get Metrics from Storage (Database like)
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 Main exports of Gargantext:
11 - corpus
12 - document and ngrams
13 - lists
14 -}
15
16 module Gargantext.API.Node.Corpus.Export
17 where
18
19 import Data.Map (Map)
20 import Data.Maybe (fromMaybe)
21 import Data.Set (Set)
22 import qualified Data.List as List
23 import qualified Data.Map as Map
24 import qualified Data.Set as Set
25 import qualified Data.HashMap.Strict as HashMap
26
27 import Gargantext.API.Node.Corpus.Export.Types
28 import Gargantext.API.Ngrams.Types
29 import Gargantext.API.Ngrams.Tools (filterListWithRoot, mapTermListRoot, getRepo')
30 import Gargantext.API.Prelude (GargNoServer)
31 import Gargantext.Prelude.Crypto.Hash (hash)
32 import Gargantext.Core.Types
33 import Gargantext.Core.NodeStory
34 import Gargantext.Database.Action.Metrics.NgramsByNode (getNgramsByNodeOnlyUser)
35 import Gargantext.Database.Admin.Config (userMaster)
36 import Gargantext.Database.Admin.Types.Hyperdata (HyperdataDocument(..))
37 import Gargantext.Database.Prelude (Cmd)
38 import Gargantext.Database.Query.Table.Node
39 import Gargantext.Database.Query.Table.Node.Error (HasNodeError)
40 import Gargantext.Database.Query.Table.Node.Select (selectNodesWithUsername)
41 import Gargantext.Database.Query.Table.NodeNode (selectDocNodes)
42 import Gargantext.Database.Schema.Ngrams (NgramsType(..))
43 import Gargantext.Database.Schema.Node (_node_id, _node_hyperdata)
44 import Gargantext.Prelude
45
46 --------------------------------------------------
-- | Build the exportable view of a corpus.
--
-- Arguments:
--
-- * @cId@: the corpus whose nodes are read with 'selectDocNodes'.
-- * @lId@: the list to take ngrams from; when 'Nothing', the result of
--   @defaultList cId@ is used instead.
-- * @nt'@: the kind of ngrams to export; when 'Nothing', 'NgramsTerms' is used.
--
-- The result holds one 'Document' for every node id that is present both in
-- the selected nodes and in the map returned by 'getNodeNgrams'; nodes
-- without an entry in that map are left out.
--
-- Hashes are ordered by Set: the ngrams of a document are hashed as a 'Set',
-- and the corpus hash is computed over the document hashes in the key order
-- of the underlying 'Map'.
getCorpus :: CorpusId
          -> Maybe ListId
          -> Maybe NgramsType
          -> GargNoServer Corpus
getCorpus cId lId nt' = do
  let nt = fromMaybe NgramsTerms nt'

  listId <- case lId of
    Nothing -> defaultList cId
    Just l  -> pure l

  -- Index the selected nodes by their node id.
  docNodes <- selectDocNodes cId
  let ns = Map.fromList [ (_node_id n, n) | n <- docNodes ]

  repo <- getRepo' [listId]
  ngs  <- getNodeNgrams cId listId nt repo

  let
    -- uniqId is hash computed already for each document imported in database
    toDocument a b =
      let ngramsHash = hash b -- computed once, used for the ngrams and the document hash
      in Document { _d_document = a
                  , _d_ngrams   = Ngrams (Set.toList b) ngramsHash
                  , _d_hash     = hash [ fromMaybe "" (_hd_uniqId $ _node_hyperdata a)
                                       , ngramsHash
                                       ]
                  }
    -- Only node ids present in both maps survive the intersection.
    docs = Map.elems
         $ Map.intersectionWith toDocument ns (Map.map (Set.map unNgramsTerm) ngs)

  pure $ Corpus { _c_corpus = docs
                , _c_hash   = hash $ List.map _d_hash docs }
81
-- | Fetch, per node id, the set of ngrams terms attached to nodes of a corpus.
--
-- Arguments:
--
-- * @cId@: the corpus passed to 'getNgramsByNodeOnlyUser'.
-- * @lId@: the list whose terms are looked up in @repo@.
-- * @nt@: the kind of ngrams to consider.
-- * @repo@: the node-list story the terms of @lId@ are read from.
--
-- The terms kept are the keys of @mapTermListRoot [lId] nt repo@ after
-- filtering with @filterListWithRoot MapTerm@. The lookup is done against
-- the 'NodeList' nodes selected for 'userMaster' followed by @lId@ itself.
getNodeNgrams :: HasNodeError err
              => CorpusId
              -> ListId
              -> NgramsType
              -> NodeListStory
              -> Cmd err (Map NodeId (Set NgramsTerm))
getNodeNgrams cId lId nt repo = do
  lIds <- selectNodesWithUsername NodeList userMaster
  let ngs = filterListWithRoot MapTerm $ mapTermListRoot [lId] nt repo
  -- TODO HashMap
  getNgramsByNodeOnlyUser cId (lIds <> [lId]) nt (HashMap.keys ngs)
98
99 -- TODO
100 -- Exports List
101 -- Version number of the list