]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/API/Node/Corpus/Export.hs
Merge branch 'dev-refactor-metrics' of ssh://gitlab.iscpif.fr:20022/gargantext/haskel...
[gargantext.git] / src / Gargantext / API / Node / Corpus / Export.hs
1 {-|
2 Module : Gargantext.API.Node.Corpus.Export
3 Description : Export of a corpus with its documents and ngrams
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 Main exports of Gargantext:
11 - corpus
12 - document and ngrams
13 - lists
14 -}
15
16 module Gargantext.API.Node.Corpus.Export
17 where
18
19
import Data.HashMap.Strict (HashMap)
import Data.Map (Map)
import Data.Maybe (fromMaybe)
import Data.Set (Set)
import Data.Text (Text)
import Gargantext.API.Node.Corpus.Export.Types
import Gargantext.API.Ngrams.Types
import Gargantext.API.Ngrams.Tools (filterListWithRoot, mapTermListRoot, getRepo)
import Gargantext.API.Prelude (GargNoServer)
import Gargantext.Prelude.Crypto.Hash (hash)
import Gargantext.Core.Types
import Gargantext.Database.Action.Metrics.NgramsByNode (getNgramsByNodeOnlyUser)
import Gargantext.Database.Admin.Config (userMaster)
import Gargantext.Database.Admin.Types.Hyperdata (HyperdataDocument(..))
import Gargantext.Database.Prelude (Cmd)
import Gargantext.Database.Query.Table.Node
import Gargantext.Database.Query.Table.Node.Error (HasNodeError)
import Gargantext.Database.Query.Table.Node.Select (selectNodesWithUsername)
import Gargantext.Database.Query.Table.NodeNode (selectDocNodes)
import Gargantext.Database.Schema.Ngrams (NgramsType(..))
import Gargantext.Database.Schema.Node (_node_id, _node_hyperdata)
import Gargantext.Prelude
import qualified Data.HashMap.Strict as HashMap
import qualified Data.List as List
import qualified Data.Map as Map
import qualified Data.Set as Set
45
46 --------------------------------------------------
-- | Build the export of a corpus: one 'Document' per document node that
-- also has ngrams attached, together with content hashes.
--
-- * When no 'NgramsType' is given, 'NgramsTerms' is used.
-- * Only nodes present both in the document selection and in the ngrams
--   result are exported (map intersection on the node id).
-- * Hashes are ordered by Set: the ngrams of a document are listed with
--   'Set.toList' (ascending order) and documents are listed in ascending
--   key order of the intermediate 'Map', so the hashes do not depend on
--   the order in which the database returned the rows.
getCorpus :: CorpusId
          -> Maybe ListId
          -> Maybe NgramsType
          -> GargNoServer Corpus
getCorpus cId lId nt' = do
  let nt = fromMaybe NgramsTerms nt'

  -- Index the document nodes of the corpus by their node id.
  ns   <- Map.fromList . map (\n -> (_node_id n, n)) <$> selectDocNodes cId
  repo <- getRepo
  ngs  <- getNodeNgrams cId lId nt repo

  let
    -- uniqId is hash computed already for each document imported in database
    d_hash a b = hash [ fromMaybe "" (_hd_uniqId $ _node_hyperdata a)
                      , hash b
                      ]

    mkDocument a b = Document a (Ngrams (Set.toList b) (hash b)) (d_hash a b)

    -- 'getNodeNgrams' yields a 'HashMap' whereas the nodes are held in a
    -- 'Data.Map'; convert before intersecting so both sides have the same
    -- container type.
    ngsByNode = Map.fromList (HashMap.toList ngs)

    docs = Map.elems (Map.intersectionWith mkDocument ns ngsByNode)

  -- The corpus hash is the hash of the list of document hashes.
  pure $ Corpus docs (hash $ List.map _d_hash docs)
74
-- | Fetch the ngrams attached to the nodes of a corpus, keyed by node id.
--
-- The ngrams considered are the keys left after applying
-- @filterListWithRoot MapTerm@ to the terms of the selected list for the
-- given 'NgramsType' in the repo. The lookup itself is delegated to
-- 'getNgramsByNodeOnlyUser', run against the lists returned by
-- @selectNodesWithUsername NodeList userMaster@ plus the selected list.
--
-- When no 'ListId' is supplied, the result of @defaultList cId@ is used
-- (presumably the corpus' default list -- confirm against 'defaultList').
getNodeNgrams :: HasNodeError err
              => CorpusId
              -> Maybe ListId
              -> NgramsType
              -> NgramsRepo
              -> Cmd err (HashMap NodeId (Set Text))
getNodeNgrams cId lId' nt repo = do
  -- Explicit list id wins; otherwise fall back to the default list.
  lId  <- maybe (defaultList cId) pure lId'
  lIds <- selectNodesWithUsername NodeList userMaster

  let ngs = filterListWithRoot MapTerm $ mapTermListRoot [lId] nt repo

  getNgramsByNodeOnlyUser cId (lIds <> [lId]) nt (Map.keys ngs)
90
91 -- TODO
92 -- Exports List
93 -- Version number of the list