]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/API/Node/Corpus/Export.hs
[framewrite] better line parsing
[gargantext.git] / src / Gargantext / API / Node / Corpus / Export.hs
1 {-|
2 Module : Gargantext.API.Node.Corpus.Export
3 Description : Corpus export (documents and their ngrams)
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 Main exports of Gargantext:
11 - corpus
12 - document and ngrams
13 - lists
14 -}
15
16 module Gargantext.API.Node.Corpus.Export
17 where
18
19 import Data.Map (Map)
20 import Data.Maybe (fromMaybe)
21 import Data.Set (Set)
22 import qualified Data.List as List
23 import qualified Data.Map as Map
24 import qualified Data.Set as Set
25 import qualified Data.HashMap.Strict as HashMap
26
27 import Gargantext.API.Node.Corpus.Export.Types
28 import Gargantext.API.Ngrams.Types
29 import Gargantext.API.Ngrams.Tools (filterListWithRoot, mapTermListRoot, getRepo')
30 import Gargantext.API.Prelude (GargNoServer)
31 import Gargantext.Prelude.Crypto.Hash (hash)
32 import Gargantext.Core.Types
33 import Gargantext.Core.NodeStory
34 import Gargantext.Database.Action.Metrics.NgramsByNode (getNgramsByNodeOnlyUser)
35 import Gargantext.Database.Admin.Config (userMaster)
36 import Gargantext.Database.Admin.Types.Hyperdata (HyperdataDocument(..))
37 import Gargantext.Database.Prelude (Cmd)
38 import Gargantext.Database.Query.Table.Node
39 import Gargantext.Database.Query.Table.Node.Error (HasNodeError)
40 import Gargantext.Database.Query.Table.Node.Select (selectNodesWithUsername)
41 import Gargantext.Database.Query.Table.NodeNode (selectDocNodes)
42 import Gargantext.Database.Schema.Ngrams (NgramsType(..))
43 import Gargantext.Database.Schema.Node (_node_id, _node_hyperdata)
44 import Gargantext.Prelude
45
46 --------------------------------------------------
-- | Export a corpus as its documents, each paired with its ngrams, plus hashes.
--
-- * @cId@: the corpus whose document nodes are selected.
-- * @lId@: the list to read ngrams from. When 'Nothing', the list is resolved
--   with 'defaultList' (the same fallback 'getNodeNgrams' applies) instead of
--   panicking.
-- * @nt'@: the ngrams type to export; defaults to 'NgramsTerms'.
--
-- Hashes are ordered by Set: the ngrams of a document are held in a 'Set',
-- which is what gets listed ('Set.toList') and hashed.
getCorpus :: CorpusId
          -> Maybe ListId
          -> Maybe NgramsType
          -> GargNoServer Corpus
getCorpus cId lId nt' = do
  let nt = fromMaybe NgramsTerms nt'

  -- Resolve the list once, up front, so that the repo lookup and the ngrams
  -- query below work on the very same list.
  listId <- maybe (defaultList cId) pure lId

  -- Document nodes of the corpus, indexed by node id.
  ns <- Map.fromList . map (\n -> (_node_id n, n)) <$> selectDocNodes cId

  repo <- getRepo' [listId]
  ngs  <- getNodeNgrams cId (Just listId) nt repo

  let -- uniqId is hash computed already for each document imported in database
      d_hash a b = hash [ fromMaybe "" (_hd_uniqId $ _node_hyperdata a)
                        , hash b
                        ]
      -- Only nodes present on both sides (documents and ngrams) are kept.
      r = Map.intersectionWith
            (\a b -> Document a (Ngrams (Set.toList b) (hash b)) (d_hash a b))
            ns
            (Map.map (Set.map unNgramsTerm) ngs)
      docs = Map.elems r

  -- The corpus hash is computed from the list of its document hashes.
  pure $ Corpus docs (hash $ List.map _d_hash docs)
75
-- | Fetch the ngrams of a corpus, grouped by node.
--
-- * @cId@: the corpus to query.
-- * @lId'@: the list to use; when 'Nothing', 'defaultList' picks one for @cId@.
-- * @nt@: the type of ngrams to look up.
-- * @repo@: the node story the list terms are read from.
--
-- Returns a map from node id to the set of ngrams terms found for that node.
getNodeNgrams :: HasNodeError err
              => CorpusId
              -> Maybe ListId
              -> NgramsType
              -> NodeListStory
              -> Cmd err (Map NodeId (Set NgramsTerm))
getNodeNgrams cId lId' nt repo = do
  listId        <- maybe (defaultList cId) pure lId'
  -- NOTE(review): presumably the list nodes owned by the master user — confirm.
  masterListIds <- selectNodesWithUsername NodeList userMaster
  let mapTerms = filterListWithRoot MapTerm (mapTermListRoot [listId] nt repo)
  -- TODO HashMap
  getNgramsByNodeOnlyUser cId (masterListIds <> [listId]) nt (HashMap.keys mapTerms)
92
93 -- TODO
94 -- Exports List
95 -- Version number of the list