]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/API/Node/Corpus/Export.hs
[FIX] clustering, order 2 similarity, ok
[gargantext.git] / src / Gargantext / API / Node / Corpus / Export.hs
1 {-|
2 Module : Gargantext.API.Node.Corpus.Export
3 Description : Export a corpus (its documents and their ngrams) from storage
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 Main exports of Gargantext:
11 - corpus
12 - document and ngrams
13 - lists
14 -}
15
16 module Gargantext.API.Node.Corpus.Export
17 where
18
19 import Data.Map (Map)
20 import Data.Maybe (fromMaybe)
21 import Data.Set (Set)
22 import Gargantext.API.Node.Corpus.Export.Types
23 import Gargantext.API.Ngrams.Types
24 import Gargantext.API.Ngrams.Tools (filterListWithRoot, mapTermListRoot, getRepo)
25 import Gargantext.API.Prelude (GargNoServer)
26 import Gargantext.Prelude.Crypto.Hash (hash)
27 import Gargantext.Core.Types
28 import Gargantext.Database.Action.Metrics.NgramsByNode (getNgramsByNodeOnlyUser)
29 import Gargantext.Database.Admin.Config (userMaster)
30 import Gargantext.Database.Admin.Types.Hyperdata (HyperdataDocument(..))
31 import Gargantext.Database.Prelude (Cmd)
32 import Gargantext.Database.Query.Table.Node
33 import Gargantext.Database.Query.Table.Node.Error (HasNodeError)
34 import Gargantext.Database.Query.Table.Node.Select (selectNodesWithUsername)
35 import Gargantext.Database.Query.Table.NodeNode (selectDocNodes)
36 import Gargantext.Database.Schema.Ngrams (NgramsType(..))
37 import Gargantext.Database.Schema.Node (_node_id, _node_hyperdata)
38 import Gargantext.Prelude
39 import qualified Data.List as List
40 import qualified Data.Map as Map
41 import qualified Data.Set as Set
42 import qualified Data.HashMap.Strict as HashMap
43
44 --------------------------------------------------
-- | Build the export of a corpus.
--
-- Document nodes of the corpus (from 'selectDocNodes') are matched by node id
-- with the ngrams returned by 'getNodeNgrams'; only nodes present on both
-- sides end up in the result. When no 'NgramsType' is given, 'NgramsTerms'
-- is used.
--
-- Ngrams are kept in a 'Set', so 'Set.toList' lists them in ascending order.
getCorpus :: CorpusId
          -> Maybe ListId
          -> Maybe NgramsType
          -> GargNoServer Corpus
getCorpus cId lId nt' = do
  let ngramsType = fromMaybe NgramsTerms nt'

  docNodes    <- selectDocNodes cId
  repo        <- getRepo
  termsByNode <- getNodeNgrams cId lId ngramsType repo

  let nodesById    = Map.fromList [ (_node_id n, n) | n <- docNodes ]
      ngramsByNode = Map.map (Set.map unNgramsTerm) termsByNode
      documents    = Map.elems
                   $ Map.intersectionWith mkDocument nodesById ngramsByNode

  -- The corpus hash is computed from the list of per-document hashes.
  pure $ Corpus documents (hash $ List.map _d_hash documents)
  where
    -- One exported document: the node, its ngrams with their hash, and a
    -- document hash combining the node's uniqId (empty when absent) with
    -- the ngrams hash.
    -- uniqId is hash computed already for each document imported in database
    mkDocument node terms =
      Document node
               (Ngrams (Set.toList terms) (hash terms))
               (hash [ fromMaybe "" (_hd_uniqId $ _node_hyperdata node)
                     , hash terms
                     ])
72
-- | Ngrams of the given type, grouped by node, for a corpus.
--
-- The list used is the one given, or @defaultList cId@ when none is given.
-- The ngrams looked up are the keys kept by @filterListWithRoot MapTerm@ for
-- that list; the lookup itself is done by 'getNgramsByNodeOnlyUser' over the
-- 'NodeList' nodes selected for 'userMaster' plus the chosen list.
getNodeNgrams :: HasNodeError err
              => CorpusId
              -> Maybe ListId
              -> NgramsType
              -> NgramsRepo
              -> Cmd err (Map NodeId (Set NgramsTerm))
getNodeNgrams cId lId' nt repo = do
  -- Fall back to the corpus default list only when no list id was supplied.
  lId           <- fromMaybe (defaultList cId) (pure <$> lId')
  masterListIds <- selectNodesWithUsername NodeList userMaster

  let mapTerms = filterListWithRoot MapTerm (mapTermListRoot [lId] nt repo)

  -- TODO HashMap
  getNgramsByNodeOnlyUser cId
                          (masterListIds <> [lId])
                          nt
                          (HashMap.keys mapTerms)
89
90 -- TODO
91 -- Exports List
92 -- Version number of the list