]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/API/Node/Corpus/Export.hs
[john-snow] implement pos/lemma language
[gargantext.git] / src / Gargantext / API / Node / Corpus / Export.hs
1 {-|
2 Module : Gargantext.API.Node.Corpus.Export
3 Description : Corpus export
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 Main exports of Gargantext:
11 - corpus
12 - document and ngrams
13 - lists
14 -}
15
16 module Gargantext.API.Node.Corpus.Export
17 where
18
19 import Data.Map (Map)
20 import Data.Maybe (fromMaybe)
21 import Data.Set (Set)
22 import Data.Text (Text)
23 import qualified Data.List as List
24 import qualified Data.Map as Map
25 import qualified Data.Set as Set
26 import qualified Data.HashMap.Strict as HashMap
27
28 import Gargantext.API.Node.Corpus.Export.Types
29 import qualified Gargantext.API.Node.Document.Export.Types as DocumentExport
30 import Gargantext.API.Ngrams.Types
31 import Gargantext.API.Ngrams.Tools (filterListWithRoot, mapTermListRoot, getRepo')
32 import Gargantext.API.Prelude (GargNoServer)
33 import Gargantext.Prelude.Crypto.Hash (hash)
34 import Gargantext.Core.Types
35 import Gargantext.Core.NodeStory
36 import Gargantext.Database.Action.Metrics.NgramsByContext (getNgramsByContextOnlyUser)
37 import Gargantext.Database.Admin.Config (userMaster)
38 import Gargantext.Database.Admin.Types.Hyperdata (HyperdataDocument(..))
39 import Gargantext.Database.Prelude (Cmd)
40 import Gargantext.Database.Query.Table.Node
41 import Gargantext.Database.Query.Table.Node.Error (HasNodeError)
42 import Gargantext.Database.Query.Table.Node.Select (selectNodesWithUsername)
43 import Gargantext.Database.Query.Table.NodeContext (selectDocNodes)
44 import Gargantext.Database.Schema.Ngrams (NgramsType(..))
45 import Gargantext.Database.Schema.Context (_context_id, _context_hyperdata)
46 import Gargantext.Prelude
47
48 --------------------------------------------------
-- | Export a corpus as a list of documents together with their ngrams.
--
-- * When no 'ListId' is given, the list returned by @defaultList cId@ is used.
-- * When no 'NgramsType' is given, 'NgramsTerms' is used.
--
-- Only contexts that appear both among the documents selected for the corpus
-- and in the result of 'getContextNgrams' are exported (the two maps are
-- intersected on the context id).
--
-- Hashes are ordered by Set: the ngrams of a document are hashed as a 'Set',
-- and the corpus hash is the hash of the per-document hashes taken in the
-- order of 'Map.elems' (ascending context id).
getCorpus :: CorpusId
          -> Maybe ListId
          -> Maybe NgramsType
          -> GargNoServer Corpus
getCorpus cId lId nt' = do
  let ngramsType = fromMaybe NgramsTerms nt'

  listId <- case lId of
    Just l  -> pure l
    Nothing -> defaultList cId

  -- Index the corpus documents by their context id.
  docs <- selectDocNodes cId
  let contextsById = Map.fromList [ (_context_id c, c) | c <- docs ]

  repo <- getRepo' [listId]
  ngs  <- getContextNgrams cId listId ngramsType repo

  let termsById = Map.map (Set.map unNgramsTerm) ngs
      -- Keep only the contexts present on both sides.
      exported  = Map.elems
                $ Map.intersectionWith toDocument contextsById termsById

  pure $ Corpus { _c_corpus = exported
                , _c_hash   = hash $ List.map DocumentExport._d_hash exported }
  where
    -- Pair one document context with its set of terms.
    toDocument ctx terms =
      DocumentExport.Document
        { _d_document = context2node ctx
        , _d_ngrams   = DocumentExport.Ngrams (Set.toList terms) (hash terms)
        , _d_hash     = documentHash ctx terms
        }

    -- uniqId is hash computed already for each document imported in database;
    -- it is combined with the hash of the document's terms. A missing uniqId
    -- contributes the empty text.
    documentHash :: Context HyperdataDocument -> Set Text -> Text
    documentHash ctx terms =
      hash [ fromMaybe "" (_hd_uniqId $ _context_hyperdata ctx)
           , hash terms
           ]
84
-- | For the given corpus, fetch the ngrams attached to each context.
--
-- The terms looked up are the keys kept by @filterListWithRoot MapTerm@ from
-- the @mapTermListRoot@ of the given list, ngrams type and repo. The lookup
-- itself is delegated to 'getNgramsByContextOnlyUser', which is given the
-- 'NodeList' nodes selected for 'userMaster' followed by the given list id.
getContextNgrams :: HasNodeError err
                 => CorpusId
                 -> ListId
                 -> NgramsType
                 -> NodeListStory
                 -> Cmd err (Map ContextId (Set NgramsTerm))
getContextNgrams cId lId nt repo = do
  masterListIds <- selectNodesWithUsername NodeList userMaster
  -- TODO HashMap
  getNgramsByContextOnlyUser cId
                             (masterListIds <> [lId])
                             nt
                             (HashMap.keys mapTerms)
  where
    mapTerms = filterListWithRoot MapTerm $ mapTermListRoot [lId] nt repo
101
102 -- TODO
103 -- Exports List
104 -- Version number of the list