]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Database/Action/Flow/List.hs
[DB/REFACT] intermediary step
[gargantext.git] / src / Gargantext / Database / Action / Flow / List.hs
1 {-|
2 Module : Gargantext.Database.Flow.List
3 Description : List Flow
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 -}
11
12 {-# OPTIONS_GHC -fno-warn-orphans #-}
13
14 {-# LANGUAGE ConstraintKinds #-}
15 {-# LANGUAGE RankNTypes #-}
16 {-# LANGUAGE ConstrainedClassMethods #-}
17 {-# LANGUAGE ConstraintKinds #-}
18 {-# LANGUAGE DeriveGeneric #-}
19 {-# LANGUAGE FlexibleContexts #-}
20 {-# LANGUAGE InstanceSigs #-}
21 {-# LANGUAGE NoImplicitPrelude #-}
22 {-# LANGUAGE OverloadedStrings #-}
23
24 module Gargantext.Database.Action.Flow.List
25 where
26
27 import Control.Monad (mapM_)
28 import Data.Map (Map, toList)
29 import Data.Either
30 import Data.Maybe (Maybe(..), catMaybes)
31 import Data.Set (Set)
32 import Data.Text (Text)
33 import Gargantext.API.Ngrams (NgramsElement(..), putListNgrams)
34 import Gargantext.Core (Lang(..))
35 import Gargantext.Core.Types.Individu
36 import Gargantext.Core.Flow.Types
37 import Gargantext.Core.Types.Main (ListType(CandidateTerm))
38 import Gargantext.Database.Action.Flow.Types
39 import Gargantext.Database.Admin.Config (userMaster, corpusMasterName)
40 import Gargantext.Database.Admin.Types.Node -- (HyperdataDocument(..), NodeType(..), NodeId, UserId, ListId, CorpusId, RootId, MasterCorpusId, MasterUserId)
41 import Gargantext.Database.Admin.Utils (Cmd)
42 import Gargantext.Database.Schema.Ngrams -- (insertNgrams, Ngrams(..), NgramsIndexed(..), indexNgrams, NgramsType(..), text2ngrams, ngramsTypeId)
43 import Gargantext.Database.Query.Table.NodeNgrams (NodeNgramsPoly(..), NodeNgramsW, listInsertDb, getCgramsId)
44 import Gargantext.Database.Schema.Node_NodeNgramsNodeNgrams -- (insert_Node_NodeNgrams_NodeNgrams, Node_NodeNgrams_NodeNgrams(..))
45 import Gargantext.Prelude
46 import Gargantext.Text.List
47 import qualified Data.List as List
48 import qualified Data.Map as Map
49 import qualified Data.Set as Set
50 import Gargantext.Database.Action.Metrics.NgramsByNode
51 import Gargantext.Database.Action.Query.Tree.Root (getOrMk_RootWithCorpus)
52
53 -- FLOW LIST
54 -- 1. select specific terms of the corpus when compared with others langs
55 -- (for now, suppose english)
56 -- 2. select specific terms of the corpus when compared with others corpora (same database)
57 -- 3. select clusters of terms (generic and specific)
58
59 {-
60 data FlowList = FlowListLang
61 | FlowListTficf
62 | FlowListSpeGen
63
64
65 flowList_Tficf :: UserCorpusId
66 -> MasterCorpusId
67 -> NgramsType
68 -> (Text -> Text)
69 -> Cmd err (Map Text (Double, Set Text))
70 flowList_Tficf u m nt f = do
71
72 u' <- Map.filter (\s -> Set.size s > 1) <$> getNodesByNgramsUser u nt
73 m' <- Map.filter (\s -> Set.size s > 1) <$> getNodesByNgramsMaster u m
74
75 pure $ sortTficf Down
76 $ toTficfData (countNodesByNgramsWith f u')
77 (countNodesByNgramsWith f m')
78
79 flowList_Tficf' :: UserCorpusId
80 -> MasterCorpusId
81 -> NgramsType
82 -> Cmd err (Map Text (Double, Set Text))
83 flowList_Tficf' u m nt f = do
84
85 u' <- Map.filter (\s -> Set.size s > 1) <$> getNodesByNgramsUser u nt
86 m' <- Map.filter (\s -> Set.size s > 1) <$> getNodesByNgramsMaster u m
87
88 pure $ sortTficf Down
89 $ toTficfData (countNodesByNgramsWith f u')
90 (countNodesByNgramsWith f m')
91
92 -}
93
94
95
96
97
98
99 -- | TODO check optimization
100 mapNodeIdNgrams :: [DocumentIdWithNgrams a]
101 -> Map Ngrams (Map NgramsType (Map NodeId Int))
102 mapNodeIdNgrams = Map.unionsWith (Map.unionWith (Map.unionWith (+))) . fmap f
103 where
104 f :: DocumentIdWithNgrams a
105 -> Map Ngrams (Map NgramsType (Map NodeId Int))
106 f d = fmap (fmap (Map.singleton nId)) $ document_ngrams d
107 where
108 nId = documentId $ documentWithId d
109
110 ------------------------------------------------------------------------
111 flowList_DbRepo :: FlowCmdM env err m
112 => ListId
113 -> Map NgramsType [NgramsElement]
114 -> m ListId
115 flowList_DbRepo lId ngs = do
116 -- printDebug "listId flowList" lId
117 mapCgramsId <- listInsertDb lId toNodeNgramsW (Map.toList ngs)
118 let toInsert = catMaybes [ (,) <$> (getCgramsId mapCgramsId ntype <$> parent)
119 <*> getCgramsId mapCgramsId ntype ngram
120 | (ntype, ngs') <- Map.toList ngs
121 , NgramsElement ngram _ _ _ _ parent _ <- ngs'
122 ]
123 -- Inserting groups of ngrams
124 _r <- insert_Node_NodeNgrams_NodeNgrams
125 $ map (\(a,b) -> Node_NodeNgrams_NodeNgrams lId a b Nothing) toInsert
126
127 listInsert lId ngs
128
129 --trace (show $ List.filter (\n -> _ne_ngrams n == "versatile") $ List.concat $ Map.elems ngs) $ listInsert lId ngs
130 pure lId
131 ------------------------------------------------------------------------
132 ------------------------------------------------------------------------
133
134 toNodeNgramsW :: ListId
135 -> [(NgramsType, [NgramsElement])]
136 -> [NodeNgramsW]
137 toNodeNgramsW l ngs = List.concat $ map (toNodeNgramsW'' l) ngs
138 where
139 toNodeNgramsW'' :: ListId
140 -> (NgramsType, [NgramsElement])
141 -> [NodeNgramsW]
142 toNodeNgramsW'' l' (ngrams_type, elms) =
143 [ NodeNgrams Nothing l' list_type ngrams_terms' ngrams_type Nothing Nothing Nothing 0 |
144 (NgramsElement ngrams_terms' _size list_type _occ _root _parent _children) <- elms
145 ]
146
147
148 toNodeNgramsW' :: ListId
149 -> [(Text, [NgramsType])]
150 -> [NodeNgramsW]
151 toNodeNgramsW' l'' ngs = [ NodeNgrams Nothing l'' CandidateTerm terms ngrams_type Nothing Nothing Nothing 0
152 | (terms, ngrams_types) <- ngs
153 , ngrams_type <- ngrams_types
154 ]
155
156
157 listInsert :: FlowCmdM env err m
158 => ListId
159 -> Map NgramsType [NgramsElement]
160 -> m ()
161 listInsert lId ngs = mapM_ (\(typeList, ngElmts)
162 -> putListNgrams lId typeList ngElmts) (toList ngs)
163
164 ------------------------------------------------------------------------
165 ------------------------------------------------------------------------