]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Database/Action/Flow/List.hs
[FEAT/FIX] Stemming -> Parent/Children -> Patch ok
[gargantext.git] / src / Gargantext / Database / Action / Flow / List.hs
1 {-|
2 Module : Gargantext.Database.Flow.List
3 Description : List Flow
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 -}
11
12 {-# LANGUAGE ConstraintKinds #-}
13 {-# LANGUAGE ConstrainedClassMethods #-}
14 {-# LANGUAGE ConstraintKinds #-}
15 {-# LANGUAGE InstanceSigs #-}
16
17 module Gargantext.Database.Action.Flow.List
18 where
19
20 import Control.Concurrent
21 import Control.Lens (view, (^.), (+~), (%~), at)
22 import Control.Monad.Reader
23 import qualified Data.List as List
24 import qualified Data.Map as Map
25 import Data.Map (Map, toList)
26 import qualified Data.Map.Strict.Patch as PM
27 import Data.Maybe (catMaybes)
28 import Data.Text (Text)
29
30 import Gargantext.API.Ngrams.Types (HasRepoSaver(..), NgramsElement(..), NgramsPatch(..), NgramsRepoElement(..), NgramsTablePatch(..), NgramsTerm(..), RepoCmdM, ne_ngrams, ngramsElementToRepo, r_history, r_state, r_version, repoVar)
31 import Gargantext.Core.Flow.Types
32 import Gargantext.Core.Types (HasInvalidError(..), assertValid)
33 import Gargantext.Core.Types.Main (ListType(CandidateTerm))
34 import Gargantext.Core.Utils (something)
35 import Gargantext.Database.Action.Flow.Types
36 import qualified Gargantext.Database.Query.Table.Ngrams as TableNgrams
37 import Gargantext.Database.Admin.Types.Node
38 import Gargantext.Database.Query.Table.Node_NodeNgramsNodeNgrams
39 import Gargantext.Database.Query.Table.NodeNgrams (NodeNgramsPoly(..), NodeNgramsW, listInsertDb, getCgramsId)
40 -- import Gargantext.Database.Schema.Ngrams -- (insertNgrams, Ngrams(..), NgramsIndexed(..), indexNgrams, NgramsType(..), text2ngrams, ngramsTypeId)
41 import Gargantext.Database.Schema.Ngrams (Ngrams(..), NgramsType(..))
42 import Gargantext.Prelude
43
44 -- FLOW LIST
45 -- 1. select specific terms of the corpus when compared with others langs
46 -- (for now, suppose english)
47 -- 2. select specific terms of the corpus when compared with others corpora (same database)
48 -- 3. select clusters of terms (generic and specific)
49
50 {-
51 data FlowList = FlowListLang
52 | FlowListTficf
53 | FlowListSpeGen
54
55
56 flowList_Tficf :: UserCorpusId
57 -> MasterCorpusId
58 -> NgramsType
59 -> (Text -> Text)
60 -> Cmd err (Map Text (Double, Set Text))
61 flowList_Tficf u m nt f = do
62
63 u' <- Map.filter (\s -> Set.size s > 1) <$> getNodesByNgramsUser u nt
64 m' <- Map.filter (\s -> Set.size s > 1) <$> getNodesByNgramsMaster u m
65
66 pure $ sortTficf Down
67 $ toTficfData (countNodesByNgramsWith f u')
68 (countNodesByNgramsWith f m')
69
70 flowList_Tficf' :: UserCorpusId
71 -> MasterCorpusId
72 -> NgramsType
73 -> Cmd err (Map Text (Double, Set Text))
74 flowList_Tficf' u m nt f = do
75
76 u' <- Map.filter (\s -> Set.size s > 1) <$> getNodesByNgramsUser u nt
77 m' <- Map.filter (\s -> Set.size s > 1) <$> getNodesByNgramsMaster u m
78
79 pure $ sortTficf Down
80 $ toTficfData (countNodesByNgramsWith f u')
81 (countNodesByNgramsWith f m')
82
83 -}
84
85
86 -- | TODO check optimization
87 mapNodeIdNgrams :: [DocumentIdWithNgrams a]
88 -> Map Ngrams (Map NgramsType (Map NodeId Int))
89 mapNodeIdNgrams = Map.unionsWith (Map.unionWith (Map.unionWith (+))) . fmap f
90 where
91 f :: DocumentIdWithNgrams a
92 -> Map Ngrams (Map NgramsType (Map NodeId Int))
93 f d = fmap (fmap (Map.singleton nId)) $ documentNgrams d
94 where
95 nId = documentId $ documentWithId d
96
97 ------------------------------------------------------------------------
98 flowList_DbRepo :: FlowCmdM env err m
99 => ListId
100 -> Map NgramsType [NgramsElement]
101 -> m ListId
102 flowList_DbRepo lId ngs = do
103 -- printDebug "listId flowList" lId
104 mapCgramsId <- listInsertDb lId toNodeNgramsW (Map.toList ngs)
105 let toInsert = catMaybes [ (,) <$> (getCgramsId mapCgramsId ntype <$> (unNgramsTerm <$> parent))
106 <*> getCgramsId mapCgramsId ntype ngram
107 | (ntype, ngs') <- Map.toList ngs
108 , NgramsElement (NgramsTerm ngram) _ _ _ _ parent _ <- ngs'
109 ]
110 -- Inserting groups of ngrams
111 _r <- insert_Node_NodeNgrams_NodeNgrams
112 $ map (\(a,b) -> Node_NodeNgrams_NodeNgrams lId a b Nothing) toInsert
113
114 listInsert lId ngs
115
116 --trace (show $ List.filter (\n -> _ne_ngrams n == "versatile") $ List.concat $ Map.elems ngs) $ listInsert lId ngs
117 pure lId
118 ------------------------------------------------------------------------
119 ------------------------------------------------------------------------
120
121 toNodeNgramsW :: ListId
122 -> [(NgramsType, [NgramsElement])]
123 -> [NodeNgramsW]
124 toNodeNgramsW l ngs = List.concat $ map (toNodeNgramsW'' l) ngs
125 where
126 toNodeNgramsW'' :: ListId
127 -> (NgramsType, [NgramsElement])
128 -> [NodeNgramsW]
129 toNodeNgramsW'' l' (ngrams_type, elms) =
130 [ NodeNgrams Nothing l' list_type ngrams_terms' ngrams_type Nothing Nothing Nothing 0 |
131 (NgramsElement (NgramsTerm ngrams_terms') _size list_type _occ _root _parent _children) <- elms
132 ]
133
134
135 toNodeNgramsW' :: ListId
136 -> [(Text, [NgramsType])]
137 -> [NodeNgramsW]
138 toNodeNgramsW' l'' ngs = [ NodeNgrams Nothing l'' CandidateTerm terms ngrams_type Nothing Nothing Nothing 0
139 | (terms, ngrams_types) <- ngs
140 , ngrams_type <- ngrams_types
141 ]
142
143
144 listInsert :: FlowCmdM env err m
145 => ListId
146 -> Map NgramsType [NgramsElement]
147 -> m ()
148 listInsert lId ngs = mapM_ (\(typeList, ngElmts)
149 -> putListNgrams lId typeList ngElmts) (toList ngs)
150
151 ------------------------------------------------------------------------
152 ------------------------------------------------------------------------
153
154
155 -- NOTE
156 -- This is no longer part of the API.
157 -- This function is maintained for its usage in Database.Action.Flow.List.
158 -- If the given list of ngrams elements contains ngrams already in
159 -- the repo, they will be ignored.
160 putListNgrams :: (HasInvalidError err, RepoCmdM env err m)
161 => NodeId
162 -> TableNgrams.NgramsType
163 -> [NgramsElement]
164 -> m ()
165 putListNgrams _ _ [] = pure ()
166 putListNgrams nodeId ngramsType nes = putListNgrams' nodeId ngramsType m
167 where
168 m = Map.fromList $ map (\n -> (n ^. ne_ngrams, ngramsElementToRepo n)) nes
169
170 putListNgrams' :: (HasInvalidError err, RepoCmdM env err m)
171 => NodeId
172 -> TableNgrams.NgramsType
173 -> Map NgramsTerm NgramsRepoElement
174 -> m ()
175 putListNgrams' nodeId ngramsType ns = do
176 -- printDebug "[putListNgrams'] nodeId" nodeId
177 -- printDebug "[putListNgrams'] ngramsType" ngramsType
178 -- printDebug "[putListNgrams'] ns" ns
179
180 let p1 = NgramsTablePatch . PM.fromMap $ NgramsReplace Nothing . Just <$> ns
181 (p0, p0_validity) = PM.singleton nodeId p1
182 (p, p_validity) = PM.singleton ngramsType p0
183 assertValid p0_validity
184 assertValid p_validity
185 {-
186 -- TODO
187 v <- currentVersion
188 q <- commitStatePatch (Versioned v p)
189 assert empty q
190 -- What if another commit comes in between?
191 -- Shall we have a blindCommitStatePatch? It would not ask for a version but just a patch.
192 -- The modifyMVar_ would test the patch with applicable first.
193 -- If valid the rest would be atomic and no merge is required.
194 -}
195 var <- view repoVar
196 liftBase $ modifyMVar_ var $ \r -> do
197 pure $ r & r_version +~ 1
198 & r_history %~ (p :)
199 & r_state . at ngramsType %~
200 (Just .
201 (at nodeId %~
202 ( Just
203 . (<> ns)
204 . something
205 )
206 )
207 . something
208 )
209 saveRepo
210
211
212 saveRepo :: ( MonadReader env m, MonadBase IO m, HasRepoSaver env )
213 => m ()
214 saveRepo = liftBase =<< view repoSaver