]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/API/Ngrams/Tools.hs
[NodeStory] Tools updates (WIP)
[gargantext.git] / src / Gargantext / API / Ngrams / Tools.hs
1 {-|
2 Module : Gargantext.API.Ngrams.Tools
3 Description : Tools to manage Ngrams Elements (from the API)
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 -}
11
12 {-# LANGUAGE TypeFamilies #-}
13
14 module Gargantext.API.Ngrams.Tools
15 where
16
17 import Control.Concurrent
18 import Control.Lens (_Just, (^.), at, view, At, Index, IxValue)
19 import Control.Monad.Reader
20 import Data.HashMap.Strict (HashMap)
21 import Data.Hashable (Hashable)
22 import Data.Set (Set)
23 import Data.Validity
24 import Gargantext.API.Ngrams.Types
25 import Gargantext.Core.Types (ListType(..), NodeId, ListId)
26 import Gargantext.Database.Schema.Ngrams (NgramsType)
27 import Gargantext.Prelude
28 import qualified Data.HashMap.Strict as HM
29 import qualified Data.Map.Strict as Map
30 import qualified Data.Set as Set
31 import qualified Data.List as List
32 import Gargantext.Core.NodeStory
33
34 mergeNgramsElement :: NgramsRepoElement -> NgramsRepoElement -> NgramsRepoElement
35 mergeNgramsElement _neOld neNew = neNew
36
37 type RootTerm = NgramsTerm
38
39 getRepo :: RepoCmdM env err m => m NgramsRepo
40 getRepo = do
41 v <- view repoVar
42 liftBase $ readMVar v
43
44 getRepo' :: HasNodeStory env err m
45 => [ListId] -> m NodeListStory
46 getRepo' listIds = do
47 maybeNodeListStory <- head <$> List.reverse <$> mapM getNodeListStory'' listIds
48 case maybeNodeListStory of
49 Nothing -> panic "[G.A.N.Tools.getRepo']"
50 Just nls -> pure nls
51
52
53 getNodeListStory :: HasNodeStory env err m
54 => m (NodeId -> IO (MVar NodeListStory))
55 getNodeListStory = do
56 env <- view hasNodeStory
57 pure $ view nse_getter env
58
59 getNodeListStory' :: HasNodeStory env err m
60 => NodeId -> m (IO NodeListStory)
61 getNodeListStory' n = do
62 f <- getNodeListStory
63 v <- liftBase $ f n
64 pure $ readMVar v
65
66 getNodeListStory'' :: HasNodeStory env err m
67 => NodeId -> m NodeListStory
68 getNodeListStory'' n = do
69 f <- getNodeListStory
70 v <- liftBase $ f n
71 v' <- liftBase $ readMVar v
72 pure $ v'
73
74
75
76 listNgramsFromRepo :: [ListId] -> NgramsType
77 -> NgramsRepo -> HashMap NgramsTerm NgramsRepoElement
78 listNgramsFromRepo nodeIds ngramsType repo = ngrams
79 where
80 ngramsMap = repo ^. r_state . at ngramsType . _Just
81
82 -- TODO HashMap linked
83 ngrams = HM.fromList $ Map.toList $ Map.unionsWith mergeNgramsElement
84 [ ngramsMap ^. at nodeId . _Just | nodeId <- nodeIds ]
85
86
87 listNgramsFromRepo' :: [ListId] -> NgramsType
88 -> NodeListStory -> HashMap NgramsTerm NgramsRepoElement
89 listNgramsFromRepo' nodeIds ngramsType repo =
90 HM.fromList $ Map.toList
91 $ Map.unionsWith mergeNgramsElement ngrams
92 where
93 ngrams = [ repo
94 ^. unNodeStory
95 . at nodeId . _Just
96 . a_state
97 . at ngramsType . _Just
98 | nodeId <- nodeIds
99 ]
100
101
102
103 -- TODO-ACCESS: We want to do the security check before entering here.
104 -- Add a static capability parameter would be nice.
105 -- Ideally this is the access to `repoVar` which needs to
106 -- be properly guarded.
107 getListNgrams :: RepoCmdM env err m
108 => [ListId] -> NgramsType
109 -> m (HashMap NgramsTerm NgramsRepoElement)
110 getListNgrams nodeIds ngramsType = listNgramsFromRepo nodeIds ngramsType <$> getRepo
111
112 getListNgrams' :: HasNodeStory env err m
113 => [ListId] -> NgramsType
114 -> m (HashMap NgramsTerm NgramsRepoElement)
115 getListNgrams' nodeIds ngramsType = listNgramsFromRepo' nodeIds ngramsType
116 <$> getRepo' nodeIds
117
118
119 getTermsWith :: (RepoCmdM env err m, Eq a, Hashable a)
120 => (NgramsTerm -> a) -> [ListId]
121 -> NgramsType -> Set ListType
122 -> m (HashMap a [a])
123 getTermsWith f ls ngt lts = HM.fromListWith (<>)
124 <$> map toTreeWith
125 <$> HM.toList
126 <$> HM.filter (\f' -> Set.member (fst f') lts)
127 <$> mapTermListRoot ls ngt
128 <$> getRepo
129 where
130 toTreeWith (t, (_lt, maybeRoot)) = case maybeRoot of
131 Nothing -> (f t, [])
132 Just r -> (f r, [f t])
133 getTermsWith' :: (HasNodeStory env err m, Eq a, Hashable a)
134 => (NgramsTerm -> a) -> [ListId]
135 -> NgramsType -> Set ListType
136 -> m (HashMap a [a])
137 getTermsWith' f ls ngt lts = HM.fromListWith (<>)
138 <$> map toTreeWith
139 <$> HM.toList
140 <$> HM.filter (\f' -> Set.member (fst f') lts)
141 <$> mapTermListRoot' ls ngt
142 <$> getRepo' ls
143 where
144 toTreeWith (t, (_lt, maybeRoot)) = case maybeRoot of
145 Nothing -> (f t, [])
146 Just r -> (f r, [f t])
147
148
149
150
151 mapTermListRoot :: [ListId]
152 -> NgramsType
153 -> NgramsRepo
154 -> HashMap NgramsTerm (ListType, Maybe NgramsTerm)
155 mapTermListRoot nodeIds ngramsType repo =
156 (\nre -> (_nre_list nre, _nre_root nre))
157 <$> listNgramsFromRepo nodeIds ngramsType repo
158 mapTermListRoot' :: [ListId]
159 -> NgramsType
160 -> NodeListStory
161 -> HashMap NgramsTerm (ListType, Maybe NgramsTerm)
162 mapTermListRoot' nodeIds ngramsType repo =
163 (\nre -> (_nre_list nre, _nre_root nre))
164 <$> listNgramsFromRepo' nodeIds ngramsType repo
165
166
167
168
169
170 filterListWithRootHashMap :: ListType
171 -> HashMap NgramsTerm (ListType, Maybe NgramsTerm)
172 -> HashMap NgramsTerm (Maybe RootTerm)
173 filterListWithRootHashMap lt m = snd <$> HM.filter isMapTerm m
174 where
175 isMapTerm (l, maybeRoot) = case maybeRoot of
176 Nothing -> l == lt
177 Just r -> case HM.lookup r m of
178 Nothing -> panic $ "[Garg.API.Ngrams.Tools] filterWithRoot, unknown key: " <> unNgramsTerm r
179 Just (l',_) -> l' == lt
180
181 filterListWithRoot :: ListType
182 -> HashMap NgramsTerm (ListType, Maybe NgramsTerm)
183 -> HashMap NgramsTerm (Maybe RootTerm)
184 filterListWithRoot lt m = snd <$> HM.filter isMapTerm m
185 where
186 isMapTerm (l, maybeRoot) = case maybeRoot of
187 Nothing -> l == lt
188 Just r -> case HM.lookup r m of
189 Nothing -> panic $ "[Garg.API.Ngrams.Tools] filterWithRoot, unknown key: " <> unNgramsTerm r
190 Just (l',_) -> l' == lt
191
192 groupNodesByNgrams :: ( At root_map
193 , Index root_map ~ NgramsTerm
194 , IxValue root_map ~ Maybe RootTerm
195 )
196 => root_map
197 -> HashMap NgramsTerm (Set NodeId)
198 -> HashMap NgramsTerm (Set NodeId)
199 groupNodesByNgrams syn occs = HM.fromListWith (<>) occs'
200 where
201 occs' = map toSyn (HM.toList occs)
202 toSyn (t,ns) = case syn ^. at t of
203 Nothing -> panic $ "[Garg.API.Ngrams.Tools.groupNodesByNgrams] unknown key: " <> unNgramsTerm t
204 Just r -> case r of
205 Nothing -> (t, ns)
206 Just r' -> (r',ns)
207
208 data Diagonal = Diagonal Bool
209
210 getCoocByNgrams :: Diagonal
211 -> HashMap NgramsTerm (Set NodeId)
212 -> HashMap (NgramsTerm, NgramsTerm) Int
213 getCoocByNgrams = getCoocByNgrams' identity
214
215
216 getCoocByNgrams' :: (Hashable a, Ord a, Ord c)
217 => (b -> Set c)
218 -> Diagonal
219 -> HashMap a b
220 -> HashMap (a, a) Int
221 getCoocByNgrams' f (Diagonal diag) m =
222 HM.fromList [( (t1,t2)
223 , maybe 0 Set.size $ Set.intersection
224 <$> (fmap f $ HM.lookup t1 m)
225 <*> (fmap f $ HM.lookup t2 m)
226 )
227 | (t1,t2) <- if diag then
228 [ (x,y) | x <- ks, y <- ks, x <= y] -- TODO if we keep a Data.Map here it might be
229 -- more efficient to enumerate all the y <= x.
230 else
231 listToCombi identity ks
232 ]
233
234 where ks = HM.keys m
235
236 ------------------------------------------