]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/API/Ngrams/Tools.hs
[FIX] temp resolution of commitpatch
[gargantext.git] / src / Gargantext / API / Ngrams / Tools.hs
1 {-|
2 Module : Gargantext.API.Ngrams.Tools
3 Description : Tools to manage Ngrams Elements (from the API)
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 -}
11
12 {-# LANGUAGE TypeFamilies #-}
13
14 module Gargantext.API.Ngrams.Tools
15 where
16
17 import Control.Concurrent
18 import Control.Lens (_Just, (^.), at, view, At, Index, IxValue)
19 import Control.Monad.Reader
20 import Data.HashMap.Strict (HashMap)
21 import Data.Hashable (Hashable)
22 import Data.Set (Set)
23 import Data.Validity
24 import Gargantext.API.Ngrams.Types
25 import Gargantext.Core.Types (ListType(..), NodeId, ListId)
26 import Gargantext.Database.Schema.Ngrams (NgramsType)
27 import Gargantext.Prelude
28 import qualified Data.HashMap.Strict as HM
29 import qualified Data.Map.Strict as Map
30 import qualified Data.Set as Set
31 import qualified Data.List as List
32 import Gargantext.Core.NodeStory
33
34 mergeNgramsElement :: NgramsRepoElement -> NgramsRepoElement -> NgramsRepoElement
35 mergeNgramsElement _neOld neNew = neNew
36
37 type RootTerm = NgramsTerm
38
39 getRepo :: RepoCmdM env err m => m NgramsRepo
40 getRepo = do
41 v <- view repoVar
42 liftBase $ readMVar v
43
44 getRepo' :: HasNodeStory env err m
45 => [ListId] -> m NodeListStory
46 getRepo' listIds = do
47 maybeNodeListStory <- head <$> List.reverse <$> mapM getNodeListStory'' listIds
48 case maybeNodeListStory of
49 Nothing -> panic "[G.A.N.Tools.getRepo']"
50 Just nls -> pure nls
51
52 getRepoVar :: HasNodeStory env err m
53 => ListId -> m (MVar NodeListStory)
54 getRepoVar l = do
55 f <- getNodeListStory
56 v <- liftBase $ f l
57 pure v
58
59 getNodeListStory :: HasNodeStory env err m
60 => m (NodeId -> IO (MVar NodeListStory))
61 getNodeListStory = do
62 env <- view hasNodeStory
63 pure $ view nse_getter env
64
65 getNodeListStory' :: HasNodeStory env err m
66 => NodeId -> m (IO NodeListStory)
67 getNodeListStory' n = do
68 f <- getNodeListStory
69 v <- liftBase $ f n
70 pure $ readMVar v
71
72 getNodeListStory'' :: HasNodeStory env err m
73 => NodeId -> m NodeListStory
74 getNodeListStory'' n = do
75 f <- getNodeListStory
76 v <- liftBase $ f n
77 v' <- liftBase $ readMVar v
78 pure $ v'
79
80
81
82 listNgramsFromRepo :: [ListId] -> NgramsType
83 -> NgramsRepo -> HashMap NgramsTerm NgramsRepoElement
84 listNgramsFromRepo nodeIds ngramsType repo = ngrams
85 where
86 ngramsMap = repo ^. r_state . at ngramsType . _Just
87
88 -- TODO HashMap linked
89 ngrams = HM.fromList $ Map.toList $ Map.unionsWith mergeNgramsElement
90 [ ngramsMap ^. at nodeId . _Just | nodeId <- nodeIds ]
91
92
93 listNgramsFromRepo' :: [ListId] -> NgramsType
94 -> NodeListStory -> HashMap NgramsTerm NgramsRepoElement
95 listNgramsFromRepo' nodeIds ngramsType repo =
96 HM.fromList $ Map.toList
97 $ Map.unionsWith mergeNgramsElement ngrams
98 where
99 ngrams = [ repo
100 ^. unNodeStory
101 . at nodeId . _Just
102 . a_state
103 . at ngramsType . _Just
104 | nodeId <- nodeIds
105 ]
106
107
108
109 -- TODO-ACCESS: We want to do the security check before entering here.
110 -- Add a static capability parameter would be nice.
111 -- Ideally this is the access to `repoVar` which needs to
112 -- be properly guarded.
113 getListNgrams :: RepoCmdM env err m
114 => [ListId] -> NgramsType
115 -> m (HashMap NgramsTerm NgramsRepoElement)
116 getListNgrams nodeIds ngramsType = listNgramsFromRepo nodeIds ngramsType <$> getRepo
117
118 getListNgrams' :: HasNodeStory env err m
119 => [ListId] -> NgramsType
120 -> m (HashMap NgramsTerm NgramsRepoElement)
121 getListNgrams' nodeIds ngramsType = listNgramsFromRepo' nodeIds ngramsType
122 <$> getRepo' nodeIds
123
124
125 getTermsWith :: (RepoCmdM env err m, Eq a, Hashable a)
126 => (NgramsTerm -> a) -> [ListId]
127 -> NgramsType -> Set ListType
128 -> m (HashMap a [a])
129 getTermsWith f ls ngt lts = HM.fromListWith (<>)
130 <$> map toTreeWith
131 <$> HM.toList
132 <$> HM.filter (\f' -> Set.member (fst f') lts)
133 <$> mapTermListRoot ls ngt
134 <$> getRepo
135 where
136 toTreeWith (t, (_lt, maybeRoot)) = case maybeRoot of
137 Nothing -> (f t, [])
138 Just r -> (f r, [f t])
139 getTermsWith' :: (HasNodeStory env err m, Eq a, Hashable a)
140 => (NgramsTerm -> a) -> [ListId]
141 -> NgramsType -> Set ListType
142 -> m (HashMap a [a])
143 getTermsWith' f ls ngt lts = HM.fromListWith (<>)
144 <$> map toTreeWith
145 <$> HM.toList
146 <$> HM.filter (\f' -> Set.member (fst f') lts)
147 <$> mapTermListRoot' ls ngt
148 <$> getRepo' ls
149 where
150 toTreeWith (t, (_lt, maybeRoot)) = case maybeRoot of
151 Nothing -> (f t, [])
152 Just r -> (f r, [f t])
153
154
155
156
157 mapTermListRoot :: [ListId]
158 -> NgramsType
159 -> NgramsRepo
160 -> HashMap NgramsTerm (ListType, Maybe NgramsTerm)
161 mapTermListRoot nodeIds ngramsType repo =
162 (\nre -> (_nre_list nre, _nre_root nre))
163 <$> listNgramsFromRepo nodeIds ngramsType repo
164 mapTermListRoot' :: [ListId]
165 -> NgramsType
166 -> NodeListStory
167 -> HashMap NgramsTerm (ListType, Maybe NgramsTerm)
168 mapTermListRoot' nodeIds ngramsType repo =
169 (\nre -> (_nre_list nre, _nre_root nre))
170 <$> listNgramsFromRepo' nodeIds ngramsType repo
171
172
173
174
175
176 filterListWithRootHashMap :: ListType
177 -> HashMap NgramsTerm (ListType, Maybe NgramsTerm)
178 -> HashMap NgramsTerm (Maybe RootTerm)
179 filterListWithRootHashMap lt m = snd <$> HM.filter isMapTerm m
180 where
181 isMapTerm (l, maybeRoot) = case maybeRoot of
182 Nothing -> l == lt
183 Just r -> case HM.lookup r m of
184 Nothing -> panic $ "[Garg.API.Ngrams.Tools] filterWithRoot, unknown key: " <> unNgramsTerm r
185 Just (l',_) -> l' == lt
186
187 filterListWithRoot :: ListType
188 -> HashMap NgramsTerm (ListType, Maybe NgramsTerm)
189 -> HashMap NgramsTerm (Maybe RootTerm)
190 filterListWithRoot lt m = snd <$> HM.filter isMapTerm m
191 where
192 isMapTerm (l, maybeRoot) = case maybeRoot of
193 Nothing -> l == lt
194 Just r -> case HM.lookup r m of
195 Nothing -> panic $ "[Garg.API.Ngrams.Tools] filterWithRoot, unknown key: " <> unNgramsTerm r
196 Just (l',_) -> l' == lt
197
198 groupNodesByNgrams :: ( At root_map
199 , Index root_map ~ NgramsTerm
200 , IxValue root_map ~ Maybe RootTerm
201 )
202 => root_map
203 -> HashMap NgramsTerm (Set NodeId)
204 -> HashMap NgramsTerm (Set NodeId)
205 groupNodesByNgrams syn occs = HM.fromListWith (<>) occs'
206 where
207 occs' = map toSyn (HM.toList occs)
208 toSyn (t,ns) = case syn ^. at t of
209 Nothing -> panic $ "[Garg.API.Ngrams.Tools.groupNodesByNgrams] unknown key: " <> unNgramsTerm t
210 Just r -> case r of
211 Nothing -> (t, ns)
212 Just r' -> (r',ns)
213
214 data Diagonal = Diagonal Bool
215
216 getCoocByNgrams :: Diagonal
217 -> HashMap NgramsTerm (Set NodeId)
218 -> HashMap (NgramsTerm, NgramsTerm) Int
219 getCoocByNgrams = getCoocByNgrams' identity
220
221
222 getCoocByNgrams' :: (Hashable a, Ord a, Ord c)
223 => (b -> Set c)
224 -> Diagonal
225 -> HashMap a b
226 -> HashMap (a, a) Int
227 getCoocByNgrams' f (Diagonal diag) m =
228 HM.fromList [( (t1,t2)
229 , maybe 0 Set.size $ Set.intersection
230 <$> (fmap f $ HM.lookup t1 m)
231 <*> (fmap f $ HM.lookup t2 m)
232 )
233 | (t1,t2) <- if diag then
234 [ (x,y) | x <- ks, y <- ks, x <= y] -- TODO if we keep a Data.Map here it might be
235 -- more efficient to enumerate all the y <= x.
236 else
237 listToCombi identity ks
238 ]
239
240 where ks = HM.keys m
241
242 ------------------------------------------