2 Module : Gargantext.API.Ngrams.Tools
3 Description : Tools to manage Ngrams Elements (from the API)
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
12 {-# LANGUAGE TypeFamilies #-}
14 module Gargantext.API.Ngrams.Tools
17 import Control.Concurrent
18 import Control.Lens (_Just, (^.), at, view, At, Index, IxValue)
19 import Control.Monad.Reader
20 import Data.HashMap.Strict (HashMap)
21 import Data.Hashable (Hashable)
24 import Gargantext.API.Ngrams.Types
25 import Gargantext.Core.Types (ListType(..), NodeId, ListId)
26 import Gargantext.Database.Schema.Ngrams (NgramsType)
27 import Gargantext.Prelude
28 import qualified Data.HashMap.Strict as HM
29 import qualified Data.Map.Strict as Map
30 import qualified Data.Set as Set
31 import qualified Data.List as List
32 import Gargantext.Core.NodeStory
34 mergeNgramsElement :: NgramsRepoElement -> NgramsRepoElement -> NgramsRepoElement
35 mergeNgramsElement _neOld neNew = neNew
37 type RootTerm = NgramsTerm
39 getRepo :: RepoCmdM env err m => m NgramsRepo
44 getRepo' :: HasNodeStory env err m
45 => [ListId] -> m NodeListStory
47 maybeNodeListStory <- head <$> List.reverse <$> mapM getNodeListStory'' listIds
48 case maybeNodeListStory of
49 Nothing -> panic "[G.A.N.Tools.getRepo']"
52 getRepoVar :: HasNodeStory env err m
53 => ListId -> m (MVar NodeListStory)
59 getNodeListStory :: HasNodeStory env err m
60 => m (NodeId -> IO (MVar NodeListStory))
62 env <- view hasNodeStory
63 pure $ view nse_getter env
65 getNodeListStory' :: HasNodeStory env err m
66 => NodeId -> m (IO NodeListStory)
67 getNodeListStory' n = do
72 getNodeListStory'' :: HasNodeStory env err m
73 => NodeId -> m NodeListStory
74 getNodeListStory'' n = do
77 v' <- liftBase $ readMVar v
82 listNgramsFromRepo :: [ListId] -> NgramsType
83 -> NgramsRepo -> HashMap NgramsTerm NgramsRepoElement
84 listNgramsFromRepo nodeIds ngramsType repo = ngrams
86 ngramsMap = repo ^. r_state . at ngramsType . _Just
88 -- TODO HashMap linked
89 ngrams = HM.fromList $ Map.toList $ Map.unionsWith mergeNgramsElement
90 [ ngramsMap ^. at nodeId . _Just | nodeId <- nodeIds ]
93 listNgramsFromRepo' :: [ListId] -> NgramsType
94 -> NodeListStory -> HashMap NgramsTerm NgramsRepoElement
95 listNgramsFromRepo' nodeIds ngramsType repo =
96 HM.fromList $ Map.toList
97 $ Map.unionsWith mergeNgramsElement ngrams
103 . at ngramsType . _Just
109 -- TODO-ACCESS: We want to do the security check before entering here.
110 -- Add a static capability parameter would be nice.
111 -- Ideally this is the access to `repoVar` which needs to
112 -- be properly guarded.
113 getListNgrams :: RepoCmdM env err m
114 => [ListId] -> NgramsType
115 -> m (HashMap NgramsTerm NgramsRepoElement)
116 getListNgrams nodeIds ngramsType = listNgramsFromRepo nodeIds ngramsType <$> getRepo
118 getListNgrams' :: HasNodeStory env err m
119 => [ListId] -> NgramsType
120 -> m (HashMap NgramsTerm NgramsRepoElement)
121 getListNgrams' nodeIds ngramsType = listNgramsFromRepo' nodeIds ngramsType
125 getTermsWith :: (RepoCmdM env err m, Eq a, Hashable a)
126 => (NgramsTerm -> a) -> [ListId]
127 -> NgramsType -> Set ListType
129 getTermsWith f ls ngt lts = HM.fromListWith (<>)
132 <$> HM.filter (\f' -> Set.member (fst f') lts)
133 <$> mapTermListRoot ls ngt
136 toTreeWith (t, (_lt, maybeRoot)) = case maybeRoot of
138 Just r -> (f r, [f t])
139 getTermsWith' :: (HasNodeStory env err m, Eq a, Hashable a)
140 => (NgramsTerm -> a) -> [ListId]
141 -> NgramsType -> Set ListType
143 getTermsWith' f ls ngt lts = HM.fromListWith (<>)
146 <$> HM.filter (\f' -> Set.member (fst f') lts)
147 <$> mapTermListRoot' ls ngt
150 toTreeWith (t, (_lt, maybeRoot)) = case maybeRoot of
152 Just r -> (f r, [f t])
157 mapTermListRoot :: [ListId]
160 -> HashMap NgramsTerm (ListType, Maybe NgramsTerm)
161 mapTermListRoot nodeIds ngramsType repo =
162 (\nre -> (_nre_list nre, _nre_root nre))
163 <$> listNgramsFromRepo nodeIds ngramsType repo
164 mapTermListRoot' :: [ListId]
167 -> HashMap NgramsTerm (ListType, Maybe NgramsTerm)
168 mapTermListRoot' nodeIds ngramsType repo =
169 (\nre -> (_nre_list nre, _nre_root nre))
170 <$> listNgramsFromRepo' nodeIds ngramsType repo
176 filterListWithRootHashMap :: ListType
177 -> HashMap NgramsTerm (ListType, Maybe NgramsTerm)
178 -> HashMap NgramsTerm (Maybe RootTerm)
179 filterListWithRootHashMap lt m = snd <$> HM.filter isMapTerm m
181 isMapTerm (l, maybeRoot) = case maybeRoot of
183 Just r -> case HM.lookup r m of
184 Nothing -> panic $ "[Garg.API.Ngrams.Tools] filterWithRoot, unknown key: " <> unNgramsTerm r
185 Just (l',_) -> l' == lt
187 filterListWithRoot :: ListType
188 -> HashMap NgramsTerm (ListType, Maybe NgramsTerm)
189 -> HashMap NgramsTerm (Maybe RootTerm)
190 filterListWithRoot lt m = snd <$> HM.filter isMapTerm m
192 isMapTerm (l, maybeRoot) = case maybeRoot of
194 Just r -> case HM.lookup r m of
195 Nothing -> panic $ "[Garg.API.Ngrams.Tools] filterWithRoot, unknown key: " <> unNgramsTerm r
196 Just (l',_) -> l' == lt
198 groupNodesByNgrams :: ( At root_map
199 , Index root_map ~ NgramsTerm
200 , IxValue root_map ~ Maybe RootTerm
203 -> HashMap NgramsTerm (Set NodeId)
204 -> HashMap NgramsTerm (Set NodeId)
205 groupNodesByNgrams syn occs = HM.fromListWith (<>) occs'
207 occs' = map toSyn (HM.toList occs)
208 toSyn (t,ns) = case syn ^. at t of
209 Nothing -> panic $ "[Garg.API.Ngrams.Tools.groupNodesByNgrams] unknown key: " <> unNgramsTerm t
214 data Diagonal = Diagonal Bool
216 getCoocByNgrams :: Diagonal
217 -> HashMap NgramsTerm (Set NodeId)
218 -> HashMap (NgramsTerm, NgramsTerm) Int
219 getCoocByNgrams = getCoocByNgrams' identity
222 getCoocByNgrams' :: (Hashable a, Ord a, Ord c)
226 -> HashMap (a, a) Int
227 getCoocByNgrams' f (Diagonal diag) m =
228 HM.fromList [( (t1,t2)
229 , maybe 0 Set.size $ Set.intersection
230 <$> (fmap f $ HM.lookup t1 m)
231 <*> (fmap f $ HM.lookup t2 m)
233 | (t1,t2) <- if diag then
234 [ (x,y) | x <- ks, y <- ks, x <= y] -- TODO if we keep a Data.Map here it might be
235 -- more efficient to enumerate all the y <= x.
237 listToCombi identity ks
242 ------------------------------------------