2 Module : Gargantext.API.Ngrams.Tools
3 Description : Tools to manage Ngrams Elements (from the API)
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
12 {-# LANGUAGE TypeFamilies #-}
14 module Gargantext.API.Ngrams.Tools
17 import Control.Concurrent
18 import Control.Lens (_Just, (^.), at, view, At, Index, IxValue)
19 import Control.Monad.Reader
20 import Data.HashMap.Strict (HashMap)
21 import Data.Hashable (Hashable)
24 import Gargantext.API.Ngrams.Types
25 import Gargantext.Core.Types (ListType(..), NodeId, ListId)
26 import Gargantext.Database.Schema.Ngrams (NgramsType)
27 import Gargantext.Prelude
28 import qualified Data.HashMap.Strict as HM
29 import qualified Data.Map.Strict as Map
30 import qualified Data.Set as Set
31 import qualified Data.List as List
32 import Gargantext.Core.NodeStory
34 mergeNgramsElement :: NgramsRepoElement -> NgramsRepoElement -> NgramsRepoElement
35 mergeNgramsElement _neOld neNew = neNew
37 type RootTerm = NgramsTerm
39 getRepo :: RepoCmdM env err m => m NgramsRepo
44 getRepo' :: HasNodeStory env err m
45 => [ListId] -> m NodeListStory
47 maybeNodeListStory <- head <$> List.reverse <$> mapM getNodeListStory'' listIds
48 case maybeNodeListStory of
49 Nothing -> panic "[G.A.N.Tools.getRepo']"
53 getNodeListStory :: HasNodeStory env err m
54 => m (NodeId -> IO (MVar NodeListStory))
56 env <- view hasNodeStory
57 pure $ view nse_getter env
59 getNodeListStory' :: HasNodeStory env err m
60 => NodeId -> m (IO NodeListStory)
61 getNodeListStory' n = do
66 getNodeListStory'' :: HasNodeStory env err m
67 => NodeId -> m NodeListStory
68 getNodeListStory'' n = do
71 v' <- liftBase $ readMVar v
76 listNgramsFromRepo :: [ListId] -> NgramsType
77 -> NgramsRepo -> HashMap NgramsTerm NgramsRepoElement
78 listNgramsFromRepo nodeIds ngramsType repo = ngrams
80 ngramsMap = repo ^. r_state . at ngramsType . _Just
82 -- TODO HashMap linked
83 ngrams = HM.fromList $ Map.toList $ Map.unionsWith mergeNgramsElement
84 [ ngramsMap ^. at nodeId . _Just | nodeId <- nodeIds ]
87 listNgramsFromRepo' :: [ListId] -> NgramsType
88 -> NodeListStory -> HashMap NgramsTerm NgramsRepoElement
89 listNgramsFromRepo' nodeIds ngramsType repo =
90 HM.fromList $ Map.toList
91 $ Map.unionsWith mergeNgramsElement ngrams
97 . at ngramsType . _Just
103 -- TODO-ACCESS: We want to do the security check before entering here.
104 -- Add a static capability parameter would be nice.
105 -- Ideally this is the access to `repoVar` which needs to
106 -- be properly guarded.
107 getListNgrams :: RepoCmdM env err m
108 => [ListId] -> NgramsType
109 -> m (HashMap NgramsTerm NgramsRepoElement)
110 getListNgrams nodeIds ngramsType = listNgramsFromRepo nodeIds ngramsType <$> getRepo
112 getListNgrams' :: HasNodeStory env err m
113 => [ListId] -> NgramsType
114 -> m (HashMap NgramsTerm NgramsRepoElement)
115 getListNgrams' nodeIds ngramsType = listNgramsFromRepo' nodeIds ngramsType
119 getTermsWith :: (RepoCmdM env err m, Eq a, Hashable a)
120 => (NgramsTerm -> a) -> [ListId]
121 -> NgramsType -> Set ListType
123 getTermsWith f ls ngt lts = HM.fromListWith (<>)
126 <$> HM.filter (\f' -> Set.member (fst f') lts)
127 <$> mapTermListRoot ls ngt
130 toTreeWith (t, (_lt, maybeRoot)) = case maybeRoot of
132 Just r -> (f r, [f t])
133 getTermsWith' :: (HasNodeStory env err m, Eq a, Hashable a)
134 => (NgramsTerm -> a) -> [ListId]
135 -> NgramsType -> Set ListType
137 getTermsWith' f ls ngt lts = HM.fromListWith (<>)
140 <$> HM.filter (\f' -> Set.member (fst f') lts)
141 <$> mapTermListRoot' ls ngt
144 toTreeWith (t, (_lt, maybeRoot)) = case maybeRoot of
146 Just r -> (f r, [f t])
151 mapTermListRoot :: [ListId]
154 -> HashMap NgramsTerm (ListType, Maybe NgramsTerm)
155 mapTermListRoot nodeIds ngramsType repo =
156 (\nre -> (_nre_list nre, _nre_root nre))
157 <$> listNgramsFromRepo nodeIds ngramsType repo
158 mapTermListRoot' :: [ListId]
161 -> HashMap NgramsTerm (ListType, Maybe NgramsTerm)
162 mapTermListRoot' nodeIds ngramsType repo =
163 (\nre -> (_nre_list nre, _nre_root nre))
164 <$> listNgramsFromRepo' nodeIds ngramsType repo
170 filterListWithRootHashMap :: ListType
171 -> HashMap NgramsTerm (ListType, Maybe NgramsTerm)
172 -> HashMap NgramsTerm (Maybe RootTerm)
173 filterListWithRootHashMap lt m = snd <$> HM.filter isMapTerm m
175 isMapTerm (l, maybeRoot) = case maybeRoot of
177 Just r -> case HM.lookup r m of
178 Nothing -> panic $ "[Garg.API.Ngrams.Tools] filterWithRoot, unknown key: " <> unNgramsTerm r
179 Just (l',_) -> l' == lt
181 filterListWithRoot :: ListType
182 -> HashMap NgramsTerm (ListType, Maybe NgramsTerm)
183 -> HashMap NgramsTerm (Maybe RootTerm)
184 filterListWithRoot lt m = snd <$> HM.filter isMapTerm m
186 isMapTerm (l, maybeRoot) = case maybeRoot of
188 Just r -> case HM.lookup r m of
189 Nothing -> panic $ "[Garg.API.Ngrams.Tools] filterWithRoot, unknown key: " <> unNgramsTerm r
190 Just (l',_) -> l' == lt
192 groupNodesByNgrams :: ( At root_map
193 , Index root_map ~ NgramsTerm
194 , IxValue root_map ~ Maybe RootTerm
197 -> HashMap NgramsTerm (Set NodeId)
198 -> HashMap NgramsTerm (Set NodeId)
199 groupNodesByNgrams syn occs = HM.fromListWith (<>) occs'
201 occs' = map toSyn (HM.toList occs)
202 toSyn (t,ns) = case syn ^. at t of
203 Nothing -> panic $ "[Garg.API.Ngrams.Tools.groupNodesByNgrams] unknown key: " <> unNgramsTerm t
208 data Diagonal = Diagonal Bool
210 getCoocByNgrams :: Diagonal
211 -> HashMap NgramsTerm (Set NodeId)
212 -> HashMap (NgramsTerm, NgramsTerm) Int
213 getCoocByNgrams = getCoocByNgrams' identity
216 getCoocByNgrams' :: (Hashable a, Ord a, Ord c)
220 -> HashMap (a, a) Int
221 getCoocByNgrams' f (Diagonal diag) m =
222 HM.fromList [( (t1,t2)
223 , maybe 0 Set.size $ Set.intersection
224 <$> (fmap f $ HM.lookup t1 m)
225 <*> (fmap f $ HM.lookup t2 m)
227 | (t1,t2) <- if diag then
228 [ (x,y) | x <- ks, y <- ks, x <= y] -- TODO if we keep a Data.Map here it might be
229 -- more efficient to enumerate all the y <= x.
231 listToCombi identity ks
236 ------------------------------------------