]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/API/Ngrams/Tools.hs
[TOOLS] repoSize and renaming
[gargantext.git] / src / Gargantext / API / Ngrams / Tools.hs
1 {-|
2 Module : Gargantext.API.Ngrams.Tools
3 Description : Tools to manage Ngrams Elements (from the API)
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 -}
11
12 {-# LANGUAGE TypeFamilies #-}
13
14 module Gargantext.API.Ngrams.Tools
15 where
16
17 import Control.Concurrent
18 import Control.Lens (_Just, (^.), at, view, At, Index, IxValue)
19 import Control.Monad.Reader
20 import Data.HashMap.Strict (HashMap)
21 import Data.Hashable (Hashable)
22 import Data.Set (Set)
23 import Data.Validity
24 import Gargantext.API.Ngrams.Types
25 import Gargantext.Core.Types (ListType(..), NodeId, ListId)
26 import Gargantext.Database.Schema.Ngrams (NgramsType)
27 import Gargantext.Prelude
28 import qualified Data.HashMap.Strict as HM
29 import qualified Data.Map.Strict as Map
30 import qualified Data.Set as Set
31 import Gargantext.Core.NodeStory
32
33 mergeNgramsElement :: NgramsRepoElement -> NgramsRepoElement -> NgramsRepoElement
34 mergeNgramsElement _neOld neNew = neNew
35
36 type RootTerm = NgramsTerm
37
38
39 getRepo :: HasNodeStory env err m
40 => [ListId] -> m NodeListStory
41 getRepo listIds = do
42 f <- getNodeListStory
43 v <- liftBase $ f listIds
44 v' <- liftBase $ readMVar v
45 pure $ v'
46
47
48 repoSize :: Ord k1 => NodeStory (Map.Map k1 (Map.Map k2 a)) p
49 -> NodeId
50 -> Map.Map k1 Int
51 repoSize repo node_id = Map.map Map.size state
52 where
53 state = repo ^. unNodeStory
54 . at node_id . _Just
55 . a_state
56
57
58 getNodeStoryVar :: HasNodeStory env err m
59 => [ListId] -> m (MVar NodeListStory)
60 getNodeStoryVar l = do
61 f <- getNodeListStory
62 v <- liftBase $ f l
63 pure v
64
65
66 getNodeListStory :: HasNodeStory env err m
67 => m ([NodeId] -> IO (MVar NodeListStory))
68 getNodeListStory = do
69 env <- view hasNodeStory
70 pure $ view nse_getter env
71
72
73
74 listNgramsFromRepo :: [ListId]
75 -> NgramsType
76 -> NodeListStory
77 -> HashMap NgramsTerm NgramsRepoElement
78 listNgramsFromRepo nodeIds ngramsType repo =
79 HM.fromList $ Map.toList
80 $ Map.unionsWith mergeNgramsElement ngrams
81 where
82 ngrams = [ repo
83 ^. unNodeStory
84 . at nodeId . _Just
85 . a_state
86 . at ngramsType . _Just
87 | nodeId <- nodeIds
88 ]
89
90 -- TODO-ACCESS: We want to do the security check before entering here.
91 -- Add a static capability parameter would be nice.
92 -- Ideally this is the access to `repoVar` which needs to
93 -- be properly guarded.
94 getListNgrams :: HasNodeStory env err m
95 => [ListId] -> NgramsType
96 -> m (HashMap NgramsTerm NgramsRepoElement)
97 getListNgrams nodeIds ngramsType = listNgramsFromRepo nodeIds ngramsType
98 <$> getRepo nodeIds
99
100
101 getTermsWith :: (HasNodeStory env err m, Eq a, Hashable a)
102 => (NgramsTerm -> a) -> [ListId]
103 -> NgramsType -> Set ListType
104 -> m (HashMap a [a])
105 getTermsWith f ls ngt lts = HM.fromListWith (<>)
106 <$> map toTreeWith
107 <$> HM.toList
108 <$> HM.filter (\f' -> Set.member (fst f') lts)
109 <$> mapTermListRoot ls ngt
110 <$> getRepo ls
111 where
112 toTreeWith (t, (_lt, maybeRoot)) = case maybeRoot of
113 Nothing -> (f t, [])
114 Just r -> (f r, [f t])
115
116
117
118 mapTermListRoot :: [ListId]
119 -> NgramsType
120 -> NodeListStory
121 -> HashMap NgramsTerm (ListType, Maybe NgramsTerm)
122 mapTermListRoot nodeIds ngramsType repo =
123 (\nre -> (_nre_list nre, _nre_root nre))
124 <$> listNgramsFromRepo nodeIds ngramsType repo
125
126
127
128
129 filterListWithRootHashMap :: ListType
130 -> HashMap NgramsTerm (ListType, Maybe NgramsTerm)
131 -> HashMap NgramsTerm (Maybe RootTerm)
132 filterListWithRootHashMap lt m = snd <$> HM.filter isMapTerm m
133 where
134 isMapTerm (l, maybeRoot) = case maybeRoot of
135 Nothing -> l == lt
136 Just r -> case HM.lookup r m of
137 Nothing -> panic $ "[Garg.API.Ngrams.Tools] filterWithRoot, unknown key: " <> unNgramsTerm r
138 Just (l',_) -> l' == lt
139
140 filterListWithRoot :: [ListType]
141 -> HashMap NgramsTerm (ListType, Maybe NgramsTerm)
142 -> HashMap NgramsTerm (Maybe RootTerm)
143 filterListWithRoot lt m = snd <$> HM.filter isMapTerm m
144 where
145 isMapTerm (l, maybeRoot) = case maybeRoot of
146 Nothing -> elem l lt
147 Just r -> case HM.lookup r m of
148 Nothing -> panic $ "[Garg.API.Ngrams.Tools] filterWithRoot, unknown key: " <> unNgramsTerm r
149 Just (l',_) -> elem l' lt
150
151 groupNodesByNgrams :: ( At root_map
152 , Index root_map ~ NgramsTerm
153 , IxValue root_map ~ Maybe RootTerm
154 )
155 => root_map
156 -> HashMap NgramsTerm (Set NodeId)
157 -> HashMap NgramsTerm (Set NodeId)
158 groupNodesByNgrams syn occs = HM.fromListWith (<>) occs'
159 where
160 occs' = map toSyn (HM.toList occs)
161 toSyn (t,ns) = case syn ^. at t of
162 Nothing -> panic $ "[Garg.API.Ngrams.Tools.groupNodesByNgrams] unknown key: " <> unNgramsTerm t
163 Just r -> case r of
164 Nothing -> (t, ns)
165 Just r' -> (r',ns)
166
167 data Diagonal = Diagonal Bool
168
169 getCoocByNgrams :: Diagonal
170 -> HashMap NgramsTerm (Set NodeId)
171 -> HashMap (NgramsTerm, NgramsTerm) Int
172 getCoocByNgrams = getCoocByNgrams' identity
173
174
175 getCoocByNgrams' :: (Hashable a, Ord a, Ord c)
176 => (b -> Set c)
177 -> Diagonal
178 -> HashMap a b
179 -> HashMap (a, a) Int
180 getCoocByNgrams' f (Diagonal diag) m =
181 HM.fromList [( (t1,t2)
182 , maybe 0 Set.size $ Set.intersection
183 <$> (fmap f $ HM.lookup t1 m)
184 <*> (fmap f $ HM.lookup t2 m)
185 )
186 | (t1,t2) <- if diag then
187 [ (x,y) | x <- ks, y <- ks, x <= y] -- TODO if we keep a Data.Map here it might be
188 -- more efficient to enumerate all the y <= x.
189 else
190 listToCombi identity ks
191 ]
192
193 where ks = HM.keys m
194
195 ------------------------------------------