]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/API/Ngrams/Tools.hs
Merge remote-tracking branch 'origin/551-dev-graphql-contexts-ngrams' into dev-merge
[gargantext.git] / src / Gargantext / API / Ngrams / Tools.hs
1 {-|
2 Module : Gargantext.API.Ngrams.Tools
3 Description : Tools to manage Ngrams Elements (from the API)
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 -}
11
12 {-# LANGUAGE TypeFamilies #-}
13
14 module Gargantext.API.Ngrams.Tools
15 where
16
17 import Control.Concurrent
18 import Control.Lens (_Just, (^.), at, view, At, Index, IxValue)
19 import Control.Monad.Reader
20 import Data.HashMap.Strict (HashMap)
21 import Data.Hashable (Hashable)
22 import Data.Pool (withResource)
23 import Data.Set (Set)
24 import Data.Validity
25 import Gargantext.API.Ngrams.Types
26 import Gargantext.Core.NodeStory
27 import Gargantext.Core.Types (ListType(..), NodeId, NodeType(..), ListId)
28 import Gargantext.Database.Prelude (CmdM, HasConnectionPool(..))
29 import Gargantext.Database.Schema.Ngrams (NgramsType)
30 import Gargantext.Prelude
31 import qualified Data.HashMap.Strict as HM
32 import qualified Data.Map.Strict as Map
33 import qualified Data.Set as Set
34 import qualified Gargantext.Core.NodeStoryFile as NSF
35
36
37 mergeNgramsElement :: NgramsRepoElement -> NgramsRepoElement -> NgramsRepoElement
38 mergeNgramsElement _neOld neNew = neNew
39
40 type RootTerm = NgramsTerm
41
42
43 getRepo :: HasNodeStory env err m
44 => [ListId] -> m NodeListStory
45 getRepo listIds = do
46 f <- getNodeListStory
47 v <- liftBase $ f listIds
48 v' <- liftBase $ readMVar v
49 pure $ v'
50
51
52 repoSize :: Ord k1 => NodeStory (Map.Map k1 (Map.Map k2 a)) p
53 -> NodeId
54 -> Map.Map k1 Int
55 repoSize repo node_id = Map.map Map.size state
56 where
57 state = repo ^. unNodeStory
58 . at node_id . _Just
59 . a_state
60
61
62 getNodeStoryVar :: HasNodeStory env err m
63 => [ListId] -> m (MVar NodeListStory)
64 getNodeStoryVar l = do
65 f <- getNodeListStory
66 v <- liftBase $ f l
67 pure v
68
69
70 getNodeListStory :: HasNodeStory env err m
71 => m ([NodeId] -> IO (MVar NodeListStory))
72 getNodeListStory = do
73 env <- view hasNodeStory
74 pure $ view nse_getter env
75
76
77
78 listNgramsFromRepo :: [ListId]
79 -> NgramsType
80 -> NodeListStory
81 -> HashMap NgramsTerm NgramsRepoElement
82 listNgramsFromRepo nodeIds ngramsType repo =
83 HM.fromList $ Map.toList
84 $ Map.unionsWith mergeNgramsElement ngrams
85 where
86 ngrams = [ repo
87 ^. unNodeStory
88 . at nodeId . _Just
89 . a_state
90 . at ngramsType . _Just
91 | nodeId <- nodeIds
92 ]
93
94 -- TODO-ACCESS: We want to do the security check before entering here.
95 -- Add a static capability parameter would be nice.
96 -- Ideally this is the access to `repoVar` which needs to
97 -- be properly guarded.
98 getListNgrams :: HasNodeStory env err m
99 => [ListId] -> NgramsType
100 -> m (HashMap NgramsTerm NgramsRepoElement)
101 getListNgrams nodeIds ngramsType = listNgramsFromRepo nodeIds ngramsType
102 <$> getRepo nodeIds
103
104
105 getTermsWith :: (HasNodeStory env err m, Eq a, Hashable a)
106 => (NgramsTerm -> a) -> [ListId]
107 -> NgramsType -> Set ListType
108 -> m (HashMap a [a])
109 getTermsWith f ls ngt lts = HM.fromListWith (<>)
110 <$> map toTreeWith
111 <$> HM.toList
112 <$> HM.filter (\f' -> Set.member (fst f') lts)
113 <$> mapTermListRoot ls ngt
114 <$> getRepo ls
115 where
116 toTreeWith (t, (_lt, maybeRoot)) = case maybeRoot of
117 Nothing -> (f t, [])
118 Just r -> (f r, [f t])
119
120
121
122 mapTermListRoot :: [ListId]
123 -> NgramsType
124 -> NodeListStory
125 -> HashMap NgramsTerm (ListType, Maybe NgramsTerm)
126 mapTermListRoot nodeIds ngramsType repo =
127 (\nre -> (_nre_list nre, _nre_root nre))
128 <$> listNgramsFromRepo nodeIds ngramsType repo
129
130
131
132
133 filterListWithRootHashMap :: ListType
134 -> HashMap NgramsTerm (ListType, Maybe NgramsTerm)
135 -> HashMap NgramsTerm (Maybe RootTerm)
136 filterListWithRootHashMap lt m = snd <$> HM.filter isMapTerm m
137 where
138 isMapTerm (l, maybeRoot) = case maybeRoot of
139 Nothing -> l == lt
140 Just r -> case HM.lookup r m of
141 Nothing -> panic $ "[Garg.API.Ngrams.Tools] filterListWithRootHashMap, unknown key: " <> unNgramsTerm r
142 Just (l',_) -> l' == lt
143
144 filterListWithRoot :: [ListType]
145 -> HashMap NgramsTerm (ListType, Maybe NgramsTerm)
146 -> HashMap NgramsTerm (Maybe RootTerm)
147 filterListWithRoot lt m = snd <$> HM.filter isMapTerm m
148 where
149 isMapTerm (l, maybeRoot) = case maybeRoot of
150 Nothing -> elem l lt
151 Just r -> case HM.lookup r m of
152 Nothing -> panic $ "[Garg.API.Ngrams.Tools] filterListWithRoot, unknown key: " <> unNgramsTerm r
153 Just (l',_) -> elem l' lt
154
155 groupNodesByNgrams :: ( At root_map
156 , Index root_map ~ NgramsTerm
157 , IxValue root_map ~ Maybe RootTerm
158 )
159 => root_map
160 -> HashMap NgramsTerm (Set NodeId)
161 -> HashMap NgramsTerm (Set NodeId)
162 groupNodesByNgrams syn occs = HM.fromListWith (<>) occs'
163 where
164 occs' = map toSyn (HM.toList occs)
165 toSyn (t,ns) = case syn ^. at t of
166 Nothing -> panic $ "[Garg.API.Ngrams.Tools.groupNodesByNgrams] unknown key: " <> unNgramsTerm t
167 Just r -> case r of
168 Nothing -> (t, ns)
169 Just r' -> (r',ns)
170
171 data Diagonal = Diagonal Bool
172
173 getCoocByNgrams :: Diagonal
174 -> HashMap NgramsTerm (Set NodeId)
175 -> HashMap (NgramsTerm, NgramsTerm) Int
176 getCoocByNgrams = getCoocByNgrams' identity
177
178
179 getCoocByNgrams' :: (Hashable a, Ord a, Ord c)
180 => (b -> Set c)
181 -> Diagonal
182 -> HashMap a b
183 -> HashMap (a, a) Int
184 getCoocByNgrams' f (Diagonal diag) m =
185 HM.fromList [( (t1,t2)
186 , maybe 0 Set.size $ Set.intersection
187 <$> (fmap f $ HM.lookup t1 m)
188 <*> (fmap f $ HM.lookup t2 m)
189 )
190 | (t1,t2) <- if diag
191 then [ (x,y) | x <- ks, y <- ks, x <= y]
192 -- TODO if we keep a Data.Map here it might be
193 -- more efficient to enumerate all the y <= x.
194 else
195 listToCombi identity ks
196 ]
197
198 where
199 ks = HM.keys m
200
201 -- TODO k could be either k1 or k2 here
202 getCoocByNgrams'' :: (Hashable k, Ord k, Ord contexts)
203 => Diagonal
204 -> (contextA -> Set contexts, contextB -> Set contexts)
205 -> (HashMap k contextA, HashMap k contextB)
206 -> HashMap (k, k) Int
207 getCoocByNgrams'' (Diagonal diag) (f1,f2) (m1,m2) =
208 HM.fromList [( (t1,t2)
209 , maybe 0 Set.size $ Set.intersection
210 <$> (fmap f1 $ HM.lookup t1 m1)
211 <*> (fmap f2 $ HM.lookup t2 m2)
212 )
213 | (t1,t2) <- if diag
214 then
215 [ (x,y) | x <- ks1, y <- ks2, x <= y]
216 -- TODO if we keep a Data.Map here it might be
217 -- more efficient to enumerate all the y <= x.
218 else
219 [ (x,y) | x <- ks1, y <- ks2, x < y]
220 -- TODO check optim
221 -- listToCombi identity ks1
222 ]
223 where
224 ks1 = HM.keys m1
225 ks2 = HM.keys m2
226
227
228
229 ------------------------------------------
230
231
232 migrateFromDirToDb :: (CmdM env err m) -- , HasNodeStory env err m)
233 => m ()
234 migrateFromDirToDb = do
235 pool <- view connPool
236 withResource pool $ \c -> do
237 listIds <- liftBase $ getNodesIdWithType c NodeList
238 -- printDebug "[migrateFromDirToDb] listIds" listIds
239 (NodeStory nls) <- NSF.getRepoReadConfig listIds
240 -- printDebug "[migrateFromDirToDb] nls" nls
241 _ <- mapM (\(nId, a) -> do
242 n <- liftBase $ nodeExists c nId
243 case n of
244 False -> pure ()
245 True -> liftBase $ upsertNodeStories c nId a
246 ) $ Map.toList nls
247 --_ <- nodeStoryIncs (Just $ NodeStory nls) listIds
248 pure ()