]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/API/Ngrams/Tools.hs
[FIX] merge
[gargantext.git] / src / Gargantext / API / Ngrams / Tools.hs
1 {-|
2 Module : Gargantext.API.Ngrams.Tools
3 Description : Tools to manage Ngrams Elements (from the API)
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 -}
11
12 {-# LANGUAGE TypeFamilies #-}
13
14 module Gargantext.API.Ngrams.Tools
15 where
16
17 import Control.Concurrent
18 import Control.Lens (_Just, (^.), at, view, At, Index, IxValue)
19 import Control.Monad.Reader
20 import Data.HashMap.Strict (HashMap)
21 import Data.Hashable (Hashable)
22 import Data.Set (Set)
23 import Data.Validity
24 import Gargantext.API.Ngrams.Types
25 import Gargantext.Core.Types (ListType(..), NodeId, ListId)
26 import Gargantext.Database.Schema.Ngrams (NgramsType)
27 import Gargantext.Prelude
28 import qualified Data.HashMap.Strict as HM
29 import qualified Data.Map.Strict as Map
30 import qualified Data.Set as Set
31 import qualified Data.List as List
32 import Gargantext.Core.NodeStory
33
34 mergeNgramsElement :: NgramsRepoElement -> NgramsRepoElement -> NgramsRepoElement
35 mergeNgramsElement _neOld neNew = neNew
36
37 type RootTerm = NgramsTerm
38
39
40 getRepo :: RepoCmdM env err m => m NgramsRepo
41 getRepo = do
42 v <- view repoVar
43 liftBase $ readMVar v
44
45
46 getRepo' :: HasNodeStory env err m
47 => [ListId] -> m NodeListStory
48 getRepo' listIds = do
49 maybeNodeListStory <- head <$> List.reverse <$> mapM getNodeListStory'' listIds
50 case maybeNodeListStory of
51 Nothing -> panic "[G.A.N.Tools.getRepo']"
52 Just nls -> pure nls
53
54 getRepoVar :: HasNodeStory env err m
55 => ListId -> m (MVar NodeListStory)
56 getRepoVar l = do
57 f <- getNodeListStory
58 v <- liftBase $ f l
59 pure v
60
61 getNodeListStory :: HasNodeStory env err m
62 => m (NodeId -> IO (MVar NodeListStory))
63 getNodeListStory = do
64 env <- view hasNodeStory
65 pure $ view nse_getter env
66
67 getNodeListStory' :: HasNodeStory env err m
68 => NodeId -> m (IO NodeListStory)
69 getNodeListStory' n = do
70 f <- getNodeListStory
71 v <- liftBase $ f n
72 pure $ readMVar v
73
74 getNodeListStory'' :: HasNodeStory env err m
75 => NodeId -> m NodeListStory
76 getNodeListStory'' n = do
77 f <- getNodeListStory
78 v <- liftBase $ f n
79 v' <- liftBase $ readMVar v
80 pure $ v'
81
82
83
84 listNgramsFromRepo :: [ListId] -> NgramsType
85 -> NodeListStory -> HashMap NgramsTerm NgramsRepoElement
86 listNgramsFromRepo nodeIds ngramsType repo =
87 HM.fromList $ Map.toList
88 $ Map.unionsWith mergeNgramsElement ngrams
89 where
90 ngrams = [ repo
91 ^. unNodeStory
92 . at nodeId . _Just
93 . a_state
94 . at ngramsType . _Just
95 | nodeId <- nodeIds
96 ]
97
98
99
100 -- TODO-ACCESS: We want to do the security check before entering here.
101 -- Add a static capability parameter would be nice.
102 -- Ideally this is the access to `repoVar` which needs to
103 -- be properly guarded.
104 getListNgrams :: HasNodeStory env err m
105 => [ListId] -> NgramsType
106 -> m (HashMap NgramsTerm NgramsRepoElement)
107 getListNgrams nodeIds ngramsType = listNgramsFromRepo nodeIds ngramsType
108 <$> getRepo' nodeIds
109
110
111 getTermsWith :: (HasNodeStory env err m, Eq a, Hashable a)
112 => (NgramsTerm -> a) -> [ListId]
113 -> NgramsType -> Set ListType
114 -> m (HashMap a [a])
115 getTermsWith f ls ngt lts = HM.fromListWith (<>)
116 <$> map toTreeWith
117 <$> HM.toList
118 <$> HM.filter (\f' -> Set.member (fst f') lts)
119 <$> mapTermListRoot ls ngt
120 <$> getRepo' ls
121 where
122 toTreeWith (t, (_lt, maybeRoot)) = case maybeRoot of
123 Nothing -> (f t, [])
124 Just r -> (f r, [f t])
125
126
127
128 mapTermListRoot :: [ListId]
129 -> NgramsType
130 -> NodeListStory
131 -> HashMap NgramsTerm (ListType, Maybe NgramsTerm)
132 mapTermListRoot nodeIds ngramsType repo =
133 (\nre -> (_nre_list nre, _nre_root nre))
134 <$> listNgramsFromRepo nodeIds ngramsType repo
135
136
137
138
139 filterListWithRootHashMap :: ListType
140 -> HashMap NgramsTerm (ListType, Maybe NgramsTerm)
141 -> HashMap NgramsTerm (Maybe RootTerm)
142 filterListWithRootHashMap lt m = snd <$> HM.filter isMapTerm m
143 where
144 isMapTerm (l, maybeRoot) = case maybeRoot of
145 Nothing -> l == lt
146 Just r -> case HM.lookup r m of
147 Nothing -> panic $ "[Garg.API.Ngrams.Tools] filterWithRoot, unknown key: " <> unNgramsTerm r
148 Just (l',_) -> l' == lt
149
150 filterListWithRoot :: ListType
151 -> HashMap NgramsTerm (ListType, Maybe NgramsTerm)
152 -> HashMap NgramsTerm (Maybe RootTerm)
153 filterListWithRoot lt m = snd <$> HM.filter isMapTerm m
154 where
155 isMapTerm (l, maybeRoot) = case maybeRoot of
156 Nothing -> l == lt
157 Just r -> case HM.lookup r m of
158 Nothing -> panic $ "[Garg.API.Ngrams.Tools] filterWithRoot, unknown key: " <> unNgramsTerm r
159 Just (l',_) -> l' == lt
160
161 groupNodesByNgrams :: ( At root_map
162 , Index root_map ~ NgramsTerm
163 , IxValue root_map ~ Maybe RootTerm
164 )
165 => root_map
166 -> HashMap NgramsTerm (Set NodeId)
167 -> HashMap NgramsTerm (Set NodeId)
168 groupNodesByNgrams syn occs = HM.fromListWith (<>) occs'
169 where
170 occs' = map toSyn (HM.toList occs)
171 toSyn (t,ns) = case syn ^. at t of
172 Nothing -> panic $ "[Garg.API.Ngrams.Tools.groupNodesByNgrams] unknown key: " <> unNgramsTerm t
173 Just r -> case r of
174 Nothing -> (t, ns)
175 Just r' -> (r',ns)
176
177 data Diagonal = Diagonal Bool
178
179 getCoocByNgrams :: Diagonal
180 -> HashMap NgramsTerm (Set NodeId)
181 -> HashMap (NgramsTerm, NgramsTerm) Int
182 getCoocByNgrams = getCoocByNgrams' identity
183
184
185 getCoocByNgrams' :: (Hashable a, Ord a, Ord c)
186 => (b -> Set c)
187 -> Diagonal
188 -> HashMap a b
189 -> HashMap (a, a) Int
190 getCoocByNgrams' f (Diagonal diag) m =
191 HM.fromList [( (t1,t2)
192 , maybe 0 Set.size $ Set.intersection
193 <$> (fmap f $ HM.lookup t1 m)
194 <*> (fmap f $ HM.lookup t2 m)
195 )
196 | (t1,t2) <- if diag then
197 [ (x,y) | x <- ks, y <- ks, x <= y] -- TODO if we keep a Data.Map here it might be
198 -- more efficient to enumerate all the y <= x.
199 else
200 listToCombi identity ks
201 ]
202
203 where ks = HM.keys m
204
205 ------------------------------------------