]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/API/Ngrams/Tools.hs
Merge branch '106-dev-ngrams-score-fix' of ssh://gitlab.iscpif.fr:20022/gargantext...
[gargantext.git] / src / Gargantext / API / Ngrams / Tools.hs
1 {-|
2 Module : Gargantext.API.Ngrams.Tools
3 Description : Tools to manage Ngrams Elements (from the API)
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 -}
11
12 {-# LANGUAGE TypeFamilies #-}
13
14 module Gargantext.API.Ngrams.Tools
15 where
16
17 import Control.Concurrent
18 import Control.Lens (_Just, (^.), at, view, At, Index, IxValue)
19 import Control.Monad.Reader
20 import Data.HashMap.Strict (HashMap)
21 import Data.Hashable (Hashable)
22 import Data.Set (Set)
23 import Data.Validity
24 import Gargantext.API.Ngrams.Types
25 import Gargantext.Core.Types (ListType(..), NodeId, ListId)
26 import Gargantext.Database.Schema.Ngrams (NgramsType)
27 import Gargantext.Prelude
28 import qualified Data.HashMap.Strict as HM
29 import qualified Data.Map.Strict as Map
30 import qualified Data.Set as Set
31 import Gargantext.Core.NodeStory
32
33 mergeNgramsElement :: NgramsRepoElement -> NgramsRepoElement -> NgramsRepoElement
34 mergeNgramsElement _neOld neNew = neNew
35
36 type RootTerm = NgramsTerm
37
38 {-
39 getRepo :: RepoCmdM env err m => m NgramsRepo
40 getRepo = do
41 v <- view repoVar
42 liftBase $ readMVar v
43 -}
44
45 getRepo' :: HasNodeStory env err m
46 => [ListId] -> m NodeListStory
47 getRepo' listIds = do
48 f <- getNodeListStory
49 v <- liftBase $ f listIds
50 v' <- liftBase $ readMVar v
51 pure $ v'
52
53
54 getNodeStoryVar :: HasNodeStory env err m
55 => [ListId] -> m (MVar NodeListStory)
56 getNodeStoryVar l = do
57 f <- getNodeListStory
58 v <- liftBase $ f l
59 pure v
60
61
62 getNodeListStory :: HasNodeStory env err m
63 => m ([NodeId] -> IO (MVar NodeListStory))
64 getNodeListStory = do
65 env <- view hasNodeStory
66 pure $ view nse_getter env
67
68
69
70 listNgramsFromRepo :: [ListId]
71 -> NgramsType
72 -> NodeListStory
73 -> HashMap NgramsTerm NgramsRepoElement
74 listNgramsFromRepo nodeIds ngramsType repo =
75 HM.fromList $ Map.toList
76 $ Map.unionsWith mergeNgramsElement ngrams
77 where
78 ngrams = [ repo
79 ^. unNodeStory
80 . at nodeId . _Just
81 . a_state
82 . at ngramsType . _Just
83 | nodeId <- nodeIds
84 ]
85
86
87
88 -- TODO-ACCESS: We want to do the security check before entering here.
89 -- Add a static capability parameter would be nice.
90 -- Ideally this is the access to `repoVar` which needs to
91 -- be properly guarded.
92 getListNgrams :: HasNodeStory env err m
93 => [ListId] -> NgramsType
94 -> m (HashMap NgramsTerm NgramsRepoElement)
95 getListNgrams nodeIds ngramsType = listNgramsFromRepo nodeIds ngramsType
96 <$> getRepo' nodeIds
97
98
99 getTermsWith :: (HasNodeStory env err m, Eq a, Hashable a)
100 => (NgramsTerm -> a) -> [ListId]
101 -> NgramsType -> Set ListType
102 -> m (HashMap a [a])
103 getTermsWith f ls ngt lts = HM.fromListWith (<>)
104 <$> map toTreeWith
105 <$> HM.toList
106 <$> HM.filter (\f' -> Set.member (fst f') lts)
107 <$> mapTermListRoot ls ngt
108 <$> getRepo' ls
109 where
110 toTreeWith (t, (_lt, maybeRoot)) = case maybeRoot of
111 Nothing -> (f t, [])
112 Just r -> (f r, [f t])
113
114
115
116 mapTermListRoot :: [ListId]
117 -> NgramsType
118 -> NodeListStory
119 -> HashMap NgramsTerm (ListType, Maybe NgramsTerm)
120 mapTermListRoot nodeIds ngramsType repo =
121 (\nre -> (_nre_list nre, _nre_root nre))
122 <$> listNgramsFromRepo nodeIds ngramsType repo
123
124
125
126
127 filterListWithRootHashMap :: ListType
128 -> HashMap NgramsTerm (ListType, Maybe NgramsTerm)
129 -> HashMap NgramsTerm (Maybe RootTerm)
130 filterListWithRootHashMap lt m = snd <$> HM.filter isMapTerm m
131 where
132 isMapTerm (l, maybeRoot) = case maybeRoot of
133 Nothing -> l == lt
134 Just r -> case HM.lookup r m of
135 Nothing -> panic $ "[Garg.API.Ngrams.Tools] filterWithRoot, unknown key: " <> unNgramsTerm r
136 Just (l',_) -> l' == lt
137
138 filterListWithRoot :: [ListType]
139 -> HashMap NgramsTerm (ListType, Maybe NgramsTerm)
140 -> HashMap NgramsTerm (Maybe RootTerm)
141 filterListWithRoot lt m = snd <$> HM.filter isMapTerm m
142 where
143 isMapTerm (l, maybeRoot) = case maybeRoot of
144 Nothing -> elem l lt
145 Just r -> case HM.lookup r m of
146 Nothing -> panic $ "[Garg.API.Ngrams.Tools] filterWithRoot, unknown key: " <> unNgramsTerm r
147 Just (l',_) -> elem l' lt
148
149 groupNodesByNgrams :: ( At root_map
150 , Index root_map ~ NgramsTerm
151 , IxValue root_map ~ Maybe RootTerm
152 )
153 => root_map
154 -> HashMap NgramsTerm (Set NodeId)
155 -> HashMap NgramsTerm (Set NodeId)
156 groupNodesByNgrams syn occs = HM.fromListWith (<>) occs'
157 where
158 occs' = map toSyn (HM.toList occs)
159 toSyn (t,ns) = case syn ^. at t of
160 Nothing -> panic $ "[Garg.API.Ngrams.Tools.groupNodesByNgrams] unknown key: " <> unNgramsTerm t
161 Just r -> case r of
162 Nothing -> (t, ns)
163 Just r' -> (r',ns)
164
165 data Diagonal = Diagonal Bool
166
167 getCoocByNgrams :: Diagonal
168 -> HashMap NgramsTerm (Set NodeId)
169 -> HashMap (NgramsTerm, NgramsTerm) Int
170 getCoocByNgrams = getCoocByNgrams' identity
171
172
173 getCoocByNgrams' :: (Hashable a, Ord a, Ord c)
174 => (b -> Set c)
175 -> Diagonal
176 -> HashMap a b
177 -> HashMap (a, a) Int
178 getCoocByNgrams' f (Diagonal diag) m =
179 HM.fromList [( (t1,t2)
180 , maybe 0 Set.size $ Set.intersection
181 <$> (fmap f $ HM.lookup t1 m)
182 <*> (fmap f $ HM.lookup t2 m)
183 )
184 | (t1,t2) <- if diag then
185 [ (x,y) | x <- ks, y <- ks, x <= y] -- TODO if we keep a Data.Map here it might be
186 -- more efficient to enumerate all the y <= x.
187 else
188 listToCombi identity ks
189 ]
190
191 where ks = HM.keys m
192
193 ------------------------------------------