]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/API/Ngrams/Tools.hs
[nodeStory] optimize node_stories table
[gargantext.git] / src / Gargantext / API / Ngrams / Tools.hs
1 {-|
2 Module : Gargantext.API.Ngrams.Tools
3 Description : Tools to manage Ngrams Elements (from the API)
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 -}
11
12 {-# LANGUAGE TypeFamilies #-}
13
14 module Gargantext.API.Ngrams.Tools
15 where
16
17 import Control.Concurrent
18 import Control.Lens (_Just, (^.), at, view, At, Index, IxValue)
19 import Control.Monad.Reader
20 import Data.HashMap.Strict (HashMap)
21 import Data.Hashable (Hashable)
22 import Data.Set (Set)
23 import Data.Validity
24 import Gargantext.API.Ngrams.Types
25 import Gargantext.Core.NodeStory
26 import Gargantext.Core.Types (ListType(..), NodeId, NodeType(..), ListId)
27 import Gargantext.Database.Prelude (CmdM, HasConnectionPool(..))
28 import Gargantext.Database.Schema.Ngrams (NgramsType)
29 import Gargantext.Prelude
30 import qualified Data.HashMap.Strict as HM
31 import qualified Data.Map.Strict as Map
32 import qualified Data.Set as Set
33 import qualified Gargantext.Core.NodeStoryFile as NSF
34
35
36 mergeNgramsElement :: NgramsRepoElement -> NgramsRepoElement -> NgramsRepoElement
37 mergeNgramsElement _neOld neNew = neNew
38
39 type RootTerm = NgramsTerm
40
41
42 getRepo :: HasNodeStory env err m
43 => [ListId] -> m NodeListStory
44 getRepo listIds = do
45 f <- getNodeListStory
46 v <- liftBase $ f listIds
47 v' <- liftBase $ readMVar v
48 pure $ v'
49
50
51 repoSize :: Ord k1 => NodeStory (Map.Map k1 (Map.Map k2 a)) p
52 -> NodeId
53 -> Map.Map k1 Int
54 repoSize repo node_id = Map.map Map.size state
55 where
56 state = repo ^. unNodeStory
57 . at node_id . _Just
58 . a_state
59
60
61 getNodeStoryVar :: HasNodeStory env err m
62 => [ListId] -> m (MVar NodeListStory)
63 getNodeStoryVar l = do
64 f <- getNodeListStory
65 v <- liftBase $ f l
66 pure v
67
68
69 getNodeListStory :: HasNodeStory env err m
70 => m ([NodeId] -> IO (MVar NodeListStory))
71 getNodeListStory = do
72 env <- view hasNodeStory
73 pure $ view nse_getter env
74
75
76
77 listNgramsFromRepo :: [ListId]
78 -> NgramsType
79 -> NodeListStory
80 -> HashMap NgramsTerm NgramsRepoElement
81 listNgramsFromRepo nodeIds ngramsType repo =
82 HM.fromList $ Map.toList
83 $ Map.unionsWith mergeNgramsElement ngrams
84 where
85 ngrams = [ repo
86 ^. unNodeStory
87 . at nodeId . _Just
88 . a_state
89 . at ngramsType . _Just
90 | nodeId <- nodeIds
91 ]
92
93 -- TODO-ACCESS: We want to do the security check before entering here.
94 -- Add a static capability parameter would be nice.
95 -- Ideally this is the access to `repoVar` which needs to
96 -- be properly guarded.
97 getListNgrams :: HasNodeStory env err m
98 => [ListId] -> NgramsType
99 -> m (HashMap NgramsTerm NgramsRepoElement)
100 getListNgrams nodeIds ngramsType = listNgramsFromRepo nodeIds ngramsType
101 <$> getRepo nodeIds
102
103
104 getTermsWith :: (HasNodeStory env err m, Eq a, Hashable a)
105 => (NgramsTerm -> a) -> [ListId]
106 -> NgramsType -> Set ListType
107 -> m (HashMap a [a])
108 getTermsWith f ls ngt lts = HM.fromListWith (<>)
109 <$> map toTreeWith
110 <$> HM.toList
111 <$> HM.filter (\f' -> Set.member (fst f') lts)
112 <$> mapTermListRoot ls ngt
113 <$> getRepo ls
114 where
115 toTreeWith (t, (_lt, maybeRoot)) = case maybeRoot of
116 Nothing -> (f t, [])
117 Just r -> (f r, [f t])
118
119
120
121 mapTermListRoot :: [ListId]
122 -> NgramsType
123 -> NodeListStory
124 -> HashMap NgramsTerm (ListType, Maybe NgramsTerm)
125 mapTermListRoot nodeIds ngramsType repo =
126 (\nre -> (_nre_list nre, _nre_root nre))
127 <$> listNgramsFromRepo nodeIds ngramsType repo
128
129
130
131
132 filterListWithRootHashMap :: ListType
133 -> HashMap NgramsTerm (ListType, Maybe NgramsTerm)
134 -> HashMap NgramsTerm (Maybe RootTerm)
135 filterListWithRootHashMap lt m = snd <$> HM.filter isMapTerm m
136 where
137 isMapTerm (l, maybeRoot) = case maybeRoot of
138 Nothing -> l == lt
139 Just r -> case HM.lookup r m of
140 Nothing -> panic $ "[Garg.API.Ngrams.Tools] filterWithRoot, unknown key: " <> unNgramsTerm r
141 Just (l',_) -> l' == lt
142
143 filterListWithRoot :: [ListType]
144 -> HashMap NgramsTerm (ListType, Maybe NgramsTerm)
145 -> HashMap NgramsTerm (Maybe RootTerm)
146 filterListWithRoot lt m = snd <$> HM.filter isMapTerm m
147 where
148 isMapTerm (l, maybeRoot) = case maybeRoot of
149 Nothing -> elem l lt
150 Just r -> case HM.lookup r m of
151 Nothing -> panic $ "[Garg.API.Ngrams.Tools] filterWithRoot, unknown key: " <> unNgramsTerm r
152 Just (l',_) -> elem l' lt
153
154 groupNodesByNgrams :: ( At root_map
155 , Index root_map ~ NgramsTerm
156 , IxValue root_map ~ Maybe RootTerm
157 )
158 => root_map
159 -> HashMap NgramsTerm (Set NodeId)
160 -> HashMap NgramsTerm (Set NodeId)
161 groupNodesByNgrams syn occs = HM.fromListWith (<>) occs'
162 where
163 occs' = map toSyn (HM.toList occs)
164 toSyn (t,ns) = case syn ^. at t of
165 Nothing -> panic $ "[Garg.API.Ngrams.Tools.groupNodesByNgrams] unknown key: " <> unNgramsTerm t
166 Just r -> case r of
167 Nothing -> (t, ns)
168 Just r' -> (r',ns)
169
170 data Diagonal = Diagonal Bool
171
172 getCoocByNgrams :: Diagonal
173 -> HashMap NgramsTerm (Set NodeId)
174 -> HashMap (NgramsTerm, NgramsTerm) Int
175 getCoocByNgrams = getCoocByNgrams' identity
176
177
178 getCoocByNgrams' :: (Hashable a, Ord a, Ord c)
179 => (b -> Set c)
180 -> Diagonal
181 -> HashMap a b
182 -> HashMap (a, a) Int
183 getCoocByNgrams' f (Diagonal diag) m =
184 HM.fromList [( (t1,t2)
185 , maybe 0 Set.size $ Set.intersection
186 <$> (fmap f $ HM.lookup t1 m)
187 <*> (fmap f $ HM.lookup t2 m)
188 )
189 | (t1,t2) <- if diag then
190 [ (x,y) | x <- ks, y <- ks, x <= y] -- TODO if we keep a Data.Map here it might be
191 -- more efficient to enumerate all the y <= x.
192 else
193 listToCombi identity ks
194 ]
195
196 where ks = HM.keys m
197
198 ------------------------------------------
199
200
201 migrateFromDirToDb :: (CmdM env err m, HasNodeStory env err m)
202 => m ()
203 migrateFromDirToDb = do
204 pool <- view connPool
205 listIds <- liftBase $ getNodesIdWithType pool NodeList
206 printDebug "[migrateFromDirToDb] listIds" listIds
207 (NodeStory nls) <- NSF.getRepoReadConfig listIds
208 printDebug "[migrateFromDirToDb] nls" nls
209 _ <- mapM (\(nId, a) -> do
210 n <- liftBase $ nodeExists pool nId
211 case n of
212 False -> pure ()
213 True -> liftBase $ upsertNodeArchive pool nId a
214 ) $ Map.toList nls
215 --_ <- nodeStoryIncs (Just $ NodeStory nls) listIds
216 pure ()