]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/API/Ngrams/Tools.hs
Merge remote-tracking branch 'origin/438-dev-team-node-creator' into dev-merge
[gargantext.git] / src / Gargantext / API / Ngrams / Tools.hs
1 {-|
2 Module : Gargantext.API.Ngrams.Tools
3 Description : Tools to manage Ngrams Elements (from the API)
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 -}
11
12 {-# LANGUAGE TypeFamilies #-}
13
14 module Gargantext.API.Ngrams.Tools
15 where
16
17 import Control.Concurrent
18 import Control.Lens (_Just, (^.), at, view, At, Index, IxValue)
19 import Control.Monad.Reader
20 import Data.HashMap.Strict (HashMap)
21 import Data.Hashable (Hashable)
22 import Data.Pool (withResource)
23 import Data.Set (Set)
24 import Data.Validity
25 import Gargantext.API.Ngrams.Types
26 import Gargantext.Core.NodeStory
27 import Gargantext.Core.Types (ListType(..), NodeId, NodeType(..), ListId)
28 import Gargantext.Database.Prelude (CmdM, HasConnectionPool(..))
29 import Gargantext.Database.Schema.Ngrams (NgramsType)
30 import Gargantext.Prelude
31 import qualified Data.HashMap.Strict as HM
32 import qualified Data.Map.Strict as Map
33 import qualified Data.Set as Set
34 import qualified Gargantext.Core.NodeStoryFile as NSF
35
36
37 mergeNgramsElement :: NgramsRepoElement -> NgramsRepoElement -> NgramsRepoElement
38 mergeNgramsElement _neOld neNew = neNew
39
40 type RootTerm = NgramsTerm
41
42
43 getRepo :: HasNodeStory env err m
44 => [ListId] -> m NodeListStory
45 getRepo listIds = do
46 f <- getNodeListStory
47 v <- liftBase $ f listIds
48 v' <- liftBase $ readMVar v
49 pure $ v'
50
51
52 repoSize :: Ord k1 => NodeStory (Map.Map k1 (Map.Map k2 a)) p
53 -> NodeId
54 -> Map.Map k1 Int
55 repoSize repo node_id = Map.map Map.size state
56 where
57 state = repo ^. unNodeStory
58 . at node_id . _Just
59 . a_state
60
61
62 getNodeStoryVar :: HasNodeStory env err m
63 => [ListId] -> m (MVar NodeListStory)
64 getNodeStoryVar l = do
65 f <- getNodeListStory
66 v <- liftBase $ f l
67 pure v
68
69
70 getNodeListStory :: HasNodeStory env err m
71 => m ([NodeId] -> IO (MVar NodeListStory))
72 getNodeListStory = do
73 env <- view hasNodeStory
74 pure $ view nse_getter env
75
76
77
78 listNgramsFromRepo :: [ListId]
79 -> NgramsType
80 -> NodeListStory
81 -> HashMap NgramsTerm NgramsRepoElement
82 listNgramsFromRepo nodeIds ngramsType repo =
83 HM.fromList $ Map.toList
84 $ Map.unionsWith mergeNgramsElement ngrams
85 where
86 ngrams = [ repo
87 ^. unNodeStory
88 . at nodeId . _Just
89 . a_state
90 . at ngramsType . _Just
91 | nodeId <- nodeIds
92 ]
93
94 -- TODO-ACCESS: We want to do the security check before entering here.
95 -- Add a static capability parameter would be nice.
96 -- Ideally this is the access to `repoVar` which needs to
97 -- be properly guarded.
98 getListNgrams :: HasNodeStory env err m
99 => [ListId] -> NgramsType
100 -> m (HashMap NgramsTerm NgramsRepoElement)
101 getListNgrams nodeIds ngramsType = listNgramsFromRepo nodeIds ngramsType
102 <$> getRepo nodeIds
103
104
105 getTermsWith :: (HasNodeStory env err m, Eq a, Hashable a)
106 => (NgramsTerm -> a) -> [ListId]
107 -> NgramsType -> Set ListType
108 -> m (HashMap a [a])
109 getTermsWith f ls ngt lts = HM.fromListWith (<>)
110 <$> map toTreeWith
111 <$> HM.toList
112 <$> HM.filter (\f' -> Set.member (fst f') lts)
113 <$> mapTermListRoot ls ngt
114 <$> getRepo ls
115 where
116 toTreeWith (t, (_lt, maybeRoot)) = case maybeRoot of
117 Nothing -> (f t, [])
118 Just r -> (f r, [f t])
119
120
121
122 mapTermListRoot :: [ListId]
123 -> NgramsType
124 -> NodeListStory
125 -> HashMap NgramsTerm (ListType, Maybe NgramsTerm)
126 mapTermListRoot nodeIds ngramsType repo =
127 (\nre -> (_nre_list nre, _nre_root nre))
128 <$> listNgramsFromRepo nodeIds ngramsType repo
129
130
131
132
133 filterListWithRootHashMap :: ListType
134 -> HashMap NgramsTerm (ListType, Maybe NgramsTerm)
135 -> HashMap NgramsTerm (Maybe RootTerm)
136 filterListWithRootHashMap lt m = snd <$> HM.filter isMapTerm m
137 where
138 isMapTerm (l, maybeRoot) = case maybeRoot of
139 Nothing -> l == lt
140 Just r -> case HM.lookup r m of
141 Nothing -> panic $ "[Garg.API.Ngrams.Tools] filterWithRoot, unknown key: " <> unNgramsTerm r
142 Just (l',_) -> l' == lt
143
144 filterListWithRoot :: [ListType]
145 -> HashMap NgramsTerm (ListType, Maybe NgramsTerm)
146 -> HashMap NgramsTerm (Maybe RootTerm)
147 filterListWithRoot lt m = snd <$> HM.filter isMapTerm m
148 where
149 isMapTerm (l, maybeRoot) = case maybeRoot of
150 Nothing -> elem l lt
151 Just r -> case HM.lookup r m of
152 Nothing -> panic $ "[Garg.API.Ngrams.Tools] filterWithRoot, unknown key: " <> unNgramsTerm r
153 Just (l',_) -> elem l' lt
154
155 groupNodesByNgrams :: ( At root_map
156 , Index root_map ~ NgramsTerm
157 , IxValue root_map ~ Maybe RootTerm
158 )
159 => root_map
160 -> HashMap NgramsTerm (Set NodeId)
161 -> HashMap NgramsTerm (Set NodeId)
162 groupNodesByNgrams syn occs = HM.fromListWith (<>) occs'
163 where
164 occs' = map toSyn (HM.toList occs)
165 toSyn (t,ns) = case syn ^. at t of
166 Nothing -> panic $ "[Garg.API.Ngrams.Tools.groupNodesByNgrams] unknown key: " <> unNgramsTerm t
167 Just r -> case r of
168 Nothing -> (t, ns)
169 Just r' -> (r',ns)
170
171 data Diagonal = Diagonal Bool
172
173 getCoocByNgrams :: Diagonal
174 -> HashMap NgramsTerm (Set NodeId)
175 -> HashMap (NgramsTerm, NgramsTerm) Int
176 getCoocByNgrams = getCoocByNgrams' identity
177
178
179 getCoocByNgrams' :: (Hashable a, Ord a, Ord c)
180 => (b -> Set c)
181 -> Diagonal
182 -> HashMap a b
183 -> HashMap (a, a) Int
184 getCoocByNgrams' f (Diagonal diag) m =
185 HM.fromList [( (t1,t2)
186 , maybe 0 Set.size $ Set.intersection
187 <$> (fmap f $ HM.lookup t1 m)
188 <*> (fmap f $ HM.lookup t2 m)
189 )
190 | (t1,t2) <- if diag then
191 [ (x,y) | x <- ks, y <- ks, x <= y] -- TODO if we keep a Data.Map here it might be
192 -- more efficient to enumerate all the y <= x.
193 else
194 listToCombi identity ks
195 ]
196
197 where ks = HM.keys m
198
199 ------------------------------------------
200
201
202 migrateFromDirToDb :: (CmdM env err m) -- , HasNodeStory env err m)
203 => m ()
204 migrateFromDirToDb = do
205 pool <- view connPool
206 withResource pool $ \c -> do
207 listIds <- liftBase $ getNodesIdWithType c NodeList
208 printDebug "[migrateFromDirToDb] listIds" listIds
209 (NodeStory nls) <- NSF.getRepoReadConfig listIds
210 printDebug "[migrateFromDirToDb] nls" nls
211 _ <- mapM (\(nId, a) -> do
212 n <- liftBase $ nodeExists c nId
213 case n of
214 False -> pure ()
215 True -> liftBase $ upsertNodeStories c nId a
216 ) $ Map.toList nls
217 --_ <- nodeStoryIncs (Just $ NodeStory nls) listIds
218 pure ()