2 Module : Gargantext.API.Ngrams.Tools
3 Description : Tools to manage Ngrams Elements (from the API)
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
12 {-# LANGUAGE TypeFamilies #-}
14 module Gargantext.API.Ngrams.Tools
17 import Control.Concurrent
18 import Control.Lens (_Just, (^.), at, view, At, Index, IxValue)
19 import Control.Monad.Reader
20 import Data.HashMap.Strict (HashMap)
21 import Data.Hashable (Hashable)
22 import Data.Pool (withResource)
25 import Gargantext.API.Ngrams.Types
26 import Gargantext.Core.NodeStory
27 import Gargantext.Core.Types (ListType(..), NodeId, NodeType(..), ListId)
28 import Gargantext.Database.Prelude (CmdM, HasConnectionPool(..))
29 import Gargantext.Database.Schema.Ngrams (NgramsType)
30 import Gargantext.Prelude
31 import qualified Data.HashMap.Strict as HM
32 import qualified Data.Map.Strict as Map
33 import qualified Data.Set as Set
34 import qualified Gargantext.Core.NodeStoryFile as NSF
37 mergeNgramsElement :: NgramsRepoElement -> NgramsRepoElement -> NgramsRepoElement
38 mergeNgramsElement _neOld neNew = neNew
40 type RootTerm = NgramsTerm
43 getRepo :: HasNodeStory env err m
44 => [ListId] -> m NodeListStory
47 v <- liftBase $ f listIds
48 v' <- liftBase $ readMVar v
52 repoSize :: Ord k1 => NodeStory (Map.Map k1 (Map.Map k2 a)) p
55 repoSize repo node_id = Map.map Map.size state
57 state = repo ^. unNodeStory
62 getNodeStoryVar :: HasNodeStory env err m
63 => [ListId] -> m (MVar NodeListStory)
64 getNodeStoryVar l = do
70 getNodeListStory :: HasNodeStory env err m
71 => m ([NodeId] -> IO (MVar NodeListStory))
73 env <- view hasNodeStory
74 pure $ view nse_getter env
78 listNgramsFromRepo :: [ListId]
81 -> HashMap NgramsTerm NgramsRepoElement
82 listNgramsFromRepo nodeIds ngramsType repo =
83 HM.fromList $ Map.toList
84 $ Map.unionsWith mergeNgramsElement ngrams
90 . at ngramsType . _Just
94 -- TODO-ACCESS: We want to do the security check before entering here.
95 -- Add a static capability parameter would be nice.
96 -- Ideally this is the access to `repoVar` which needs to
97 -- be properly guarded.
98 getListNgrams :: HasNodeStory env err m
99 => [ListId] -> NgramsType
100 -> m (HashMap NgramsTerm NgramsRepoElement)
101 getListNgrams nodeIds ngramsType = listNgramsFromRepo nodeIds ngramsType
105 getTermsWith :: (HasNodeStory env err m, Eq a, Hashable a)
106 => (NgramsTerm -> a) -> [ListId]
107 -> NgramsType -> Set ListType
109 getTermsWith f ls ngt lts = HM.fromListWith (<>)
112 <$> HM.filter (\f' -> Set.member (fst f') lts)
113 <$> mapTermListRoot ls ngt
116 toTreeWith (t, (_lt, maybeRoot)) = case maybeRoot of
118 Just r -> (f r, [f t])
122 mapTermListRoot :: [ListId]
125 -> HashMap NgramsTerm (ListType, Maybe NgramsTerm)
126 mapTermListRoot nodeIds ngramsType repo =
127 (\nre -> (_nre_list nre, _nre_root nre))
128 <$> listNgramsFromRepo nodeIds ngramsType repo
133 filterListWithRootHashMap :: ListType
134 -> HashMap NgramsTerm (ListType, Maybe NgramsTerm)
135 -> HashMap NgramsTerm (Maybe RootTerm)
136 filterListWithRootHashMap lt m = snd <$> HM.filter isMapTerm m
138 isMapTerm (l, maybeRoot) = case maybeRoot of
140 Just r -> case HM.lookup r m of
141 Nothing -> panic $ "[Garg.API.Ngrams.Tools] filterWithRoot, unknown key: " <> unNgramsTerm r
142 Just (l',_) -> l' == lt
144 filterListWithRoot :: [ListType]
145 -> HashMap NgramsTerm (ListType, Maybe NgramsTerm)
146 -> HashMap NgramsTerm (Maybe RootTerm)
147 filterListWithRoot lt m = snd <$> HM.filter isMapTerm m
149 isMapTerm (l, maybeRoot) = case maybeRoot of
151 Just r -> case HM.lookup r m of
152 Nothing -> panic $ "[Garg.API.Ngrams.Tools] filterWithRoot, unknown key: " <> unNgramsTerm r
153 Just (l',_) -> elem l' lt
155 groupNodesByNgrams :: ( At root_map
156 , Index root_map ~ NgramsTerm
157 , IxValue root_map ~ Maybe RootTerm
160 -> HashMap NgramsTerm (Set NodeId)
161 -> HashMap NgramsTerm (Set NodeId)
162 groupNodesByNgrams syn occs = HM.fromListWith (<>) occs'
164 occs' = map toSyn (HM.toList occs)
165 toSyn (t,ns) = case syn ^. at t of
166 Nothing -> panic $ "[Garg.API.Ngrams.Tools.groupNodesByNgrams] unknown key: " <> unNgramsTerm t
171 data Diagonal = Diagonal Bool
173 getCoocByNgrams :: Diagonal
174 -> HashMap NgramsTerm (Set NodeId)
175 -> HashMap (NgramsTerm, NgramsTerm) Int
176 getCoocByNgrams = getCoocByNgrams' identity
179 getCoocByNgrams' :: (Hashable a, Ord a, Ord c)
183 -> HashMap (a, a) Int
184 getCoocByNgrams' f (Diagonal diag) m =
185 HM.fromList [( (t1,t2)
186 , maybe 0 Set.size $ Set.intersection
187 <$> (fmap f $ HM.lookup t1 m)
188 <*> (fmap f $ HM.lookup t2 m)
191 then [ (x,y) | x <- ks, y <- ks, x <= y]
192 -- TODO if we keep a Data.Map here it might be
193 -- more efficient to enumerate all the y <= x.
195 listToCombi identity ks
201 -- TODO k could be either k1 or k2 here
202 getCoocByNgrams'' :: (Hashable k, Ord k, Ord contexts)
204 -> (contextA -> Set contexts, contextB -> Set contexts)
205 -> (HashMap k contextA, HashMap k contextB)
206 -> HashMap (k, k) Int
207 getCoocByNgrams'' (Diagonal diag) (f1,f2) (m1,m2) =
208 HM.fromList [( (t1,t2)
209 , maybe 0 Set.size $ Set.intersection
210 <$> (fmap f1 $ HM.lookup t1 m1)
211 <*> (fmap f2 $ HM.lookup t2 m2)
215 [ (x,y) | x <- ks1, y <- ks2, x <= y]
216 -- TODO if we keep a Data.Map here it might be
217 -- more efficient to enumerate all the y <= x.
219 [ (x,y) | x <- ks1, y <- ks2, x < y]
221 -- listToCombi identity ks1
229 ------------------------------------------
232 migrateFromDirToDb :: (CmdM env err m) -- , HasNodeStory env err m)
234 migrateFromDirToDb = do
235 pool <- view connPool
236 withResource pool $ \c -> do
237 listIds <- liftBase $ getNodesIdWithType c NodeList
238 printDebug "[migrateFromDirToDb] listIds" listIds
239 (NodeStory nls) <- NSF.getRepoReadConfig listIds
240 printDebug "[migrateFromDirToDb] nls" nls
241 _ <- mapM (\(nId, a) -> do
242 n <- liftBase $ nodeExists c nId
245 True -> liftBase $ upsertNodeStories c nId a
247 --_ <- nodeStoryIncs (Just $ NodeStory nls) listIds