2 Module : Gargantext.API.Ngrams.Tools
3 Description : Tools to manage Ngrams Elements (from the API)
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
12 {-# LANGUAGE TypeFamilies #-}
14 module Gargantext.API.Ngrams.Tools
17 import Control.Concurrent
18 import Control.Lens (_Just, (^.), at, view, At, Index, IxValue)
19 import Control.Monad.Reader
20 import Data.HashMap.Strict (HashMap)
21 import Data.Hashable (Hashable)
24 import Gargantext.API.Ngrams.Types
25 import Gargantext.Core.Types (ListType(..), NodeId, NodeType(..), ListId)
26 import Gargantext.Database.Prelude (CmdM, HasConnectionPool(..))
27 import Gargantext.Database.Schema.Ngrams (NgramsType)
28 import Gargantext.Prelude
29 import qualified Data.HashMap.Strict as HM
30 import qualified Data.Map.Strict as Map
31 import qualified Data.Set as Set
32 import Gargantext.Core.NodeStory
33 import qualified Gargantext.Core.NodeStoryFile as NSF
36 mergeNgramsElement :: NgramsRepoElement -> NgramsRepoElement -> NgramsRepoElement
37 mergeNgramsElement _neOld neNew = neNew
39 type RootTerm = NgramsTerm
42 getRepo :: HasNodeStory env err m
43 => [ListId] -> m NodeListStory
46 v <- liftBase $ f listIds
47 v' <- liftBase $ readMVar v
51 repoSize :: Ord k1 => NodeStory (Map.Map k1 (Map.Map k2 a)) p
54 repoSize repo node_id = Map.map Map.size state
56 state = repo ^. unNodeStory
61 getNodeStoryVar :: HasNodeStory env err m
62 => [ListId] -> m (MVar NodeListStory)
63 getNodeStoryVar l = do
69 getNodeListStory :: HasNodeStory env err m
70 => m ([NodeId] -> IO (MVar NodeListStory))
72 env <- view hasNodeStory
73 pure $ view nse_getter env
77 listNgramsFromRepo :: [ListId]
80 -> HashMap NgramsTerm NgramsRepoElement
81 listNgramsFromRepo nodeIds ngramsType repo =
82 HM.fromList $ Map.toList
83 $ Map.unionsWith mergeNgramsElement ngrams
89 . at ngramsType . _Just
93 -- TODO-ACCESS: We want to do the security check before entering here.
94 -- Add a static capability parameter would be nice.
95 -- Ideally this is the access to `repoVar` which needs to
96 -- be properly guarded.
97 getListNgrams :: HasNodeStory env err m
98 => [ListId] -> NgramsType
99 -> m (HashMap NgramsTerm NgramsRepoElement)
100 getListNgrams nodeIds ngramsType = listNgramsFromRepo nodeIds ngramsType
104 getTermsWith :: (HasNodeStory env err m, Eq a, Hashable a)
105 => (NgramsTerm -> a) -> [ListId]
106 -> NgramsType -> Set ListType
108 getTermsWith f ls ngt lts = HM.fromListWith (<>)
111 <$> HM.filter (\f' -> Set.member (fst f') lts)
112 <$> mapTermListRoot ls ngt
115 toTreeWith (t, (_lt, maybeRoot)) = case maybeRoot of
117 Just r -> (f r, [f t])
121 mapTermListRoot :: [ListId]
124 -> HashMap NgramsTerm (ListType, Maybe NgramsTerm)
125 mapTermListRoot nodeIds ngramsType repo =
126 (\nre -> (_nre_list nre, _nre_root nre))
127 <$> listNgramsFromRepo nodeIds ngramsType repo
132 filterListWithRootHashMap :: ListType
133 -> HashMap NgramsTerm (ListType, Maybe NgramsTerm)
134 -> HashMap NgramsTerm (Maybe RootTerm)
135 filterListWithRootHashMap lt m = snd <$> HM.filter isMapTerm m
137 isMapTerm (l, maybeRoot) = case maybeRoot of
139 Just r -> case HM.lookup r m of
140 Nothing -> panic $ "[Garg.API.Ngrams.Tools] filterWithRoot, unknown key: " <> unNgramsTerm r
141 Just (l',_) -> l' == lt
143 filterListWithRoot :: [ListType]
144 -> HashMap NgramsTerm (ListType, Maybe NgramsTerm)
145 -> HashMap NgramsTerm (Maybe RootTerm)
146 filterListWithRoot lt m = snd <$> HM.filter isMapTerm m
148 isMapTerm (l, maybeRoot) = case maybeRoot of
150 Just r -> case HM.lookup r m of
151 Nothing -> panic $ "[Garg.API.Ngrams.Tools] filterWithRoot, unknown key: " <> unNgramsTerm r
152 Just (l',_) -> elem l' lt
154 groupNodesByNgrams :: ( At root_map
155 , Index root_map ~ NgramsTerm
156 , IxValue root_map ~ Maybe RootTerm
159 -> HashMap NgramsTerm (Set NodeId)
160 -> HashMap NgramsTerm (Set NodeId)
161 groupNodesByNgrams syn occs = HM.fromListWith (<>) occs'
163 occs' = map toSyn (HM.toList occs)
164 toSyn (t,ns) = case syn ^. at t of
165 Nothing -> panic $ "[Garg.API.Ngrams.Tools.groupNodesByNgrams] unknown key: " <> unNgramsTerm t
170 data Diagonal = Diagonal Bool
172 getCoocByNgrams :: Diagonal
173 -> HashMap NgramsTerm (Set NodeId)
174 -> HashMap (NgramsTerm, NgramsTerm) Int
175 getCoocByNgrams = getCoocByNgrams' identity
178 getCoocByNgrams' :: (Hashable a, Ord a, Ord c)
182 -> HashMap (a, a) Int
183 getCoocByNgrams' f (Diagonal diag) m =
184 HM.fromList [( (t1,t2)
185 , maybe 0 Set.size $ Set.intersection
186 <$> (fmap f $ HM.lookup t1 m)
187 <*> (fmap f $ HM.lookup t2 m)
189 | (t1,t2) <- if diag then
190 [ (x,y) | x <- ks, y <- ks, x <= y] -- TODO if we keep a Data.Map here it might be
191 -- more efficient to enumerate all the y <= x.
193 listToCombi identity ks
198 ------------------------------------------
201 migrateFromDirToDb :: (CmdM env err m, HasNodeStory env err m)
203 migrateFromDirToDb = do
204 pool <- view connPool
205 listIds <- liftBase $ getNodesIdWithType pool NodeList
206 printDebug "[migrateFromDirToDb] listIds" listIds
207 (NodeStory nls) <- NSF.getRepoReadConfig listIds
208 printDebug "[migrateFromDirToDb] nls" nls
209 _ <- mapM (\(nId, a) -> do
210 n <- liftBase $ nodeExists pool nId
213 True -> liftBase $ upsertNodeArchive pool nId a
215 --_ <- nodeStoryIncs (Just $ NodeStory nls) listIds