2 Module : Gargantext.API.Ngrams.Tools
3 Description : Tools to manage Ngrams Elements (from the API)
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
12 {-# LANGUAGE TypeFamilies #-}
14 module Gargantext.API.Ngrams.Tools
17 import Control.Concurrent
18 import Control.Lens (_Just, (^.), at, view, At, Index, IxValue)
19 import Control.Monad.Reader
20 import Data.HashMap.Strict (HashMap)
21 import Data.Hashable (Hashable)
24 import Gargantext.API.Ngrams.Types
25 import Gargantext.Core.Types (ListType(..), NodeId, ListId)
26 import Gargantext.Database.Schema.Ngrams (NgramsType)
27 import Gargantext.Prelude
28 import qualified Data.HashMap.Strict as HM
29 import qualified Data.Map.Strict as Map
30 import qualified Data.Set as Set
32 mergeNgramsElement :: NgramsRepoElement -> NgramsRepoElement -> NgramsRepoElement
33 mergeNgramsElement _neOld neNew = neNew
35 type RootTerm = NgramsTerm
37 getRepo :: RepoCmdM env err m => m NgramsRepo
42 listNgramsFromRepo :: [ListId] -> NgramsType
43 -> NgramsRepo -> HashMap NgramsTerm NgramsRepoElement
44 listNgramsFromRepo nodeIds ngramsType repo = ngrams
46 ngramsMap = repo ^. r_state . at ngramsType . _Just
48 -- TODO HashMap linked
49 ngrams = HM.fromList $ Map.toList $ Map.unionsWith mergeNgramsElement
50 [ ngramsMap ^. at nodeId . _Just | nodeId <- nodeIds ]
53 -- TODO-ACCESS: We want to do the security check before entering here.
54 -- Add a static capability parameter would be nice.
55 -- Ideally this is the access to `repoVar` which needs to
56 -- be properly guarded.
57 getListNgrams :: RepoCmdM env err m
58 => [ListId] -> NgramsType
59 -> m (HashMap NgramsTerm NgramsRepoElement)
60 getListNgrams nodeIds ngramsType = listNgramsFromRepo nodeIds ngramsType <$> getRepo
62 getTermsWith :: (RepoCmdM env err m, Eq a, Hashable a)
63 => (NgramsTerm -> a) -> [ListId]
64 -> NgramsType -> Set ListType
66 getTermsWith f ls ngt lts = HM.fromListWith (<>)
69 <$> HM.filter (\f' -> Set.member (fst f') lts)
70 <$> mapTermListRoot ls ngt
73 toTreeWith (t, (_lt, maybeRoot)) = case maybeRoot of
75 Just r -> (f r, [f t])
77 mapTermListRoot :: [ListId]
80 -> HashMap NgramsTerm (ListType, Maybe NgramsTerm)
81 mapTermListRoot nodeIds ngramsType repo =
82 (\nre -> (_nre_list nre, _nre_root nre))
83 <$> listNgramsFromRepo nodeIds ngramsType repo
85 filterListWithRootHashMap :: ListType
86 -> HashMap NgramsTerm (ListType, Maybe NgramsTerm)
87 -> HashMap NgramsTerm (Maybe RootTerm)
88 filterListWithRootHashMap lt m = snd <$> HM.filter isMapTerm m
90 isMapTerm (l, maybeRoot) = case maybeRoot of
92 Just r -> case HM.lookup r m of
93 Nothing -> panic $ "[Garg.API.Ngrams.Tools] filterWithRoot, unknown key: " <> unNgramsTerm r
94 Just (l',_) -> l' == lt
96 filterListWithRoot :: ListType
97 -> HashMap NgramsTerm (ListType, Maybe NgramsTerm)
98 -> HashMap NgramsTerm (Maybe RootTerm)
99 filterListWithRoot lt m = snd <$> HM.filter isMapTerm m
101 isMapTerm (l, maybeRoot) = case maybeRoot of
103 Just r -> case HM.lookup r m of
104 Nothing -> panic $ "[Garg.API.Ngrams.Tools] filterWithRoot, unknown key: " <> unNgramsTerm r
105 Just (l',_) -> l' == lt
107 groupNodesByNgrams :: ( At root_map
108 , Index root_map ~ NgramsTerm
109 , IxValue root_map ~ Maybe RootTerm
112 -> HashMap NgramsTerm (Set NodeId)
113 -> HashMap NgramsTerm (Set NodeId)
114 groupNodesByNgrams syn occs = HM.fromListWith (<>) occs'
116 occs' = map toSyn (HM.toList occs)
117 toSyn (t,ns) = case syn ^. at t of
118 Nothing -> panic $ "[Garg.API.Ngrams.Tools.groupNodesByNgrams] unknown key: " <> unNgramsTerm t
123 data Diagonal = Diagonal Bool
125 getCoocByNgrams :: Diagonal -> HashMap NgramsTerm (Set NodeId) -> HashMap (NgramsTerm, NgramsTerm) Int
126 getCoocByNgrams = getCoocByNgrams' identity
129 getCoocByNgrams' :: (Hashable a, Ord a, Ord c) => (b -> Set c) -> Diagonal -> HashMap a b -> HashMap (a, a) Int
130 getCoocByNgrams' f (Diagonal diag) m =
131 HM.fromList [( (t1,t2)
132 , maybe 0 Set.size $ Set.intersection
133 <$> (fmap f $ HM.lookup t1 m)
134 <*> (fmap f $ HM.lookup t2 m)
136 | (t1,t2) <- if diag then
137 [ (x,y) | x <- ks, y <- ks, x <= y] -- TODO if we keep a Data.Map here it might be
138 -- more efficient to enumerate all the y <= x.
140 listToCombi identity ks
145 ------------------------------------------