2 Module : Gargantext.API.Ngrams.Tools
3 Description : Tools to manage Ngrams Elements (from the API)
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
12 {-# LANGUAGE TypeFamilies #-}
14 module Gargantext.API.Ngrams.Tools
17 import Control.Concurrent
18 import Control.Lens (_Just, (^.), at, view, At, Index, IxValue)
19 import Control.Monad.Reader
20 import Data.Hashable (Hashable)
21 import Data.HashMap.Strict (HashMap)
22 import qualified Data.HashMap.Strict as HM
23 import Gargantext.Data.HashMap.Strict.Utils as HM
24 import Data.Map.Strict (Map)
25 import qualified Data.Map.Strict as Map
26 import qualified Data.Set as Set
28 import Data.Text (Text)
31 import Gargantext.API.Ngrams.Types
32 import Gargantext.Core.Types (ListType(..), NodeId, ListId)
33 import Gargantext.Database.Schema.Ngrams (NgramsType)
34 import Gargantext.Prelude
36 mergeNgramsElement :: NgramsRepoElement -> NgramsRepoElement -> NgramsRepoElement
37 mergeNgramsElement _neOld neNew = neNew
39 type RootTerm = NgramsTerm
41 getRepo :: RepoCmdM env err m => m NgramsRepo
46 listNgramsFromRepo :: [ListId] -> NgramsType
47 -> NgramsRepo -> Map NgramsTerm NgramsRepoElement
48 listNgramsFromRepo nodeIds ngramsType repo = ngrams
50 ngramsMap = repo ^. r_state . at ngramsType . _Just
52 ngrams = Map.unionsWith mergeNgramsElement
53 [ ngramsMap ^. at nodeId . _Just | nodeId <- nodeIds ]
55 -- TODO-ACCESS: We want to do the security check before entering here.
56 -- Add a static capability parameter would be nice.
57 -- Ideally this is the access to `repoVar` which needs to
58 -- be properly guarded.
59 getListNgrams :: RepoCmdM env err m
60 => [ListId] -> NgramsType
61 -> m (Map NgramsTerm NgramsRepoElement)
62 getListNgrams nodeIds ngramsType = listNgramsFromRepo nodeIds ngramsType <$> getRepo
64 getTermsWith :: (RepoCmdM env err m, Eq a, Hashable a)
65 => (NgramsTerm -> a) -> [ListId]
66 -> NgramsType -> ListType
68 getTermsWith f ls ngt lt = HM.fromListWith (<>)
71 <$> Map.filter (\f' -> fst f' == lt)
72 <$> mapTermListRoot ls ngt
75 toTreeWith (t, (_lt, maybeRoot)) = case maybeRoot of
77 Just r -> (f r, [f t])
79 mapTermListRoot :: [ListId]
82 -> Map NgramsTerm (ListType, Maybe NgramsTerm)
83 mapTermListRoot nodeIds ngramsType repo =
84 (\nre -> (_nre_list nre, _nre_root nre)) <$>
85 listNgramsFromRepo nodeIds ngramsType repo
87 filterListWithRootHashMap :: ListType
88 -> HashMap NgramsTerm (ListType, Maybe NgramsTerm)
89 -> HashMap NgramsTerm (Maybe RootTerm)
90 filterListWithRootHashMap lt m = snd <$> HM.filter isMapTerm m
92 isMapTerm (l, maybeRoot) = case maybeRoot of
94 Just r -> case HM.lookup r m of
95 Nothing -> panic $ "Garg.API.Ngrams.Tools: filterWithRoot, unknown key: " <> unNgramsTerm r
96 Just (l',_) -> l' == lt
98 filterListWithRoot :: ListType
99 -> Map NgramsTerm (ListType, Maybe NgramsTerm)
100 -> Map NgramsTerm (Maybe RootTerm)
101 filterListWithRoot lt m = snd <$> Map.filter isMapTerm m
103 isMapTerm (l, maybeRoot) = case maybeRoot of
105 Just r -> case Map.lookup r m of
106 Nothing -> panic $ "Garg.API.Ngrams.Tools: filterWithRoot, unknown key: " <> unNgramsTerm r
107 Just (l',_) -> l' == lt
109 groupNodesByNgrams :: ( At root_map
110 , Index root_map ~ NgramsTerm
111 , IxValue root_map ~ Maybe RootTerm
114 -> HashMap NgramsTerm (Set NodeId)
115 -> HashMap NgramsTerm (Set NodeId)
116 groupNodesByNgrams syn occs = HM.fromListWith (<>) occs'
118 occs' = map toSyn (HM.toList occs)
119 toSyn (t,ns) = case syn ^. at t of
120 Nothing -> panic $ "[Garg.API.Ngrams.Tools.groupNodesByNgrams] unknown key: " <> unNgramsTerm t
125 data Diagonal = Diagonal Bool
127 getCoocByNgrams :: Diagonal -> HashMap Text (Set NodeId) -> HashMap (Text, Text) Int
128 getCoocByNgrams = getCoocByNgrams' identity
131 getCoocByNgrams' :: (Hashable a, Ord a, Ord c) => (b -> Set c) -> Diagonal -> HashMap a b -> HashMap (a, a) Int
132 getCoocByNgrams' f (Diagonal diag) m =
133 HM.fromList [( (t1,t2)
134 , maybe 0 Set.size $ Set.intersection
135 <$> (fmap f $ HM.lookup t1 m)
136 <*> (fmap f $ HM.lookup t2 m)
138 | (t1,t2) <- if diag then
139 [ (x,y) | x <- ks, y <- ks, x <= y] -- TODO if we keep a Data.Map here it might be
140 -- more efficient to enumerate all the y <= x.
142 listToCombi identity ks