]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/API/Ngrams/Tools.hs
[FEAT] toTermList function
[gargantext.git] / src / Gargantext / API / Ngrams / Tools.hs
1 {-|
2 Module : Gargantext.API.Ngrams.Tools
3 Description : Tools to manage Ngrams Elements (from the API)
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 -}
11
12 {-# LANGUAGE TypeFamilies #-}
13
14 module Gargantext.API.Ngrams.Tools
15 where
16
17 import Control.Concurrent
18 import Control.Lens (_Just, (^.), at, view, At, Index, IxValue)
19 import Control.Monad.Reader
20 import Data.HashMap.Strict (HashMap)
21 import Data.Hashable (Hashable)
22 import Data.Set (Set)
23 import Data.Validity
24 import Gargantext.API.Ngrams.Types
25 import Gargantext.Core.Types (ListType(..), NodeId, ListId)
26 import Gargantext.Database.Schema.Ngrams (NgramsType)
27 import Gargantext.Prelude
28 import qualified Data.HashMap.Strict as HM
29 import qualified Data.Map.Strict as Map
30 import qualified Data.Set as Set
31
32 mergeNgramsElement :: NgramsRepoElement -> NgramsRepoElement -> NgramsRepoElement
33 mergeNgramsElement _neOld neNew = neNew
34
35 type RootTerm = NgramsTerm
36
37 getRepo :: RepoCmdM env err m => m NgramsRepo
38 getRepo = do
39 v <- view repoVar
40 liftBase $ readMVar v
41
42 listNgramsFromRepo :: [ListId] -> NgramsType
43 -> NgramsRepo -> HashMap NgramsTerm NgramsRepoElement
44 listNgramsFromRepo nodeIds ngramsType repo = ngrams
45 where
46 ngramsMap = repo ^. r_state . at ngramsType . _Just
47
48 -- TODO HashMap linked
49 ngrams = HM.fromList $ Map.toList $ Map.unionsWith mergeNgramsElement
50 [ ngramsMap ^. at nodeId . _Just | nodeId <- nodeIds ]
51
52
53
54 -- TODO-ACCESS: We want to do the security check before entering here.
55 -- Add a static capability parameter would be nice.
56 -- Ideally this is the access to `repoVar` which needs to
57 -- be properly guarded.
58 getListNgrams :: RepoCmdM env err m
59 => [ListId] -> NgramsType
60 -> m (HashMap NgramsTerm NgramsRepoElement)
61 getListNgrams nodeIds ngramsType = listNgramsFromRepo nodeIds ngramsType <$> getRepo
62
63 getTermsWith :: (RepoCmdM env err m, Eq a, Hashable a)
64 => (NgramsTerm -> a) -> [ListId]
65 -> NgramsType -> ListType
66 -> m (HashMap a [a])
67 getTermsWith f ls ngt lt = HM.fromListWith (<>)
68 <$> map toTreeWith
69 <$> HM.toList
70 <$> HM.filter (\f' -> fst f' == lt)
71 <$> mapTermListRoot ls ngt
72 <$> getRepo
73 where
74 toTreeWith (t, (_lt, maybeRoot)) = case maybeRoot of
75 Nothing -> (f t, [])
76 Just r -> (f r, [f t])
77
78 mapTermListRoot :: [ListId]
79 -> NgramsType
80 -> NgramsRepo
81 -> HashMap NgramsTerm (ListType, Maybe NgramsTerm)
82 mapTermListRoot nodeIds ngramsType repo =
83 (\nre -> (_nre_list nre, _nre_root nre))
84 <$> listNgramsFromRepo nodeIds ngramsType repo
85
86 filterListWithRootHashMap :: ListType
87 -> HashMap NgramsTerm (ListType, Maybe NgramsTerm)
88 -> HashMap NgramsTerm (Maybe RootTerm)
89 filterListWithRootHashMap lt m = snd <$> HM.filter isMapTerm m
90 where
91 isMapTerm (l, maybeRoot) = case maybeRoot of
92 Nothing -> l == lt
93 Just r -> case HM.lookup r m of
94 Nothing -> panic $ "Garg.API.Ngrams.Tools: filterWithRoot, unknown key: " <> unNgramsTerm r
95 Just (l',_) -> l' == lt
96
97 filterListWithRoot :: ListType
98 -> HashMap NgramsTerm (ListType, Maybe NgramsTerm)
99 -> HashMap NgramsTerm (Maybe RootTerm)
100 filterListWithRoot lt m = snd <$> HM.filter isMapTerm m
101 where
102 isMapTerm (l, maybeRoot) = case maybeRoot of
103 Nothing -> l == lt
104 Just r -> case HM.lookup r m of
105 Nothing -> panic $ "Garg.API.Ngrams.Tools: filterWithRoot, unknown key: " <> unNgramsTerm r
106 Just (l',_) -> l' == lt
107
108 groupNodesByNgrams :: ( At root_map
109 , Index root_map ~ NgramsTerm
110 , IxValue root_map ~ Maybe RootTerm
111 )
112 => root_map
113 -> HashMap NgramsTerm (Set NodeId)
114 -> HashMap NgramsTerm (Set NodeId)
115 groupNodesByNgrams syn occs = HM.fromListWith (<>) occs'
116 where
117 occs' = map toSyn (HM.toList occs)
118 toSyn (t,ns) = case syn ^. at t of
119 Nothing -> panic $ "[Garg.API.Ngrams.Tools.groupNodesByNgrams] unknown key: " <> unNgramsTerm t
120 Just r -> case r of
121 Nothing -> (t, ns)
122 Just r' -> (r',ns)
123
124 data Diagonal = Diagonal Bool
125
126 getCoocByNgrams :: Diagonal -> HashMap NgramsTerm (Set NodeId) -> HashMap (NgramsTerm, NgramsTerm) Int
127 getCoocByNgrams = getCoocByNgrams' identity
128
129
130 getCoocByNgrams' :: (Hashable a, Ord a, Ord c) => (b -> Set c) -> Diagonal -> HashMap a b -> HashMap (a, a) Int
131 getCoocByNgrams' f (Diagonal diag) m =
132 HM.fromList [( (t1,t2)
133 , maybe 0 Set.size $ Set.intersection
134 <$> (fmap f $ HM.lookup t1 m)
135 <*> (fmap f $ HM.lookup t2 m)
136 )
137 | (t1,t2) <- if diag then
138 [ (x,y) | x <- ks, y <- ks, x <= y] -- TODO if we keep a Data.Map here it might be
139 -- more efficient to enumerate all the y <= x.
140 else
141 listToCombi identity ks
142 ]
143
144 where ks = HM.keys m
145
146 ------------------------------------------