]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/API/Ngrams/Tools.hs
Merge branch 'dev' of ssh://gitlab.iscpif.fr:20022/gargantext/haskell-gargantext...
[gargantext.git] / src / Gargantext / API / Ngrams / Tools.hs
1 {-|
2 Module : Gargantext.API.Ngrams.Tools
3 Description : Tools to manage Ngrams Elements (from the API)
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 -}
11
12 {-# LANGUAGE TypeFamilies #-}
13
14 module Gargantext.API.Ngrams.Tools
15 where
16
17 import Control.Concurrent
18 import Control.Lens (_Just, (^.), at, view, At, Index, IxValue)
19 import Control.Monad.Reader
20 import Data.HashMap.Strict (HashMap)
21 import Data.Hashable (Hashable)
22 import Data.Set (Set)
23 import Data.Validity
24 import Gargantext.API.Ngrams.Types
25 import Gargantext.Core.Types (ListType(..), NodeId, ListId)
26 import Gargantext.Database.Schema.Ngrams (NgramsType)
27 import Gargantext.Prelude
28 import qualified Data.HashMap.Strict as HM
29 import qualified Data.Map.Strict as Map
30 import qualified Data.Set as Set
31
32 mergeNgramsElement :: NgramsRepoElement -> NgramsRepoElement -> NgramsRepoElement
33 mergeNgramsElement _neOld neNew = neNew
34
35 type RootTerm = NgramsTerm
36
37 getRepo :: RepoCmdM env err m => m NgramsRepo
38 getRepo = do
39 v <- view repoVar
40 liftBase $ readMVar v
41
42 listNgramsFromRepo :: [ListId] -> NgramsType
43 -> NgramsRepo -> HashMap NgramsTerm NgramsRepoElement
44 listNgramsFromRepo nodeIds ngramsType repo = ngrams
45 where
46 ngramsMap = repo ^. r_state . at ngramsType . _Just
47
48 -- TODO HashMap linked
49 ngrams = HM.fromList $ Map.toList $ Map.unionsWith mergeNgramsElement
50 [ ngramsMap ^. at nodeId . _Just | nodeId <- nodeIds ]
51
52
53 -- TODO-ACCESS: We want to do the security check before entering here.
54 -- Add a static capability parameter would be nice.
55 -- Ideally this is the access to `repoVar` which needs to
56 -- be properly guarded.
57 getListNgrams :: RepoCmdM env err m
58 => [ListId] -> NgramsType
59 -> m (HashMap NgramsTerm NgramsRepoElement)
60 getListNgrams nodeIds ngramsType = listNgramsFromRepo nodeIds ngramsType <$> getRepo
61
62 getTermsWith :: (RepoCmdM env err m, Eq a, Hashable a)
63 => (NgramsTerm -> a) -> [ListId]
64 -> NgramsType -> Set ListType
65 -> m (HashMap a [a])
66 getTermsWith f ls ngt lts = HM.fromListWith (<>)
67 <$> map toTreeWith
68 <$> HM.toList
69 <$> HM.filter (\f' -> Set.member (fst f') lts)
70 <$> mapTermListRoot ls ngt
71 <$> getRepo
72 where
73 toTreeWith (t, (_lt, maybeRoot)) = case maybeRoot of
74 Nothing -> (f t, [])
75 Just r -> (f r, [f t])
76
77 mapTermListRoot :: [ListId]
78 -> NgramsType
79 -> NgramsRepo
80 -> HashMap NgramsTerm (ListType, Maybe NgramsTerm)
81 mapTermListRoot nodeIds ngramsType repo =
82 (\nre -> (_nre_list nre, _nre_root nre))
83 <$> listNgramsFromRepo nodeIds ngramsType repo
84
85 filterListWithRootHashMap :: ListType
86 -> HashMap NgramsTerm (ListType, Maybe NgramsTerm)
87 -> HashMap NgramsTerm (Maybe RootTerm)
88 filterListWithRootHashMap lt m = snd <$> HM.filter isMapTerm m
89 where
90 isMapTerm (l, maybeRoot) = case maybeRoot of
91 Nothing -> l == lt
92 Just r -> case HM.lookup r m of
93 Nothing -> panic $ "[Garg.API.Ngrams.Tools] filterWithRoot, unknown key: " <> unNgramsTerm r
94 Just (l',_) -> l' == lt
95
96 filterListWithRoot :: ListType
97 -> HashMap NgramsTerm (ListType, Maybe NgramsTerm)
98 -> HashMap NgramsTerm (Maybe RootTerm)
99 filterListWithRoot lt m = snd <$> HM.filter isMapTerm m
100 where
101 isMapTerm (l, maybeRoot) = case maybeRoot of
102 Nothing -> l == lt
103 Just r -> case HM.lookup r m of
104 Nothing -> panic $ "[Garg.API.Ngrams.Tools] filterWithRoot, unknown key: " <> unNgramsTerm r
105 Just (l',_) -> l' == lt
106
107 groupNodesByNgrams :: ( At root_map
108 , Index root_map ~ NgramsTerm
109 , IxValue root_map ~ Maybe RootTerm
110 )
111 => root_map
112 -> HashMap NgramsTerm (Set NodeId)
113 -> HashMap NgramsTerm (Set NodeId)
114 groupNodesByNgrams syn occs = HM.fromListWith (<>) occs'
115 where
116 occs' = map toSyn (HM.toList occs)
117 toSyn (t,ns) = case syn ^. at t of
118 Nothing -> panic $ "[Garg.API.Ngrams.Tools.groupNodesByNgrams] unknown key: " <> unNgramsTerm t
119 Just r -> case r of
120 Nothing -> (t, ns)
121 Just r' -> (r',ns)
122
123 data Diagonal = Diagonal Bool
124
125 getCoocByNgrams :: Diagonal -> HashMap NgramsTerm (Set NodeId) -> HashMap (NgramsTerm, NgramsTerm) Int
126 getCoocByNgrams = getCoocByNgrams' identity
127
128
129 getCoocByNgrams' :: (Hashable a, Ord a, Ord c) => (b -> Set c) -> Diagonal -> HashMap a b -> HashMap (a, a) Int
130 getCoocByNgrams' f (Diagonal diag) m =
131 HM.fromList [( (t1,t2)
132 , maybe 0 Set.size $ Set.intersection
133 <$> (fmap f $ HM.lookup t1 m)
134 <*> (fmap f $ HM.lookup t2 m)
135 )
136 | (t1,t2) <- if diag then
137 [ (x,y) | x <- ks, y <- ks, x <= y] -- TODO if we keep a Data.Map here it might be
138 -- more efficient to enumerate all the y <= x.
139 else
140 listToCombi identity ks
141 ]
142
143 where ks = HM.keys m
144
145 ------------------------------------------