]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/API/Ngrams/Tools.hs
Continue refactoring...
[gargantext.git] / src / Gargantext / API / Ngrams / Tools.hs
1 {-|
2 Module : Gargantext.API.Ngrams.Tools
3 Description : Tools to manage Ngrams Elements (from the API)
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 -}
11
12 {-# LANGUAGE TypeFamilies #-}
13
14 module Gargantext.API.Ngrams.Tools
15 where
16
17 import Control.Concurrent
18 import Control.Lens (_Just, (^.), at, view, At, Index, IxValue)
19 import Control.Monad.Reader
20 import Data.Hashable (Hashable)
21 import Data.HashMap.Strict (HashMap)
22 import qualified Data.HashMap.Strict as HM
23 import Gargantext.Data.HashMap.Strict.Utils as HM
24 import Data.Map.Strict (Map)
25 import qualified Data.Map.Strict as Map
26 import qualified Data.Set as Set
27 import Data.Set (Set)
28 import Data.Text (Text)
29 import Data.Validity
30
31 import Gargantext.API.Ngrams.Types
32 import Gargantext.Core.Types (ListType(..), NodeId, ListId)
33 import Gargantext.Database.Schema.Ngrams (NgramsType)
34 import Gargantext.Prelude
35
36 mergeNgramsElement :: NgramsRepoElement -> NgramsRepoElement -> NgramsRepoElement
37 mergeNgramsElement _neOld neNew = neNew
38
39 type RootTerm = NgramsTerm
40
41 getRepo :: RepoCmdM env err m => m NgramsRepo
42 getRepo = do
43 v <- view repoVar
44 liftBase $ readMVar v
45
46 listNgramsFromRepo :: [ListId] -> NgramsType
47 -> NgramsRepo -> Map NgramsTerm NgramsRepoElement
48 listNgramsFromRepo nodeIds ngramsType repo = ngrams
49 where
50 ngramsMap = repo ^. r_state . at ngramsType . _Just
51
52 ngrams = Map.unionsWith mergeNgramsElement
53 [ ngramsMap ^. at nodeId . _Just | nodeId <- nodeIds ]
54
55 -- TODO-ACCESS: We want to do the security check before entering here.
56 -- Add a static capability parameter would be nice.
57 -- Ideally this is the access to `repoVar` which needs to
58 -- be properly guarded.
59 getListNgrams :: RepoCmdM env err m
60 => [ListId] -> NgramsType
61 -> m (Map NgramsTerm NgramsRepoElement)
62 getListNgrams nodeIds ngramsType = listNgramsFromRepo nodeIds ngramsType <$> getRepo
63
64 getTermsWith :: (RepoCmdM env err m, Eq a, Hashable a)
65 => (NgramsTerm -> a) -> [ListId]
66 -> NgramsType -> ListType
67 -> m (HashMap a [a])
68 getTermsWith f ls ngt lt = HM.fromListWith (<>)
69 <$> map toTreeWith
70 <$> Map.toList
71 <$> Map.filter (\f' -> fst f' == lt)
72 <$> mapTermListRoot ls ngt
73 <$> getRepo
74 where
75 toTreeWith (t, (_lt, maybeRoot)) = case maybeRoot of
76 Nothing -> (f t, [])
77 Just r -> (f r, [f t])
78
79 mapTermListRoot :: [ListId]
80 -> NgramsType
81 -> NgramsRepo
82 -> Map NgramsTerm (ListType, Maybe NgramsTerm)
83 mapTermListRoot nodeIds ngramsType repo =
84 (\nre -> (_nre_list nre, _nre_root nre)) <$>
85 listNgramsFromRepo nodeIds ngramsType repo
86
87 filterListWithRootHashMap :: ListType
88 -> HashMap NgramsTerm (ListType, Maybe NgramsTerm)
89 -> HashMap NgramsTerm (Maybe RootTerm)
90 filterListWithRootHashMap lt m = snd <$> HM.filter isMapTerm m
91 where
92 isMapTerm (l, maybeRoot) = case maybeRoot of
93 Nothing -> l == lt
94 Just r -> case HM.lookup r m of
95 Nothing -> panic $ "Garg.API.Ngrams.Tools: filterWithRoot, unknown key: " <> unNgramsTerm r
96 Just (l',_) -> l' == lt
97
98 filterListWithRoot :: ListType
99 -> Map NgramsTerm (ListType, Maybe NgramsTerm)
100 -> Map NgramsTerm (Maybe RootTerm)
101 filterListWithRoot lt m = snd <$> Map.filter isMapTerm m
102 where
103 isMapTerm (l, maybeRoot) = case maybeRoot of
104 Nothing -> l == lt
105 Just r -> case Map.lookup r m of
106 Nothing -> panic $ "Garg.API.Ngrams.Tools: filterWithRoot, unknown key: " <> unNgramsTerm r
107 Just (l',_) -> l' == lt
108
109 groupNodesByNgrams :: ( At root_map
110 , Index root_map ~ NgramsTerm
111 , IxValue root_map ~ Maybe RootTerm
112 )
113 => root_map
114 -> HashMap NgramsTerm (Set NodeId)
115 -> HashMap NgramsTerm (Set NodeId)
116 groupNodesByNgrams syn occs = HM.fromListWith (<>) occs'
117 where
118 occs' = map toSyn (HM.toList occs)
119 toSyn (t,ns) = case syn ^. at t of
120 Nothing -> panic $ "[Garg.API.Ngrams.Tools.groupNodesByNgrams] unknown key: " <> unNgramsTerm t
121 Just r -> case r of
122 Nothing -> (t, ns)
123 Just r' -> (r',ns)
124
125 data Diagonal = Diagonal Bool
126
127 getCoocByNgrams :: Diagonal -> HashMap Text (Set NodeId) -> HashMap (Text, Text) Int
128 getCoocByNgrams = getCoocByNgrams' identity
129
130
131 getCoocByNgrams' :: (Hashable a, Ord a, Ord c) => (b -> Set c) -> Diagonal -> HashMap a b -> HashMap (a, a) Int
132 getCoocByNgrams' f (Diagonal diag) m =
133 HM.fromList [( (t1,t2)
134 , maybe 0 Set.size $ Set.intersection
135 <$> (fmap f $ HM.lookup t1 m)
136 <*> (fmap f $ HM.lookup t2 m)
137 )
138 | (t1,t2) <- if diag then
139 [ (x,y) | x <- ks, y <- ks, x <= y] -- TODO if we keep a Data.Map here it might be
140 -- more efficient to enumerate all the y <= x.
141 else
142 listToCombi identity ks
143 ]
144
145 where ks = HM.keys m