]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/API/Ngrams/Tools.hs
Merge branch 'dev' into dev-doc-annotation-issue
[gargantext.git] / src / Gargantext / API / Ngrams / Tools.hs
1 {-|
2 Module : Gargantext.API.Ngrams.Tools
3 Description : Tools to manage Ngrams Elements (from the API)
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 -}
11
12
13 module Gargantext.API.Ngrams.Tools
14 where
15
16 import Control.Concurrent
17 import Control.Lens (_Just, (^.), at, view)
18 import Control.Monad.Reader
19 import Data.Map.Strict (Map)
20 import Data.Set (Set)
21 import Data.Text (Text)
22 import Data.Validity
23 import Gargantext.API.Ngrams
24 import Gargantext.Core.Types (ListType(..), NodeId, ListId)
25 import Gargantext.Database.Schema.Ngrams (NgramsType)
26 import Gargantext.Prelude
27 import qualified Data.Map.Strict as Map
28 import qualified Data.Set as Set
29
30 type RootTerm = Text
31
32 getRepo :: RepoCmdM env err m => m NgramsRepo
33 getRepo = do
34 v <- view repoVar
35 liftBase $ readMVar v
36
37 listNgramsFromRepo :: [ListId] -> NgramsType
38 -> NgramsRepo -> Map Text NgramsRepoElement
39 listNgramsFromRepo nodeIds ngramsType repo = ngrams
40 where
41 ngramsMap = repo ^. r_state . at ngramsType . _Just
42
43 ngrams = Map.unionsWith mergeNgramsElement
44 [ ngramsMap ^. at nodeId . _Just | nodeId <- nodeIds ]
45
46 -- TODO-ACCESS: We want to do the security check before entering here.
47 -- Add a static capability parameter would be nice.
48 -- Ideally this is the access to `repoVar` which needs to
49 -- be properly guarded.
50 getListNgrams :: RepoCmdM env err m
51 => [ListId] -> NgramsType
52 -> m (Map Text NgramsRepoElement)
53 getListNgrams nodeIds ngramsType = listNgramsFromRepo nodeIds ngramsType <$> getRepo
54
55 getTermsWith :: (RepoCmdM env err m, Ord a)
56 => (Text -> a ) -> [ListId]
57 -> NgramsType -> ListType
58 -> m (Map a [a])
59 getTermsWith f ls ngt lt = Map.fromListWith (<>)
60 <$> map (toTreeWith f)
61 <$> Map.toList
62 <$> Map.filter (\f' -> (fst f') == lt)
63 <$> mapTermListRoot ls ngt
64 <$> getRepo
65 where
66 toTreeWith f'' (t, (_lt, maybeRoot)) = case maybeRoot of
67 Nothing -> (f'' t, [])
68 Just r -> (f'' r, map f'' [t])
69
70 mapTermListRoot :: [ListId]
71 -> NgramsType
72 -> NgramsRepo
73 -> Map Text (ListType, (Maybe Text))
74 mapTermListRoot nodeIds ngramsType repo =
75 Map.fromList [ (t, (_nre_list nre, _nre_root nre))
76 | (t, nre) <- Map.toList ngrams
77 ]
78 where ngrams = listNgramsFromRepo nodeIds ngramsType repo
79
80 filterListWithRoot :: ListType
81 -> Map Text (ListType, Maybe Text)
82 -> Map Text (Maybe RootTerm)
83 filterListWithRoot lt m = Map.fromList
84 $ map (\(t,(_,r)) -> (t,r))
85 $ filter isMapTerm (Map.toList m)
86 where
87 isMapTerm (_t,(l, maybeRoot)) = case maybeRoot of
88 Nothing -> l == lt
89 Just r -> case Map.lookup r m of
90 Nothing -> panic $ "Garg.API.Ngrams.Tools: filterWithRoot, unknown key: " <> r
91 Just (l',_) -> l' == lt
92
93 groupNodesByNgrams :: Map Text (Maybe RootTerm)
94 -> Map Text (Set NodeId)
95 -> Map Text (Set NodeId)
96 groupNodesByNgrams syn occs = Map.fromListWith (<>) occs'
97 where
98 occs' = map toSyn (Map.toList occs)
99 toSyn (t,ns) = case Map.lookup t syn of
100 Nothing -> panic $ "[Garg.API.Ngrams.Tools.groupNodesByNgrams] unknown key: " <> t
101 Just r -> case r of
102 Nothing -> (t, ns)
103 Just r' -> (r',ns)
104
105 data Diagonal = Diagonal Bool
106
107 getCoocByNgrams :: Diagonal -> Map Text (Set NodeId) -> Map (Text, Text) Int
108 getCoocByNgrams = getCoocByNgrams' identity
109
110
111 getCoocByNgrams' :: (Ord a, Ord c) => (b -> Set c) -> Diagonal -> Map a b -> Map (a, a) Int
112 getCoocByNgrams' f (Diagonal diag) m =
113 Map.fromList [( (t1,t2)
114 , maybe 0 Set.size $ Set.intersection
115 <$> (fmap f $ Map.lookup t1 m)
116 <*> (fmap f $ Map.lookup t2 m)
117 ) | (t1,t2) <- case diag of
118 True -> [ (x,y) | x <- Map.keys m, y <- Map.keys m, x <= y]
119 False -> listToCombi identity (Map.keys m)
120 ]
121