]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/API/Ngrams/Tools.hs
[ngrams] async endpoint for ngrams put
[gargantext.git] / src / Gargantext / API / Ngrams / Tools.hs
1 {-|
2 Module : Gargantext.API.Ngrams.Tools
3 Description : Tools to manage Ngrams Elements (from the API)
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 -}
11
12
13 module Gargantext.API.Ngrams.Tools
14 where
15
16 import Control.Concurrent
17 import Control.Lens (_Just, (^.), at, view)
18 import Control.Monad.Reader
19 import Data.Map.Strict (Map)
20 import qualified Data.Map.Strict as Map
21 import qualified Data.Set as Set
22 import Data.Set (Set)
23 import Data.Text (Text)
24 import Data.Validity
25
26 import Gargantext.API.Ngrams.Types
27 import Gargantext.Core.Types (ListType(..), NodeId, ListId)
28 import Gargantext.Database.Schema.Ngrams (NgramsType)
29 import Gargantext.Prelude
30
31 mergeNgramsElement :: NgramsRepoElement -> NgramsRepoElement -> NgramsRepoElement
32 mergeNgramsElement _neOld neNew = neNew
33
34 type RootTerm = Text
35
36 getRepo :: RepoCmdM env err m => m NgramsRepo
37 getRepo = do
38 v <- view repoVar
39 liftBase $ readMVar v
40
41 listNgramsFromRepo :: [ListId] -> NgramsType
42 -> NgramsRepo -> Map Text NgramsRepoElement
43 listNgramsFromRepo nodeIds ngramsType repo = Map.mapKeys unNgramsTerm ngrams
44 where
45 ngramsMap = repo ^. r_state . at ngramsType . _Just
46
47 ngrams = Map.unionsWith mergeNgramsElement
48 [ ngramsMap ^. at nodeId . _Just | nodeId <- nodeIds ]
49
50 -- TODO-ACCESS: We want to do the security check before entering here.
51 -- Add a static capability parameter would be nice.
52 -- Ideally this is the access to `repoVar` which needs to
53 -- be properly guarded.
54 getListNgrams :: RepoCmdM env err m
55 => [ListId] -> NgramsType
56 -> m (Map Text NgramsRepoElement)
57 getListNgrams nodeIds ngramsType = listNgramsFromRepo nodeIds ngramsType <$> getRepo
58
59 getTermsWith :: (RepoCmdM env err m, Ord a)
60 => (Text -> a ) -> [ListId]
61 -> NgramsType -> ListType
62 -> m (Map a [a])
63 getTermsWith f ls ngt lt = Map.fromListWith (<>)
64 <$> map (toTreeWith f)
65 <$> Map.toList
66 <$> Map.filter (\f' -> (fst f') == lt)
67 <$> mapTermListRoot ls ngt
68 <$> getRepo
69 where
70 toTreeWith f'' (t, (_lt, maybeRoot)) = case maybeRoot of
71 Nothing -> (f'' t, [])
72 Just r -> (f'' r, map f'' [t])
73
74 mapTermListRoot :: [ListId]
75 -> NgramsType
76 -> NgramsRepo
77 -> Map Text (ListType, (Maybe Text))
78 mapTermListRoot nodeIds ngramsType repo =
79 Map.fromList [ (t, (_nre_list nre, unNgramsTerm <$> _nre_root nre))
80 | (t, nre) <- Map.toList ngrams
81 ]
82 where ngrams = listNgramsFromRepo nodeIds ngramsType repo
83
84 filterListWithRoot :: ListType
85 -> Map Text (ListType, Maybe Text)
86 -> Map Text (Maybe RootTerm)
87 filterListWithRoot lt m = Map.fromList
88 $ map (\(t,(_,r)) -> (t,r))
89 $ filter isMapTerm (Map.toList m)
90 where
91 isMapTerm (_t,(l, maybeRoot)) = case maybeRoot of
92 Nothing -> l == lt
93 Just r -> case Map.lookup r m of
94 Nothing -> panic $ "Garg.API.Ngrams.Tools: filterWithRoot, unknown key: " <> r
95 Just (l',_) -> l' == lt
96
97 groupNodesByNgrams :: Map Text (Maybe RootTerm)
98 -> Map Text (Set NodeId)
99 -> Map Text (Set NodeId)
100 groupNodesByNgrams syn occs = Map.fromListWith (<>) occs'
101 where
102 occs' = map toSyn (Map.toList occs)
103 toSyn (t,ns) = case Map.lookup t syn of
104 Nothing -> panic $ "[Garg.API.Ngrams.Tools.groupNodesByNgrams] unknown key: " <> t
105 Just r -> case r of
106 Nothing -> (t, ns)
107 Just r' -> (r',ns)
108
109 data Diagonal = Diagonal Bool
110
111 getCoocByNgrams :: Diagonal -> Map Text (Set NodeId) -> Map (Text, Text) Int
112 getCoocByNgrams = getCoocByNgrams' identity
113
114
115 getCoocByNgrams' :: (Ord a, Ord c) => (b -> Set c) -> Diagonal -> Map a b -> Map (a, a) Int
116 getCoocByNgrams' f (Diagonal diag) m =
117 Map.fromList [( (t1,t2)
118 , maybe 0 Set.size $ Set.intersection
119 <$> (fmap f $ Map.lookup t1 m)
120 <*> (fmap f $ Map.lookup t2 m)
121 ) | (t1,t2) <- case diag of
122 True -> [ (x,y) | x <- Map.keys m, y <- Map.keys m, x <= y]
123 False -> listToCombi identity (Map.keys m)
124 ]
125