]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/API/Ngrams/Tools.hs
[FIX] History patch working, bug several NRE fixed, needs stemming scores now
[gargantext.git] / src / Gargantext / API / Ngrams / Tools.hs
1 {-|
2 Module : Gargantext.API.Ngrams.Tools
3 Description : Tools to manage Ngrams Elements (from the API)
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 -}
11
12
13 module Gargantext.API.Ngrams.Tools
14 where
15
16 import Control.Concurrent
17 import Control.Lens (_Just, (^.), at, view)
18 import Control.Monad.Reader
19 import Data.Map.Strict (Map)
20 import qualified Data.Map.Strict as Map
21 import qualified Data.Set as Set
22 import Data.Set (Set)
23 import Data.Text (Text)
24 import Data.Validity
25
26 import Gargantext.API.Ngrams.Types
27 import Gargantext.Core.Types (ListType(..), NodeId, ListId)
28 import Gargantext.Database.Schema.Ngrams (NgramsType)
29 import Gargantext.Prelude
30
31 mergeNgramsElement :: NgramsRepoElement -> NgramsRepoElement -> NgramsRepoElement
32 mergeNgramsElement _neOld neNew = neNew
33
34 type RootTerm = Text
35
36 getRepo :: RepoCmdM env err m => m NgramsRepo
37 getRepo = do
38 v <- view repoVar
39 liftBase $ readMVar v
40
41 listNgramsFromRepo :: [ListId] -> NgramsType
42 -> NgramsRepo -> Map Text NgramsRepoElement
43 listNgramsFromRepo nodeIds ngramsType repo = Map.mapKeys unNgramsTerm ngrams
44 where
45 ngramsMap = repo ^. r_state . at ngramsType . _Just
46
47 ngrams = Map.unionsWith mergeNgramsElement
48 [ ngramsMap ^. at nodeId . _Just | nodeId <- nodeIds ]
49
50
51
52 -- TODO-ACCESS: We want to do the security check before entering here.
53 -- Add a static capability parameter would be nice.
54 -- Ideally this is the access to `repoVar` which needs to
55 -- be properly guarded.
56 getListNgrams :: RepoCmdM env err m
57 => [ListId] -> NgramsType
58 -> m (Map Text NgramsRepoElement)
59 getListNgrams nodeIds ngramsType = listNgramsFromRepo nodeIds ngramsType <$> getRepo
60
61 getTermsWith :: (RepoCmdM env err m, Ord a)
62 => (Text -> a ) -> [ListId]
63 -> NgramsType -> ListType
64 -> m (Map a [a])
65 getTermsWith f ls ngt lt = Map.fromListWith (<>)
66 <$> map (toTreeWith f)
67 <$> Map.toList
68 <$> Map.filter (\f' -> (fst f') == lt)
69 <$> mapTermListRoot ls ngt
70 <$> getRepo
71 where
72 toTreeWith f'' (t, (_lt, maybeRoot)) = case maybeRoot of
73 Nothing -> (f'' t, [])
74 Just r -> (f'' r, map f'' [t])
75
76 mapTermListRoot :: [ListId]
77 -> NgramsType
78 -> NgramsRepo
79 -> Map Text (ListType, (Maybe Text))
80 mapTermListRoot nodeIds ngramsType repo =
81 Map.fromList [ (t, (_nre_list nre, unNgramsTerm <$> _nre_root nre))
82 | (t, nre) <- Map.toList ngrams
83 ]
84 where ngrams = listNgramsFromRepo nodeIds ngramsType repo
85
86 filterListWithRoot :: ListType
87 -> Map Text (ListType, Maybe Text)
88 -> Map Text (Maybe RootTerm)
89 filterListWithRoot lt m = Map.fromList
90 $ map (\(t,(_,r)) -> (t,r))
91 $ filter isMapTerm (Map.toList m)
92 where
93 isMapTerm (_t,(l, maybeRoot)) = case maybeRoot of
94 Nothing -> l == lt
95 Just r -> case Map.lookup r m of
96 Nothing -> panic $ "Garg.API.Ngrams.Tools: filterWithRoot, unknown key: " <> r
97 Just (l',_) -> l' == lt
98
99 groupNodesByNgrams :: Map Text (Maybe RootTerm)
100 -> Map Text (Set NodeId)
101 -> Map Text (Set NodeId)
102 groupNodesByNgrams syn occs = Map.fromListWith (<>) occs'
103 where
104 occs' = map toSyn (Map.toList occs)
105 toSyn (t,ns) = case Map.lookup t syn of
106 Nothing -> panic $ "[Garg.API.Ngrams.Tools.groupNodesByNgrams] unknown key: " <> t
107 Just r -> case r of
108 Nothing -> (t, ns)
109 Just r' -> (r',ns)
110
111 data Diagonal = Diagonal Bool
112
113 getCoocByNgrams :: Diagonal -> Map Text (Set NodeId) -> Map (Text, Text) Int
114 getCoocByNgrams = getCoocByNgrams' identity
115
116
117 getCoocByNgrams' :: (Ord a, Ord c) => (b -> Set c) -> Diagonal -> Map a b -> Map (a, a) Int
118 getCoocByNgrams' f (Diagonal diag) m =
119 Map.fromList [( (t1,t2)
120 , maybe 0 Set.size $ Set.intersection
121 <$> (fmap f $ Map.lookup t1 m)
122 <*> (fmap f $ Map.lookup t2 m)
123 ) | (t1,t2) <- case diag of
124 True -> [ (x,y) | x <- Map.keys m, y <- Map.keys m, x <= y]
125 False -> listToCombi identity (Map.keys m)
126 ]
127