]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/API/Ngrams/Tools.hs
[API][Node] corpus/<corpusId>/add/file endpoint
[gargantext.git] / src / Gargantext / API / Ngrams / Tools.hs
1 {-|
2 Module : Gargantext.API.Ngrams.Tools
3 Description : Tools to manage Ngrams Elements (from the API)
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 -}
11
12 {-# LANGUAGE NoImplicitPrelude #-}
13 {-# LANGUAGE OverloadedStrings #-}
14 {-# LANGUAGE RankNTypes #-}
15
16 module Gargantext.API.Ngrams.Tools
17 where
18
19 import Control.Concurrent
20 import Control.Lens (_Just, (^.), at, view)
21 import Control.Monad.Reader
22 import Data.Map.Strict (Map)
23 import Data.Set (Set)
24 import Data.Text (Text)
25 import Data.Validity
26 import Gargantext.API.Ngrams
27 import Gargantext.Core.Types (ListType(..), NodeId, ListId)
28 import Gargantext.Database.Schema.Ngrams (NgramsType)
29 import Gargantext.Prelude
30 import qualified Data.Map.Strict as Map
31 import qualified Data.Set as Set
32
33
34 type RootTerm = Text
35
36 -- TODO-ACCESS: We want to do the security check before entering here.
37 -- Add a static capability parameter would be nice.
38 -- Ideally this is the access to `repoVar` which needs to
39 -- be properly guarded.
40 getListNgrams :: RepoCmdM env err m
41 => [ListId] -> NgramsType
42 -> m (Map Text NgramsRepoElement)
43 getListNgrams nodeIds ngramsType = do
44 v <- view repoVar
45 repo <- liftIO $ readMVar v
46
47 let
48 ngramsMap = repo ^. r_state . at ngramsType . _Just
49
50 ngrams = Map.unionsWith mergeNgramsElement
51 [ ngramsMap ^. at nodeId . _Just | nodeId <- nodeIds ]
52
53 pure ngrams
54
55 getTermsWith :: (RepoCmdM env err m, Ord a)
56 => (Text -> a ) -> [ListId]
57 -> NgramsType -> ListType
58 -> m (Map a [a])
59 getTermsWith f ls ngt lt = Map.fromListWith (<>)
60 <$> map (toTreeWith f)
61 <$> Map.toList
62 <$> Map.filter (\f' -> (fst f') == lt)
63 <$> mapTermListRoot ls ngt
64 where
65 toTreeWith f'' (t, (_lt, maybeRoot)) = case maybeRoot of
66 Nothing -> (f'' t, [])
67 Just r -> (f'' r, map f'' [t])
68
69 mapTermListRoot :: RepoCmdM env err m
70 => [ListId] -> NgramsType
71 -> m (Map Text (ListType, (Maybe Text)))
72 mapTermListRoot nodeIds ngramsType = do
73 ngrams <- getListNgrams nodeIds ngramsType
74 pure $ Map.fromList [ (t, (_nre_list nre, _nre_root nre))
75 | (t, nre) <- Map.toList ngrams
76 ]
77
78 filterListWithRoot :: ListType -> Map Text (ListType, Maybe Text)
79 -> Map Text (Maybe RootTerm)
80 filterListWithRoot lt m = Map.fromList
81 $ map (\(t,(_,r)) -> (t,r))
82 $ filter isGraphTerm (Map.toList m)
83 where
84 isGraphTerm (_t,(l, maybeRoot)) = case maybeRoot of
85 Nothing -> l == lt
86 Just r -> case Map.lookup r m of
87 Nothing -> panic $ "Garg.API.Ngrams.Tools: filterWithRoot, unknown key: " <> r
88 Just (l',_) -> l' == lt
89
90 groupNodesByNgrams :: Map Text (Maybe RootTerm)
91 -> Map Text (Set NodeId)
92 -> Map Text (Set NodeId)
93 groupNodesByNgrams syn occs = Map.fromListWith (<>) occs'
94 where
95 occs' = map toSyn (Map.toList occs)
96 toSyn (t,ns) = case Map.lookup t syn of
97 Nothing -> panic $ "[Garg.API.Ngrams.Tools.groupNodesByNgrams] unknown key: " <> t
98 Just r -> case r of
99 Nothing -> (t, ns)
100 Just r' -> (r',ns)
101
102 data Diagonal = Diagonal Bool
103
104 getCoocByNgrams :: Diagonal -> Map Text (Set NodeId) -> Map (Text, Text) Int
105 getCoocByNgrams = getCoocByNgrams' identity
106
107
108 getCoocByNgrams' :: (Ord a, Ord c) => (b -> Set c) -> Diagonal -> Map a b -> Map (a, a) Int
109 getCoocByNgrams' f (Diagonal diag) m =
110 Map.fromList [( (t1,t2)
111 , maybe 0 Set.size $ Set.intersection
112 <$> (fmap f $ Map.lookup t1 m)
113 <*> (fmap f $ Map.lookup t2 m)
114 ) | (t1,t2) <- case diag of
115 True -> [ (x,y) | x <- Map.keys m, y <- Map.keys m, x <= y]
116 False -> listToCombi identity (Map.keys m)
117 ]
118