2 Module : Gargantext.Database.Metrics
3 Description : Get Metrics from Storage (Database like)
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
13 module Gargantext.Database.Action.Metrics
16 import Data.HashMap.Strict (HashMap)
19 import Data.Vector (Vector)
20 import Gargantext.API.Ngrams.Tools (filterListWithRoot, groupNodesByNgrams, Diagonal(..), getCoocByNgrams, mapTermListRoot, RootTerm, getRepo')
21 import Gargantext.API.Ngrams.Types (TabType(..), ngramsTypeFromTabType, NgramsTerm)
22 import Gargantext.Core.Mail.Types (HasMail)
23 import Gargantext.Core.NodeStory
24 import Gargantext.Core.Text.Metrics (scored, Scored(..), {-localMetrics, toScored-})
25 import Gargantext.Core.Types (ListType(..), Limit, NodeType(..), ContextId)
26 import Gargantext.Database.Action.Flow.Types (FlowCmdM)
27 import Gargantext.Database.Action.Metrics.NgramsByContext (getContextsByNgramsOnlyUser{-, getTficfWith-})
28 import Gargantext.Database.Admin.Config (userMaster)
29 import Gargantext.Database.Admin.Types.Node (ListId, CorpusId)
30 import Gargantext.Database.Query.Table.Node (defaultList)
31 import Gargantext.Database.Query.Table.Node.Select
32 import Gargantext.Prelude
33 import qualified Data.HashMap.Strict as HM
34 import qualified Data.Map as Map
35 import qualified Data.Set as Set
36 import qualified Data.List as List
38 getMetrics :: FlowCmdM env err m
39 => CorpusId -> Maybe ListId -> TabType -> Maybe Limit
40 -> m (HashMap NgramsTerm (ListType, Maybe NgramsTerm), Vector (Scored NgramsTerm))
41 getMetrics cId maybeListId tabType maybeLimit = do
42 (ngs, _, myCooc) <- getNgramsCooc cId maybeListId tabType maybeLimit
44 pure (ngs, scored myCooc)
47 getNgramsCooc :: (FlowCmdM env err m)
48 => CorpusId -> Maybe ListId -> TabType -> Maybe Limit
49 -> m ( HashMap NgramsTerm (ListType, Maybe NgramsTerm)
50 , HashMap NgramsTerm (Maybe RootTerm)
51 , HashMap (NgramsTerm, NgramsTerm) Int
53 getNgramsCooc cId maybeListId tabType maybeLimit = do
54 (ngs', ngs) <- getNgrams cId maybeListId tabType
56 lId <- defaultList cId
57 lIds <- selectNodesWithUsername NodeList userMaster
59 myCooc <- HM.filter (>1) <$> getCoocByNgrams (Diagonal True)
60 <$> groupNodesByNgrams ngs
61 <$> getContextsByNgramsOnlyUser cId
63 (ngramsTypeFromTabType tabType)
64 (take' maybeLimit $ HM.keys ngs)
65 pure $ (ngs', ngs, myCooc)
67 -- Used for scores in Ngrams Table
68 getNgramsOccurrences :: (FlowCmdM env err m)
69 => CorpusId -> Maybe ListId -> TabType -> Maybe Limit
70 -> m (HashMap NgramsTerm Int)
71 getNgramsOccurrences c l t ml = HM.map Set.size <$> getNgramsContexts c l t ml
75 getNgramsContexts :: (FlowCmdM env err m)
76 => CorpusId -> Maybe ListId -> TabType -> Maybe Limit
77 -> m (HashMap NgramsTerm (Set ContextId))
78 getNgramsContexts cId maybeListId tabType maybeLimit = do
79 (_ngs', ngs) <- getNgrams cId maybeListId tabType
81 lId <- defaultList cId
82 lIds <- selectNodesWithUsername NodeList userMaster
84 -- TODO maybe add an option to group here
85 getContextsByNgramsOnlyUser cId
87 (ngramsTypeFromTabType tabType)
88 (take' maybeLimit $ HM.keys ngs)
92 -- Used for scores in Doc Table
93 getContextsNgramsScore :: (FlowCmdM env err m)
94 => CorpusId -> Maybe ListId -> TabType -> ListType -> Maybe Limit
95 -> m (Map ContextId Int)
96 getContextsNgramsScore cId maybeListId tabType listType maybeLimit
97 = Map.map Set.size <$> getContextsNgrams cId maybeListId tabType listType maybeLimit
99 getContextsNgrams :: (FlowCmdM env err m)
100 => CorpusId -> Maybe ListId -> TabType -> ListType -> Maybe Limit
101 -> m (Map ContextId (Set NgramsTerm))
102 getContextsNgrams cId maybeListId tabType listType maybeLimit = do
103 (ngs', ngs) <- getNgrams cId maybeListId tabType
104 lId <- defaultList cId
105 lIds <- selectNodesWithUsername NodeList userMaster
107 result <- groupNodesByNgrams ngs <$> getContextsByNgramsOnlyUser
110 (ngramsTypeFromTabType tabType)
113 $ HM.filter (\v -> fst v == listType) ngs'
116 pure $ Map.fromListWith (<>)
118 $ map (\(ng, contexts) -> List.zip (Set.toList contexts) (List.cycle [Set.singleton ng]))
123 getNgrams :: (HasMail env, HasNodeStory env err m)
124 => CorpusId -> Maybe ListId -> TabType
125 -> m ( HashMap NgramsTerm (ListType, Maybe NgramsTerm)
126 , HashMap NgramsTerm (Maybe RootTerm)
128 getNgrams cId maybeListId tabType = do
130 lId <- case maybeListId of
131 Nothing -> defaultList cId
132 Just lId' -> pure lId'
134 lists <- mapTermListRoot [lId] (ngramsTypeFromTabType tabType) <$> getRepo' [lId]
135 let maybeSyn = HM.unions $ map (\t -> filterListWithRoot t lists)
136 [MapTerm, StopTerm, CandidateTerm]
137 pure (lists, maybeSyn)
140 take' :: Maybe Int -> [a] -> [a]
141 take' Nothing xs = xs
142 take' (Just n) xs = take n xs