2 Module : Gargantext.Database.Metrics.TFICF
3 Description : Ngrams by Node user and master
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
12 {-# LANGUAGE QuasiQuotes #-}
14 module Gargantext.Database.Action.Metrics.TFICF
17 -- import Debug.Trace (trace)
18 -- import Gargantext.Core (Lang(..))
19 import Data.HashMap.Strict (HashMap)
20 import qualified Data.HashMap.Strict as HM
21 import Data.Maybe (fromMaybe)
22 import Gargantext.Core
23 import Gargantext.Core.Text.Metrics.TFICF
24 import Gargantext.Database.Action.Metrics.NgramsByNode (getNodesByNgramsUser{-, getOccByNgramsOnlyFast-}, getOccByNgramsOnlyFast_withSample)
25 import Gargantext.Database.Admin.Types.Node -- (ListId, CorpusId, NodeId)
26 import Gargantext.Database.Prelude (Cmd)
27 import Gargantext.Database.Query.Table.NodeNode (selectCountDocs)
28 import Gargantext.Database.Schema.Ngrams (NgramsType(..))
29 import Gargantext.API.Ngrams.Types
30 import Gargantext.Prelude
31 import qualified Data.Set as Set
34 getTficf :: HasDBid NodeType
38 -> Cmd err (HashMap NgramsTerm Double)
39 getTficf cId mId nt = do
40 mapTextDoubleLocal <- HM.filter (> 1)
41 <$> HM.map (fromIntegral . Set.size)
42 <$> getNodesByNgramsUser cId nt
44 mapTextDoubleGlobal <- HM.map fromIntegral
45 <$> getOccByNgramsOnlyFast mId nt (HM.keys mapTextDoubleLocal)
47 countLocal <- selectCountDocs cId
48 countGlobal <- selectCountDocs mId
50 pure $ HM.mapWithKey (\t n ->
51 tficf (TficfInfra (Count n )
52 (Total $ fromIntegral countLocal))
53 (TficfSupra (Count $ fromMaybe 0 $ HM.lookup t mapTextDoubleGlobal)
54 (Total $ fromIntegral countGlobal))
60 getScore :: HasDBid NodeType
64 -> Cmd err (HashMap NgramsTerm Double)
65 getScore cId _mId nt =
67 <$> HM.map (fromIntegral . Set.size)
68 <$> getNodesByNgramsUser cId nt
72 getTficf_withSample :: HasDBid NodeType
76 -> Cmd err (HashMap NgramsTerm Double)
77 getTficf_withSample cId mId nt = do
78 mapTextDoubleLocal <- HM.filter (> 1)
79 <$> HM.map (fromIntegral . Set.size)
80 <$> getNodesByNgramsUser cId nt
82 countLocal <- selectCountDocs cId
83 let countGlobal = countLocal * 10
85 mapTextDoubleGlobal <- HM.map fromIntegral
86 <$> getOccByNgramsOnlyFast_withSample mId countGlobal nt
87 (HM.keys mapTextDoubleLocal)
89 pure $ HM.mapWithKey (\t n ->
90 tficf (TficfInfra (Count n )
91 (Total $ fromIntegral countLocal))
92 (TficfSupra (Count $ fromMaybe 0 $ HM.lookup t mapTextDoubleGlobal)
93 (Total $ fromIntegral countGlobal))