]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Database/Action/Metrics/TFICF.hs
Revert "[OPTIM] removing long computations for now (TFICF)"
[gargantext.git] / src / Gargantext / Database / Action / Metrics / TFICF.hs
1 {-|
2 Module : Gargantext.Database.Metrics.TFICF
3 Description : Ngrams by Node user and master
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 -}
11
12 {-# LANGUAGE QuasiQuotes #-}
13
14 module Gargantext.Database.Action.Metrics.TFICF
15 where
16
17 -- import Debug.Trace (trace)
18 -- import Gargantext.Core (Lang(..))
19 import Data.HashMap.Strict (HashMap)
20 import qualified Data.HashMap.Strict as HM
21 import Data.Maybe (fromMaybe)
22 import Gargantext.Core
23 import Gargantext.Core.Text.Metrics.TFICF
24 import Gargantext.Database.Action.Metrics.NgramsByNode (getNodesByNgramsUser, getOccByNgramsOnlyFast, getOccByNgramsOnlyFast_withSample)
25 import Gargantext.Database.Admin.Types.Node -- (ListId, CorpusId, NodeId)
26 import Gargantext.Database.Prelude (Cmd)
27 import Gargantext.Database.Query.Table.NodeNode (selectCountDocs)
28 import Gargantext.Database.Schema.Ngrams (NgramsType(..))
29 import Gargantext.API.Ngrams.Types
30 import Gargantext.Prelude
31 import qualified Data.Set as Set
32
33 getTficf :: HasDBid NodeType
34 => UserCorpusId
35 -> MasterCorpusId
36 -> NgramsType
37 -> Cmd err (HashMap NgramsTerm Double)
38 getTficf cId mId nt = do
39 mapTextDoubleLocal <- HM.filter (> 1)
40 <$> HM.map (fromIntegral . Set.size)
41 <$> getNodesByNgramsUser cId nt
42
43 mapTextDoubleGlobal <- HM.map fromIntegral
44 <$> getOccByNgramsOnlyFast mId nt (HM.keys mapTextDoubleLocal)
45
46 countLocal <- selectCountDocs cId
47 countGlobal <- selectCountDocs mId
48
49 pure $ HM.mapWithKey (\t n ->
50 tficf (TficfInfra (Count n )
51 (Total $ fromIntegral countLocal))
52 (TficfSupra (Count $ fromMaybe 0 $ HM.lookup t mapTextDoubleGlobal)
53 (Total $ fromIntegral countGlobal))
54 ) mapTextDoubleLocal
55
56
57 getTficf_withSample :: HasDBid NodeType
58 => UserCorpusId
59 -> MasterCorpusId
60 -> NgramsType
61 -> Cmd err (HashMap NgramsTerm Double)
62 getTficf_withSample cId mId nt = do
63 mapTextDoubleLocal <- HM.filter (> 1)
64 <$> HM.map (fromIntegral . Set.size)
65 <$> getNodesByNgramsUser cId nt
66
67 countLocal <- selectCountDocs cId
68 let countGlobal = countLocal * 10
69
70 mapTextDoubleGlobal <- HM.map fromIntegral
71 <$> getOccByNgramsOnlyFast_withSample mId countGlobal nt
72 (HM.keys mapTextDoubleLocal)
73
74 pure $ HM.mapWithKey (\t n ->
75 tficf (TficfInfra (Count n )
76 (Total $ fromIntegral countLocal))
77 (TficfSupra (Count $ fromMaybe 0 $ HM.lookup t mapTextDoubleGlobal)
78 (Total $ fromIntegral countGlobal))
79 ) mapTextDoubleLocal
80