2 Module : Gargantext.Core.Text.Metrics.TFICF
3 Description : TFICF Ngrams tools
4 Copyright : (c) CNRS, 2017
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
10 Definition of TFICF : Term Frequency - Inverse of Context Frequency
12 TFICF is a generalization of [TFIDF](https://en.wikipedia.org/wiki/Tf%E2%80%93idf).
17 module Gargantext.Core.Text.Metrics.TFICF ( TFICF
26 import Data.Text (Text)
27 import Gargantext.Prelude
29 import Gargantext.Core.Types (Ordering(..))
30 import Data.Map.Strict (Map, toList)
31 import qualified Data.Ord as DO (Down(..))
32 import qualified Data.List as List
35 path = "[G.T.Metrics.TFICF]"
39 data TficfContext n m = TficfInfra n m
-- | Denominator of a frequency ratio: 'tficf' divides a 'Count' by a 'Total'.
--
-- Declared as a @newtype@ because it wraps exactly one 'Double': the
-- constructor and the 'unTotal' accessor are unchanged for callers, but the
-- wrapper has no runtime representation of its own.
newtype Total = Total { unTotal :: Double }
-- | Numerator of a frequency ratio: 'tficf' divides a 'Count' by a 'Total'.
--
-- Declared as a @newtype@ because it wraps exactly one 'Double': the
-- constructor and the 'unCount' accessor are unchanged for callers, but the
-- wrapper has no runtime representation of its own.
newtype Count = Count { unCount :: Double }
46 tficf :: TficfContext Count Total
47 -> TficfContext Count Total
-- Score computed from an "infra" context (first argument) and a "supra"
-- context (second argument):
--
--   (ic / it) / log (sc / st)
--
-- where ic and sc are the counts, and it and st the totals, of the infra and
-- supra contexts respectively.
--
-- Panics when the numbers are inconsistent (a count above its own total, or
-- the infra total above the supra total), and when the arguments are not an
-- infra context followed by a supra context.
--
-- NOTE(review): the guard enforces sc <= st, so for non-negative inputs the
-- logarithm is <= 0 and is exactly 0 when sc == st; the score is therefore
-- never positive and can be infinite or NaN. Confirm this is the intended
-- formula.
tficf (TficfInfra (Count ic) (Total it))
      (TficfSupra (Count sc) (Total st))
  | it >= ic && st >= sc && it <= st = (ic / it) / log (sc / st)
  | otherwise = panic $ "[ERR]" <> path <> " Frequency impossible"
-- The leading space keeps the text from running into the module tag, matching
-- the message above.
tficf _ _ = panic $ "[ERR]" <> path <> " Undefined for these contexts"
57 -> (Map Text (Double, Set Text))
58 -> [ (Text,(Double, Set Text))]
-- Flatten the map into (key, (score, set)) pairs ordered by score:
-- 'Down' yields the highest score first, 'Up' the lowest. The sort is stable,
-- so pairs with equal scores keep the key order produced by 'toList'.
sortTficf order scores = case order of
  Down -> List.sortOn (\(_, (score, _)) -> DO.Down score) pairs
  Up   -> List.sortOn (\(_, (score, _)) -> score) pairs
  where
    pairs = toList scores