2 Module : Gargantext.Text.Metrics.TFICF
3 Description : TFICF Ngrams tools
4 Copyright : (c) CNRS, 2017
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
10 Definition of TFICF : Term Frequency - Inverse of Context Frequency
12 TFICF is a generalization of [TFIDF](https://en.wikipedia.org/wiki/Tf%E2%80%93idf).
17 module Gargantext.Text.Metrics.TFICF ( TFICF
26 import qualified Data.List as List
27 import Data.Map.Strict (Map, toList)
28 import qualified Data.Ord as DO (Down(..))
32 import Gargantext.Prelude
33 import Gargantext.Core.Types (Ordering(..))
-- Module tag spliced into the panic messages raised by 'tficf', so that a
-- failure can be traced back to this module.
path = "[G.T.Metrics.TFICF]"
40 data TficfContext n m = TficfInfra n m
-- | Wrapper around a strict 'Double' used as the second field of the
-- contexts consumed by 'tficf', where it must be at least as large as the
-- accompanying 'Count'.
data Total = Total
  { unTotal :: !Double
  }
-- | Wrapper around a strict 'Double' used as the first field of the
-- contexts consumed by 'tficf', where it must not exceed the accompanying
-- 'Total'.
data Count = Count
  { unCount :: !Double
  }
47 tficf :: TficfContext Count Total
48 -> TficfContext Count Total
-- Term Frequency - Inverse of Context Frequency, written in the same shape
-- as TF-IDF:
--
--   tficf = (ic / it) * log (st / sc)
--
-- ic / it is the frequency measured in the infra context and log (st / sc)
-- is the inverse frequency measured in the supra context.
--
-- The previous expression, (ic / it) / log (sc / st), divided by a logarithm
-- that is never positive once the guard st >= sc holds, so scores came out
-- negative or zero, and it divided by zero (log 1) whenever sc == st. With
-- the product form that case yields 0 for a non-zero total.
--
-- Panics when the counts and totals are inconsistent (a count larger than
-- its total, or an infra total larger than the supra total).
--
-- NOTE(review): zero counts or totals are not rejected by the guard; IEEE
-- arithmetic then produces Infinity or NaN. Confirm callers never pass them.
tficf (TficfInfra (Count ic) (Total it))
      (TficfSupra (Count sc) (Total st))
  | it >= ic && st >= sc && it <= st = termFrequency * inverseContextFrequency
  | otherwise = panic $ "[ERR]" <> path <> " Frequency impossible"
  where
    -- Share of the infra context taken up by the counted item.
    termFrequency = ic / it
    -- Total over count, so the logarithm is non-negative under the guard.
    inverseContextFrequency = log (st / sc)
-- Only the (infra, supra) pairing above is defined; every other combination
-- of contexts panics.
tficf _ _ = panic $ "[ERR]" <> path <> "Undefined for these contexts"
58 -> (Map Text (Double, Set Text))
59 -> [ (Text,(Double, Set Text))]
-- Flatten the map into an association list ordered by the Double stored as
-- the first component of each value: largest first for Down, smallest first
-- for Up.
sortTficf order = case order of
  Down -> List.sortOn (DO.Down . score) . toList
  Up   -> List.sortOn score . toList
  where
    -- Projects the sort key out of a (key, (score, set)) entry.
    score (_, (s, _)) = s