{-| Module : Gargantext.Core.Text.Metrics.TFICF Description : TFICF Ngrams tools Copyright : (c) CNRS, 2017 License : AGPL + CECILL v3 Maintainer : team@gargantext.org Stability : experimental Portability : POSIX Definition of TFICF : Term Frequency - Inverse of Context Frequency TFICF is a generalization of [TFIDF](https://en.wikipedia.org/wiki/Tf%E2%80%93idf). -} module Gargantext.Core.Text.Metrics.TFICF ( TFICF , TficfContext(..) , Total(..) , Count(..) , tficf , sortTficf ) where import Data.Map.Strict (Map, toList) import Data.Text (Text) import Gargantext.Core.Types (Ordering(..)) import Gargantext.Prelude import qualified Data.List as List import qualified Data.Ord as DO (Down(..)) path :: Text path = "[G.T.Metrics.TFICF]" type TFICF = Double data TficfContext n m = TficfInfra n m | TficfSupra n m deriving (Show) data Total = Total {unTotal :: !Double} data Count = Count {unCount :: !Double} tficf :: TficfContext Count Total -> TficfContext Count Total -> TFICF tficf (TficfInfra (Count ic) (Total it) ) (TficfSupra (Count sc) (Total st) ) | it >= ic && st >= sc && it <= st = (it/ic) * log (st/sc) | otherwise = panic $ "[ERR]" <> path <>" Frequency impossible" tficf _ _ = panic $ "[ERR]" <> path <> "Undefined for these contexts" sortTficf :: Ordering -> Map Text Double -> [(Text, Double)] sortTficf Down = List.sortOn (DO.Down . snd) . toList sortTficf Up = List.sortOn snd . toList