Git — Sourcephile - gargantext.git/blob - src/Gargantext/Text/Metrics/TFICF.hs
[graph] some screenshot work
[gargantext.git] / src / Gargantext / Text / Metrics / TFICF.hs
1 {-|
2 Module : Gargantext.Text.Metrics.TFICF
3 Description : TFICF Ngrams tools
4 Copyright : (c) CNRS, 2017
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 Definition of TFICF : Term Frequency - Inverse of Context Frequency
11
12 TFICF is a generalization of [TFIDF](https://en.wikipedia.org/wiki/Tf%E2%80%93idf).
13
14 -}
15
16
17 module Gargantext.Text.Metrics.TFICF ( TFICF
18 , TficfContext(..)
19 , Total(..)
20 , Count(..)
21 , tficf
22 , sortTficf
23 )
24 where
25
26 import qualified Data.List as List
27 import Data.Map.Strict (Map, toList)
28 import qualified Data.Ord as DO (Down(..))
29 import Data.Set (Set)
30 import Data.Text
31
32 import Gargantext.Prelude
33 import Gargantext.Core.Types (Ordering(..))
34
-- | Tag naming this module; it is spliced into the 'panic' messages
-- raised by 'tficf' so that failures can be traced back here.
path :: Text
path =
  "[G.T.Metrics.TFICF]"
37
-- | Score produced by 'tficf' (Term Frequency - Inverse Context Frequency).
-- A plain alias: it is interchangeable with 'Double' everywhere.
type TFICF = Double
39
-- | A pair of measurements tagged with the level of context they come from.
--
-- 'tficf' expects a 'TficfInfra' value as its first argument and a
-- 'TficfSupra' value as its second, and requires the infra total not to
-- exceed the supra total.
data TficfContext n m
  = TficfInfra n m  -- ^ measurements taken in the inner context
  | TficfSupra n m  -- ^ measurements taken in the enclosing context
  deriving (Show)
43
-- | A total, wrapped so it cannot be confused with a 'Double' playing
-- another role. The field is strict.
data Total = Total
  { unTotal :: !Double  -- ^ unwrap the underlying number
  }
-- | A count, wrapped so it cannot be confused with a 'Double' playing
-- another role. The field is strict.
data Count = Count
  { unCount :: !Double  -- ^ unwrap the underlying number
  }
46
-- | Term Frequency - Inverse Context Frequency.
--
-- The first argument must be a 'TficfInfra' carrying the count @ic@ and
-- total @it@ of the inner context; the second must be a 'TficfSupra'
-- carrying the count @sc@ and total @st@ of the enclosing context.
--
-- The score is @(ic / it) * log (st / sc)@: the term frequency in the inner
-- context weighted by the log of the inverse frequency in the enclosing
-- context, mirroring TF-IDF. Because the guard enforces @sc <= st@, the
-- logarithm is never negative for positive inputs.
--
-- Calls 'panic' when a count exceeds its total, when the inner total exceeds
-- the enclosing total, or when the constructors are not given in the
-- (infra, supra) order.
--
-- Zero counts or totals are not rejected, so the usual floating-point
-- infinities or NaN can come out of the divisions.
tficf :: TficfContext Count Total
      -> TficfContext Count Total
      -> TFICF
tficf (TficfInfra (Count ic) (Total it))
      (TficfSupra (Count sc) (Total st))
  | it >= ic && st >= sc && it <= st = termFrequency * inverseContextFrequency
  | otherwise = panic $ "[ERR]" <> path <> " Frequency impossible"
  where
    -- Share of the inner context taken up by the term.
    termFrequency = ic / it
    -- The rarer the term in the enclosing context, the larger the weight.
    -- Multiplying by log (st / sc) replaces the former division by
    -- log (sc / st), which was never positive and was zero when sc == st.
    inverseContextFrequency = log (st / sc)
tficf _ _ = panic $ "[ERR]" <> path <> "Undefined for these contexts"
55
56
-- | Flatten a map of scored terms into a list ordered by score.
--
-- The score is the 'Double' in each value pair. 'Down' yields the highest
-- scores first, 'Up' the lowest first. 'List.sortOn' is stable and 'toList'
-- lists keys in ascending order, so entries with equal scores stay in
-- ascending key order.
sortTficf :: Ordering
          -> Map Text (Double, Set Text)
          -> [(Text, (Double, Set Text))]
sortTficf order = case order of
  Down -> List.sortOn (DO.Down . score) . toList
  Up   -> List.sortOn score . toList
  where
    -- The sort key: the Double stored alongside each term.
    score (_, (value, _)) = value
62