]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Text/Metrics/TFICF.hs
optimisation for temporal matching
[gargantext.git] / src / Gargantext / Text / Metrics / TFICF.hs
1 {-|
2 Module : Gargantext.Text.Metrics.TFICF
3 Description : TFICF Ngrams tools
4 Copyright : (c) CNRS, 2017
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 Definition of TFICF : Term Frequency - Inverse of Context Frequency
11
12 TFICF is a generalization of [TFIDF](https://en.wikipedia.org/wiki/Tf%E2%80%93idf).
13
14 -}
15
16 {-# LANGUAGE NoImplicitPrelude #-}
17 {-# LANGUAGE OverloadedStrings #-}
18
19 module Gargantext.Text.Metrics.TFICF ( TFICF
20 , TficfContext(..)
21 , Total(..)
22 , Count(..)
23 , tficf
24 )
25 where
26
27 import Data.Text (Text)
28 import Gargantext.Prelude
29
-- | Fully qualified name of this module, embedded in the messages passed to
-- 'panic' so that a failure can be traced back to its origin.
path :: Text
path = "Gargantext.Text.Metrics.TFICF"
32
-- | A TFICF score, as returned by 'tficf'. Plain alias for 'Double'.
type TFICF = Double
34
-- | A pair of values tagged with the context they were measured in.
--
-- Both constructors carry the same two fields; only the tag differs.
-- 'tficf' expects a 'TficfInfra' as its first argument and a 'TficfSupra'
-- as its second.
data TficfContext n m
  = TficfInfra n m  -- ^ values attached to the infra context
  | TficfSupra n m  -- ^ values attached to the supra context
  deriving (Show)
38
-- | A total, stored as a strict 'Double'.
--
-- In 'tficf' it is the denominator of a ratio whose numerator is a 'Count'.
data Total = Total
  { unTotal :: !Double  -- ^ unwrap the underlying value
  }
-- | A count, stored as a strict 'Double'.
--
-- In 'tficf' it is the numerator of a ratio whose denominator is a 'Total'.
data Count = Count
  { unCount :: !Double  -- ^ unwrap the underlying value
  }
41
-- | Compute a TFICF score from an infra-context and a supra-context
-- measurement.
--
-- The first argument must be a 'TficfInfra' and the second a 'TficfSupra'.
-- For @TficfInfra (Count ic) (Total it)@ and
-- @TficfSupra (Count sc) (Total st)@ the result is
--
-- > (ic / it) / log (sc / st)
--
-- Calls 'panic' when:
--
-- * a count exceeds its total (@it < ic@ or @st < sc@), or
-- * the arguments are not an infra context followed by a supra context.
--
-- NOTE(review): when the guard holds with positive values, @sc / st <= 1@, so
-- the logarithm is never positive and is exactly @0@ when @sc == st@. In that
-- case the division is by zero and yields an infinity or NaN rather than a
-- panic. Confirm with the metric's owner whether that is intended.
tficf :: TficfContext Count Total
      -> TficfContext Count Total
      -> TFICF
tficf (TficfInfra (Count ic) (Total it))
      (TficfSupra (Count sc) (Total st))
  | it >= ic && st >= sc = (ic / it) / log (sc / st)
  | otherwise            = panic $ "[ERR]" <> path <> " Frequency impossible"
-- The leading space keeps the message from running into the module path,
-- matching the message of the guard above.
tficf _ _ = panic $ "[ERR]" <> path <> " Undefined for these contexts"
50
51