1 {-# LANGUAGE NoImplicitPrelude #-}
2 {-# LANGUAGE OverloadedStrings #-}
5 Implementation of EleVe Python version of papers:
9 module Gargantext.Text.Eleve where
13 import qualified Data.List as List
15 import Data.Text hiding (map)
17 import qualified Data.Map as Map
18 import Gargantext.Prelude
20 -- prop (Noeud c _e f) = c == Map.size f
21 -- TODO remove Feuille
-- | Sample input built from a fixed English sentence by mapping 'terminal'
-- over it.
--
-- NOTE(review): 'terminal' takes a list of 'Text' per element, so
-- presumably an intermediate step groups the words into sub-lists between
-- the two visible lines of the pipeline - confirm against the full file.
23 example :: [[Terminal]]
24 example = map terminal
26 $ words "New York and New York is a big apple"
-- | One element of a sequence handled by this module: either a wrapped
-- piece of 'Text' or the 'Fin' marker that 'terminal' appends last.
--
-- The constructor order matters for the derived 'Ord' instance
-- ('Terminal' values compare below 'Fin').
data Terminal
  = Terminal Text
  | Fin
  deriving (Eq, Ord, Show)
-- | Predicate on 'Terminal'.
--
-- NOTE(review): the defining equations are not visible in this excerpt;
-- presumably it holds exactly for 'Fin' - confirm.
31 isFin :: Terminal -> Bool
-- | Wrap every element of the input in 'Terminal', then close the
-- resulting list with a single trailing 'Fin'.
terminal :: [Text] -> [Terminal]
terminal = (<> [Fin]) . map Terminal
-- | Tree keyed by @k@ at every level.
--
-- * 'Noeud': an inner node with a 'Double' count and a map from key to
--   child subtree.
-- * 'Feuille': a node holding only a count.
--
-- NOTE(review): other code in this file builds 'Noeud' with three
-- positional fields and reads @_noeud_entropy@, so a field of type @e@
-- presumably sits between the two visible ones, and the closing brace of
-- 'Noeud' is not visible either - confirm against the full file.
41 data Arbre k e = Noeud { _noeud_count :: Double
43 , _noeud_fils :: Map k (Arbre k e)
45 | Feuille { _noeud_count :: Double }
-- | Tree value used as the starting point whenever a fresh (sub)tree is
-- needed: 'insertArbres' folds from it and 'insertArbre' inserts into it.
--
-- NOTE(review): its definition is not visible in this excerpt; presumably
-- the empty tree - confirm.
48 arbreVide :: Arbre k e
-- | Build a node from a count and a map of children: the guards pick
-- 'Feuille' when the map is empty and otherwise a 'Noeud' whose middle
-- field is 'mempty' (hence the 'Monoid' constraint).
--
-- NOTE(review): the equation head binding @c@ and @fils@ is not visible
-- between the signature and the guards - confirm against the full file.
51 mkArbre :: Monoid e => Double -> Map Terminal (Arbre Terminal e) -> Arbre Terminal e
53 | Map.null fils = Feuille c
54 | otherwise = Noeud c mempty fils
-- | Insert one sequence of terminals into the tree, one key per level.
--
-- For a non-empty sequence @x:xs@ the visited node is rebuilt through
-- 'mkArbre' with its count incremented by one:
--
-- * on a 'Feuille', a single child is created under @x@ by inserting @xs@
--   into 'arbreVide';
-- * on a 'Noeud', the subtree already stored under @x@ (or 'arbreVide' when
--   there is none) receives @xs@ and is written back under @x@.
--
-- NOTE(review): no equation for the empty sequence is visible in this
-- excerpt, and the parenthesis opened before @case@ is not closed in the
-- visible lines - confirm against the full file.
57 insertArbre :: [Terminal] -> Arbre Terminal () -> Arbre Terminal ()
59 insertArbre (x:xs) (Feuille c) = mkArbre (c+1) (Map.singleton x $ insertArbre xs arbreVide)
60 insertArbre (x:xs) (Noeud c _e f) = mkArbre (c+1) (case Map.lookup x f of
61 Nothing -> Map.insert x (insertArbre xs arbreVide) f
62 Just arbre -> Map.insert x (insertArbre xs arbre ) f
-- | Insert every sequence into a tree that starts as 'arbreVide'.
--
-- This is a right fold: the last sequence of the list is inserted first
-- and the head of the list is inserted last.
insertArbres :: [[Terminal]] -> Arbre Terminal ()
insertArbres []               = arbreVide
insertArbres (sequence_ : rest) = insertArbre sequence_ (insertArbres rest)
-- | Rebuild a tree so that every 'Noeud' carries a 'Double' computed from
-- its children's counts; a 'Feuille' keeps only its count.
--
-- For a 'Noeud' with count @c@ the new value @e@ is a sum with one term per
-- @(k, f)@ pair: when @isFin k@ holds the term is
-- @_noeud_count f / c * log c@, otherwise it is @- c' * log c'@ with
-- @c' = _noeud_count f / c@.  The children themselves are rebuilt by
-- mapping 'entropyArbre' over them.
--
-- NOTE(review): the list of @(k, f)@ pairs being summed is not visible in
-- this excerpt; presumably it is the association list of @fils@ - confirm.
68 entropyArbre :: Arbre Terminal () -> Arbre Terminal Double
69 entropyArbre (Feuille c) = Feuille c
70 entropyArbre (Noeud c _e fils) = (Noeud c e (map entropyArbre fils))
72 e = sum $ map (\(k, f) -> case isFin k of
73 True -> (_noeud_count f) / c * log c
74 False -> - c' * log c'
76 c' = (_noeud_count f) / c
-- | Pass each direct child of the given node through 'normalizeLevel',
-- together with the list of all children of that node (the child itself
-- included).
--
-- The node's own count and entropy are kept as they are, and a 'Feuille'
-- is returned unchanged.  This function makes no recursive call, so it
-- only rewrites one level of the tree.
normalizeArbre :: Arbre Terminal Double -> Arbre Terminal Double
normalizeArbre tree = case tree of
  Feuille count -> Feuille count
  Noeud count entropy children ->
    let siblings = Map.elems children
    in  Noeud count entropy
          (Map.map (\child -> normalizeLevel child siblings) children)
-- | Rescale the entropy of one node against a list of nodes @ns@.
--
-- A 'Noeud' keeps its count and children and gets @(e - m) / v@ as its new
-- entropy; a 'Feuille' is returned unchanged.  @es@ collects the entropies
-- of @ns@.
--
-- NOTE(review): the definitions of @m@ and @v@ are not visible in this
-- excerpt; presumably they are a centre and a spread derived from @es@,
-- in which case a zero @v@ would make the division ill-defined - confirm.
--
-- NOTE(review): the visible 'Feuille' constructor declares only
-- @_noeud_count@, so applying @_noeud_entropy@ to a 'Feuille' in @ns@ would
-- presumably fail at runtime once that element of @es@ is forced - confirm.
84 normalizeLevel :: Arbre Terminal Double -> [Arbre Terminal Double] -> Arbre Terminal Double
85 normalizeLevel (Feuille c) _ = Feuille c
86 normalizeLevel (Noeud c e f) ns = Noeud c ( (e-m) / v) f
88 es = map _noeud_entropy ns
-- | Full pipeline from raw sequences to an annotated tree: insert all
-- sequences with 'insertArbres', annotate the result with 'entropyArbre',
-- then apply 'normalizeArbre'.
buildArbre :: [[Terminal]] -> Arbre Terminal Double
buildArbre sequences = normalizeArbre annotated
  where
    counted   = insertArbres sequences
    annotated = entropyArbre counted