]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Text/Eleve.hs
Swagger fixes
[gargantext.git] / src / Gargantext / Text / Eleve.hs
1 {-# LANGUAGE NoImplicitPrelude #-}
2 {-# LANGUAGE OverloadedStrings #-}
3 {-
4
5 Haskell implementation of EleVe, based on the Python version described in the papers:
6
7
8 -}
9 module Gargantext.Text.Eleve where
10
11
12 import Data.Ord (Ord)
13 import qualified Data.List as List
14 import Data.Monoid
15 import Data.Text hiding (map)
16 import Data.Map (Map)
17 import qualified Data.Map as Map
18 import Gargantext.Prelude
19
20 -- prop (Noeud c _e f) = c == Map.size f
21 -- TODO remove Feuille
22
-- | Sample input: the 'chunkAlong 3 1' chunks of a short sentence, each
-- turned into a 'Fin'-terminated sequence of 'Terminal's.
example :: [[Terminal]]
example = map terminal morceaux
  where
    phrase :: Text
    phrase = "New York and New York is a big apple"

    morceaux = chunkAlong 3 1 (words phrase)
27
-- | One element of an input sequence: either a word, or the 'Fin' marker
-- that closes a sequence.
data Terminal
  = Terminal Text -- ^ a word of the sequence
  | Fin           -- ^ end-of-sequence marker
  deriving (Eq, Ord, Show)
30
-- | 'True' exactly for the 'Fin' end-of-sequence marker.
isFin :: Terminal -> Bool
isFin Fin = True
isFin _   = False
35
-- | Wrap every word in 'Terminal' and close the sequence with 'Fin'.
terminal :: [Text] -> [Terminal]
terminal = List.foldr (\mot suite -> Terminal mot : suite) [Fin]
38
39
40
-- | Counting trie keyed by @k@, carrying an annotation of type @e@ on its
-- internal nodes.
--
-- Both constructors share '_noeud_count'; '_noeud_entropy' and
-- '_noeud_fils' exist only on 'Noeud', so those two selectors are partial.
data Arbre k e
  = Noeud
      { _noeud_count   :: Double             -- ^ counter stored on the node
      , _noeud_entropy :: e                  -- ^ annotation stored on the node
      , _noeud_fils    :: Map k (Arbre k e)  -- ^ children, indexed by key
      }
  | Feuille
      { _noeud_count :: Double               -- ^ counter stored on the leaf
      }
  deriving (Show)
47
-- | The empty tree: a leaf whose count is zero.
arbreVide :: Arbre k e
arbreVide = Feuille { _noeud_count = 0 }
50
-- | Smart constructor: build a 'Feuille' when there are no children,
-- otherwise a 'Noeud' whose annotation is 'mempty'.
mkArbre :: Monoid e => Double -> Map Terminal (Arbre Terminal e) -> Arbre Terminal e
mkArbre c fils =
  if Map.null fils
    then Feuille c
    else Noeud c mempty fils
55
56
-- | Insert one sequence into the tree, adding one to the count of every
-- node on its path.
--
-- The node on which the sequence ends is counted too. Previously the empty
-- case returned the node unchanged, so the last element of every sequence
-- (the 'Fin' leaf when the input comes from 'terminal') kept a count of 0.
insertArbre :: [Terminal] -> Arbre Terminal () -> Arbre Terminal ()
-- End of the sequence: count the node we stopped on. The record update is
-- total because both constructors carry '_noeud_count'.
insertArbre [] n = n { _noeud_count = _noeud_count n + 1 }
insertArbre (x:xs) (Feuille c) =
  mkArbre (c + 1) (Map.singleton x (insertArbre xs arbreVide))
insertArbre (x:xs) (Noeud c _ f) =
  mkArbre (c + 1) (Map.insert x (insertArbre xs sousArbre) f)
  where
    -- Continue in the existing child, or start a fresh one.
    sousArbre = Map.findWithDefault arbreVide x f
64
-- | Build a tree by inserting every sequence into 'arbreVide' with a right
-- fold.
insertArbres :: [[Terminal]] -> Arbre Terminal ()
insertArbres sequences = List.foldr insertArbre arbreVide sequences
67
-- | Annotate every 'Noeud' with an entropy computed from its children's
-- counts; leaves are returned with their count unchanged.
--
-- With @p = count child / count node@, a child stored under 'Fin'
-- contributes @p * log (count node)@ and any other child contributes
-- @- p * log p@. The node's entropy is the sum of these contributions.
entropyArbre :: Arbre Terminal () -> Arbre Terminal Double
entropyArbre (Feuille c) = Feuille c
entropyArbre (Noeud c _ fils) = Noeud c entropie (map entropyArbre fils)
  where
    entropie = sum [ contribution cle enfant | (cle, enfant) <- Map.toList fils ]

    contribution cle enfant
      | isFin cle = _noeud_count enfant / c * log c
      | otherwise = - p * log p
      where
        p = _noeud_count enfant / c
79
-- | Normalise the entropy of every node against its siblings, at every
-- depth of the tree.
--
-- The root keeps its own entropy, since it has no siblings. Each child is
-- first normalised recursively (which rewrites its descendants but not its
-- own entropy), then rescaled with 'normalizeLevel' against the raw
-- entropies of its siblings. Previously only the root's direct children
-- were normalised and deeper levels were left untouched.
normalizeArbre :: Arbre Terminal Double -> Arbre Terminal Double
normalizeArbre (Feuille c) = Feuille c
normalizeArbre (Noeud c e f) = Noeud c e (Map.map normalise f)
  where
    -- Siblings are taken from the original children, so every child is
    -- compared against un-normalised entropies.
    fratrie = Map.elems f

    normalise enfant = normalizeLevel (normalizeArbre enfant) fratrie
83
-- | Rescale the entropy of one node using the entropies of the nodes at the
-- same level: @(e - mean) / variance@. Leaves are returned unchanged.
--
-- Only 'Noeud' siblings contribute to the statistics. '_noeud_entropy' is a
-- partial selector, and applying it to a 'Feuille' sibling (as the previous
-- @map _noeud_entropy ns@ did) is a runtime error as soon as the result is
-- forced.
--
-- NOTE(review): if 'variance' returns 0 (presumably the case for a single
-- sibling, or for siblings with identical entropies) the division yields
-- NaN or Infinity — confirm what callers expect.
-- NOTE(review): if @ns@ contains no 'Noeud', 'mean' and 'variance' are
-- called on an empty list — confirm their behaviour in that case.
normalizeLevel :: Arbre Terminal Double -> [Arbre Terminal Double] -> Arbre Terminal Double
normalizeLevel (Feuille c) _ = Feuille c
normalizeLevel (Noeud c e f) ns = Noeud c ((e - m) / v) f
  where
    -- The failable pattern silently skips 'Feuille' siblings.
    es = [ e' | Noeud _ e' _ <- ns ]
    m  = mean es
    v  = variance es
91
-- | Full pipeline: insert every sequence into a tree, annotate the tree
-- with entropies, then normalise those entropies.
buildArbre :: [[Terminal]] -> Arbre Terminal Double
buildArbre sequences =
  normalizeArbre $ entropyArbre $ insertArbres sequences
94
95
96
97
98