4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
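Heterogeneity measures over texts: the total number of occurrences divided
by the number of distinct keys of the occurrence map, computed either on a
whole text or on fixed-size chunks of the concatenated texts of a corpus.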
14 module Gargantext.Text.Hetero where
import qualified Control.Exception as Exception

import Data.List.Split as S
import Database.PostgreSQL.Simple as PGS
import Gargantext.Database.Admin.Gargandb
import Gargantext.Database.Admin.Private
import Gargantext.Database.Simple
import Gargantext.Text.Count (occurrences)
import Gargantext.Text.Words (cleanText)
import Opaleye.Internal.Column (Column)
import Opaleye.PGTypes (PGInt4)
31 -- print (Prelude.map (heterogeinity . concat) $ S.chunksOf 3 t)
-- | Heterogeneity over the concatenated texts of a corpus.
--
-- Fetches texts with @getAbstract corpus_id limit@, concatenates them,
-- passes the result through 'cleanText', cuts it into consecutive chunks of
-- @x@ elements and returns one 'dicoStruct' score per chunk, computed on the
-- 'occurrences' of that chunk.
heterogeinity' :: Int -> Int -> Int -> IO [Integer]
heterogeinity' corpus_id limit x =
  getAbstract corpus_id limit >>= \abstracts ->
    let chunks = S.chunksOf x (cleanText (concat abstracts))
    in  Prelude.mapM (dicoStruct . occurrences) chunks
-- | Per-chunk heterogeneity over the concatenated texts of a corpus, with
-- the chunk length named @size@.
--
-- This is exactly the computation performed by @heterogeinity'@; it
-- delegates to it so the two entry points cannot drift apart.
heterogeinity'' :: Int -> Int -> Int -> IO [Integer]
heterogeinity'' corpus_id limit size = heterogeinity' corpus_id limit size
-- | Integer quotient of the sum of all values of an occurrence map by its
-- number of keys.
--
-- Returns 0 for an empty map: there is nothing to average, and dividing by
-- the (zero) key count would otherwise raise a divide-by-zero exception.
dicoStruct :: (Integral r, Monad m) => M.Map t r -> m r
dicoStruct dict_occ
  | M.null dict_occ = return 0
  | otherwise       = return $ total_occ `div` keys_size
  where
    total_occ = sum (M.elems dict_occ)
    keys_size = fromIntegral (M.size dict_occ)
-- | Heterogeneity of a single UCT (textual context unit).
--
-- The text is passed through 'cleanText' and 'occurrences'; the score is
-- the sum of the occurrence counts divided (integer division) by the number
-- of distinct keys, as computed by 'dicoStruct'.
--
-- An empty occurrence map yields 0 instead of a divide-by-zero exception.
heterogeinity :: [Char] -> IO Integer
heterogeinity string
  | M.null dict_occ = return 0
  | otherwise       = dicoStruct dict_occ
  where
    dict_occ = occurrences $ cleanText string
62 --computeHeterogeinity
64 -- Opaleye.Internal.Column.Column Opaleye.PGTypes.PGInt4
65 -- -> IO (t, Integer, Integer)
-- | Compute the heterogeneity of the texts attached to a corpus.
--
-- Opens a PostgreSQL connection with @infoGargandb@, fetches the texts with
-- @getText@ on @nodeHyperdataText corpus_id@, and scores their
-- concatenation with 'heterogeinity'.
computeHeterogeinity corpus_id = do
  -- bracket guarantees the connection is closed, even if the query throws;
  -- previously the connection was never released.
  t <- Exception.bracket (PGS.connect infoGargandb) PGS.close $ \c ->
         getText c (nodeHyperdataText corpus_id)
  heterogeinity $ Prelude.concat t
74 ,("Histoire", 1387736) -- 28
75 ,("Sciences Po", 1296892) -- 37
76 ,("Phylosophie", 1170004) -- 20
77 ,("Psychologie", 1345852) -- 37
78 ,("Sociologie", 1246452) -- 42
81 r <- Prelude.map computeHeterogeinity $ Prelude.map (\(t,id) -> id) corpus_ids