]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Ext/IMT.hs
Merge branch 'bayes'
[gargantext.git] / src / Gargantext / Ext / IMT.hs
1 {-|
2 Module : Gargantext.Ext.IMT
3 Description : IMT schools and their HAL publication counts
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9 -}
10
11 {-# LANGUAGE NoImplicitPrelude #-}
12
13 module Gargantext.Ext.IMT where
14
15 import Gargantext.Prelude
16 import Data.Text (Text, pack, splitOn)
17 import Data.Map (Map)
18
19 import qualified Data.Set as S
20 import qualified Data.List as DL
21 import qualified Data.Vector as DV
22 import qualified Data.Map as M
23
24 import Gargantext.Text.Metrics.Freq as F
25 import Gargantext.Text.Parsers.CSV as CSV
26
-- | One school, described by two display names and the identifier that
-- 'names' and 'mapIdSchool' are keyed on.
data School = School
  { school_shortName :: Text -- ^ Short display name; the value stored in 'mapIdSchool'.
  , school_longName  :: Text -- ^ Long display name.
  , school_id        :: Text -- ^ Identifier, kept as text; the key of 'mapIdSchool'.
  }
  deriving (Eq, Show, Read)
31
-- | The schools this module knows about.
--
-- Each entry is written as a (short name, long name, id) triple of plain
-- strings and converted to a 'School' with 'pack'. The Mines ParisTech
-- entry is commented out and therefore not part of the list.
schools :: [School]
schools =
  [ School (pack shortName) (pack longName) (pack ident)
  | (shortName, longName, ident) <- entries
  ]
  where
    entries =
      [ ( "Mines Albi-Carmaux"
        , "Mines Albi-Carmaux - École nationale supérieure des Mines d'Albi‐Carmaux"
        , "469216" )
      , ( "Mines Alès"
        , "EMA - École des Mines d'Alès"
        , "6279" )
      , ( "Mines Douai"
        , "Mines Douai EMD - École des Mines de Douai"
        , "224096" )
      , ( "Mines Nantes"
        , "Mines Nantes - Mines Nantes"
        , "84538" )
      -- , ( "Mines ParisTech"
      --   , "MINES ParisTech - École nationale supérieure des mines de Paris"
      --   , "301492" )
      , ( "Mines Saint-Étienne"
        , "Mines Saint-Étienne MSE - École des Mines de Saint-Étienne"
        , "29212" )
      , ( "Télécom Bretagne"
        , "Télécom Bretagne"
        , "301262" )
      , ( "Télécom École de Management"
        , "TEM - Télécom Ecole de Management"
        , "301442" )
      , ( "Télécom ParisTech"
        , "Télécom ParisTech"
        , "300362" )
      , ( "Télécom SudParis"
        , "TSP - Télécom SudParis"
        , "352124" )
      , ( "IMT Atlantique"
        , "IMT Atlantique - IMT Atlantique Bretagne-Pays de la Loire"
        , "481355" )
      ]
87
-- | Lookup table from a school's id ('school_id') to its short name
-- ('school_shortName'), built from 'schools'.
mapIdSchool :: Map Text Text
mapIdSchool =
  M.fromList [ (school_id school, school_shortName school) | school <- schools ]
90
-- | Reads the corpus CSV with 'CSV.readHal' and returns the second
-- component of its result, the vector of 'CsvHal' rows.
--
-- The path is relative. NOTE(review): the discarded first component is
-- presumably the CSV header — confirm against "Gargantext.Text.Parsers.CSV".
hal_data :: IO (DV.Vector CsvHal)
hal_data = snd <$> CSV.readHal corpusPath
  where
    corpusPath = "doc/corpus_imt/Gargantext_Corpus.csv"
93
-- | The set of all school ids found in 'schools'.
--
-- Despite its name, this holds 'school_id' values, not school names.
names :: S.Set Text
names = S.fromList [ school_id school | school <- schools ]
96
97
-- | Per-school counts for the documents whose publication year is 2017.
--
-- Steps, in evaluation order:
--
--   1. keep only the rows with @csvHal_publication_year == 2017@;
--   2. split each row's @csvHal_instStructId_i@ on @", "@ and gather all
--      resulting pieces into one list;
--   3. feed that list to 'F.freq' (presumably an occurrence count per
--      distinct value — confirm in "Gargantext.Text.Metrics.Freq");
--   4. order the (id, count) pairs by count, largest first;
--   5. drop every id that is not a member of 'names';
--   6. replace each remaining id by its 'mapIdSchool' lookup, which is why
--      the first component is a 'Maybe'.
publisBySchool :: DV.Vector CsvHal -> [(Maybe Text, Int)]
publisBySchool hal_data' =
  [ (M.lookup structId mapIdSchool, count)
  | (structId, count) <- rankedCounts
  , S.member structId names
  ]
  where
    -- Rows restricted to the hard-coded year.
    docs2017 = DV.filter (\doc -> csvHal_publication_year doc == 2017) hal_data'

    -- Every structure id mentioned by those rows, one list element per mention.
    structIds =
      DL.concatMap
        (\doc -> splitOn (pack ", ") (csvHal_instStructId_i doc))
        (DV.toList docs2017)

    -- Ascending sort on the count, then reversed to put the largest first.
    rankedCounts = DL.reverse (DL.sortOn snd (M.toList (F.freq structIds)))
109