]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Ext/IMT.hs
[ngrams] fix version bumpup after new term added
[gargantext.git] / src / Gargantext / Ext / IMT.hs
1 {-|
2 Module : Gargantext.Ext.IMT
3 Description : IMT schools reference list and per-school publication counts
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9 -}
10
11
12 module Gargantext.Ext.IMT where
13
14 import Gargantext.Prelude
15 import Data.Text (Text, splitOn)
16 import Data.Map (Map)
17
18 import qualified Data.Set as S
19 import qualified Data.List as DL
20 import qualified Data.Vector as DV
21 import qualified Data.Map as M
22
23 import Gargantext.Text.Metrics.Freq as F
24 import Gargantext.Text.Corpus.Parsers.CSV as CSV
25
-- | One school of the list below, described by two display names and an id.
data School = School
  { school_shortName :: Text -- ^ Short display name (the value stored in 'mapIdSchool').
  , school_longName  :: Text -- ^ Longer, more descriptive label.
  , school_id        :: Text -- ^ Identifier, kept as text; used as lookup key.
  }
  deriving (Show, Read, Eq)
30
-- | Reference list of schools: short name, long name, id (in that order).
schools :: [School]
schools =
  [ School "Mines Albi-Carmaux"
           "Mines Albi-Carmaux - École nationale supérieure des Mines d'Albi‐Carmaux"
           "469216"
  , School "Mines Alès"
           "EMA - École des Mines d'Alès"
           "6279"
  , School "Mines Douai"
           "Mines Douai EMD - École des Mines de Douai"
           "224096"
  , School "Mines Lille"
           "Mines Lille - École des Mines de Lille"
           "144103"
  , School "IMT Lille Douai"
           "IMT Lille Douai"
           "497330"
  , School "Mines Nantes"
           "Mines Nantes - Mines Nantes"
           "84538"
  , School "Télécom Bretagne"
           "Télécom Bretagne"
           "301262"
  , School "IMT Atlantique"
           "IMT Atlantique - IMT Atlantique Bretagne-Pays de la Loire"
           "481355"
  , School "Mines Saint-Étienne"
           "Mines Saint-Étienne MSE - École des Mines de Saint-Étienne"
           "29212"
  , School "Télécom École de Management"
           "TEM - Télécom Ecole de Management"
           "301442"
  , School "IMT Business School"
           "IMT Business School"
           "542824"
  , School "Télécom ParisTech"
           "Télécom ParisTech"
           "300362"
  , School "Télécom SudParis"
           "TSP - Télécom SudParis"
           "352124"
  -- NOTE(review): "300362" is also the id of "Télécom ParisTech" above.
  -- 'mapIdSchool' is built with M.fromList, so for that id this later entry
  -- wins — looks like a copy/paste slip; confirm the actual ARMINES id.
  , School "ARMINES"
           "ARMINES"
           "300362"
  , School "Eurecom"
           "Eurecom"
           "421532"
  , School "Mines ParisTech"
           "MINES ParisTech - École nationale supérieure des mines de Paris"
           "301492"
  ]
97
-- | Lookup table from a school's id to its short name, built from 'schools'.
--
-- When several entries of 'schools' share the same id, 'M.fromList' keeps
-- the one that comes last in the list.
mapIdSchool :: Map Text Text
mapIdSchool =
  M.fromList [ (school_id s, school_shortName s) | s <- schools ]
100
-- | Read the CSV export at a fixed, relative path.
--
-- 'CSV.readCsvHal' yields a pair; only its second component, the vector of
-- 'CsvHal' rows, is returned (the first component is discarded).
hal_data :: IO (DV.Vector CsvHal)
hal_data = snd <$> CSV.readCsvHal corpusPath
  where
    -- Relative path: resolved against the process's working directory.
    corpusPath = "doc/corpus_imt/Gargantext_Corpus.csv"
103
-- | Set of the ids of every entry in 'schools'.
--
-- Despite its name, this set holds school /ids/ ('school_id'), not names.
names :: S.Set Text
names = S.fromList [ school_id s | s <- schools ]
106
-- | Translate a school id into its short name using 'mapIdSchool'.
--
-- An id that is not in the table is returned unchanged.
toSchoolName :: Text -> Text
toSchoolName t = M.findWithDefault t t mapIdSchool
111
-- | Per-school tally over the rows whose publication year is 2017.
--
-- Each result pairs the school's short name (looked up in 'mapIdSchool')
-- with the tally that 'F.freq' computed for that school's id. Entries are
-- ordered by decreasing tally, and only ids present in 'names' are kept.
--
-- NOTE(review): 'names' and the keys of 'mapIdSchool' are both derived from
-- 'schools', so the 'Maybe' is presumably always 'Just' here.
publisBySchool :: DV.Vector CsvHal -> [(Maybe Text, Int)]
publisBySchool hal_data' =
  [ (M.lookup structId mapIdSchool, count)
  | (structId, count) <- ranked
  , S.member structId names
  ]
  where
    -- Keep only the rows published in 2017.
    rows2017 =
      DV.filter (\row -> csvHal_publication_year row == 2017) hal_data'

    -- The id field of a row is split on ", "; all pieces of all rows are
    -- gathered into one flat list.
    structIds =
      DL.concat
        $ DV.toList
        $ DV.map (\row -> splitOn ", " (csvHal_instStructId_i row)) rows2017

    -- (id, tally) pairs: ascending sort on the tally, then reversed so the
    -- largest tallies come first.
    ranked = DL.reverse $ DL.sortOn snd $ M.toList $ F.freq structIds
123
124