]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Core/Ext/IMT.hs
[social] add selected lists support for new corpus
[gargantext.git] / src / Gargantext / Core / Ext / IMT.hs
1 {-|
2 Module : Gargantext.API
3 Description : Server API
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9 -}
10
11
12 module Gargantext.Core.Ext.IMT where
13
14 import Data.Either (Either(..))
15 import Data.Map (Map)
16 import Data.Text (Text, splitOn)
17
18 import qualified Data.Set as S
19 import qualified Data.List as DL
20 import qualified Data.Vector as DV
21 import qualified Data.Map as M
22 import qualified Prelude as Prelude
23
24 import Gargantext.Prelude
25
26 import Gargantext.Core.Text.Metrics.Utils as Utils
27 import Gargantext.Core.Text.Corpus.Parsers.CSV as CSV
28
29 data School = School { school_shortName :: Text
30 , school_longName :: Text
31 , school_id :: Text
32 } deriving (Show, Read, Eq)
33
34 schools :: [School]
35 schools = [ School
36 ("Mines Albi-Carmaux")
37 ("Mines Albi-Carmaux - École nationale supérieure des Mines d'Albi‐Carmaux")
38 ("469216")
39 , School
40 ("Mines Alès")
41 ("EMA - École des Mines d'Alès")
42 ("6279")
43 , School
44 ("Mines Douai")
45 ("Mines Douai EMD - École des Mines de Douai")
46 ("224096")
47 , School
48 ("Mines Lille")
49 ("Mines Lille - École des Mines de Lille")
50 ("144103")
51 , School
52 ("IMT Lille Douai")
53 ("IMT Lille Douai")
54 ("497330")
55 , School
56 ("Mines Nantes")
57 ("Mines Nantes - Mines Nantes")
58 ("84538")
59 , School
60 ("Télécom Bretagne")
61 ("Télécom Bretagne")
62 ("301262")
63 , School
64 ("IMT Atlantique")
65 ("IMT Atlantique - IMT Atlantique Bretagne-Pays de la Loire")
66 ("481355")
67 , School
68 ("Mines Saint-Étienne")
69 ("Mines Saint-Étienne MSE - École des Mines de Saint-Étienne")
70 ("29212")
71 , School
72 ("Télécom École de Management")
73 ("TEM - Télécom Ecole de Management")
74 ("301442")
75 , School
76 ("IMT Business School")
77 ("IMT Business School")
78 ("542824")
79 , School
80 ("Télécom ParisTech")
81 ("Télécom ParisTech")
82 ("300362")
83 , School
84 ("Télécom SudParis")
85 ("TSP - Télécom SudParis")
86 ("352124")
87 , School
88 ("ARMINES")
89 ("ARMINES")
90 ("300362")
91 , School
92 ("Eurecom")
93 ("Eurecom")
94 ("421532")
95 , School
96 ("Mines ParisTech")
97 ("MINES ParisTech - École nationale supérieure des mines de Paris")
98 ("301492")
99 ]
100
101 mapIdSchool :: Map Text Text
102 mapIdSchool = M.fromList $ Gargantext.Prelude.map (\(School n _ i) -> (i,n)) schools
103
104 hal_data :: IO (Either Prelude.String (DV.Vector CsvHal))
105 hal_data = do
106 r <- CSV.readCsvHal "doc/corpus_imt/Gargantext_Corpus.csv"
107 pure $ snd <$> r
108
109 names :: S.Set Text
110 names = S.fromList $ Gargantext.Prelude.map (\s -> school_id s) schools
111
112 toSchoolName :: Text -> Text
113 toSchoolName t = case M.lookup t mapIdSchool of
114 Nothing -> t
115 Just t' -> t'
116
117 publisBySchool :: DV.Vector CsvHal -> [(Maybe Text, Int)]
118 publisBySchool hal_data' = Gargantext.Prelude.map (\(i,n) -> (M.lookup i mapIdSchool, n))
119 $ DL.filter (\i -> S.member (fst i) names)
120 $ DL.reverse
121 $ DL.sortOn snd
122 $ M.toList
123 $ Utils.freq
124 $ DL.concat
125 $ DV.toList
126 $ DV.map (\n -> splitOn ( ", ") (csvHal_instStructId_i n) )
127 $ DV.filter (\n -> csvHal_publication_year n == 2017) hal_data'
128
129