]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Core/Ext/IMT.hs
[FIX] Regex error with Duckling
[gargantext.git] / src / Gargantext / Core / Ext / IMT.hs
{-|
Module      : Gargantext.Core.Ext.IMT
Description : IMT schools and per-school publication counts from the HAL corpus
Copyright   : (c) CNRS, 2017-Present
License     : AGPL + CECILL v3
Maintainer  : team@gargantext.org
Stability   : experimental
Portability : POSIX
-}
10
11
12 module Gargantext.Core.Ext.IMT where
13
14 import Data.Either (Either(..))
15 import Data.Map (Map)
16 import Data.Text (Text, splitOn)
17
18 import qualified Data.Set as S
19 import qualified Data.List as DL
20 import qualified Data.Vector as DV
21 import qualified Data.Map as M
22 import qualified Prelude
23
24 import Gargantext.Prelude
25
26 import Gargantext.Core.Text.Metrics.Utils as Utils
27 import Gargantext.Core.Text.Corpus.Parsers.CSV as CSV
28
-- | One school (or affiliated institution) of the IMT list below.
data School = School
  { school_shortName :: Text
    -- ^ Short display name, e.g. @"Mines Nantes"@.
  , school_longName :: Text
    -- ^ Longer, descriptive name of the same school.
  , school_id :: Text
    -- ^ Identifier kept as text; it is compared against the pieces of
    -- @csvHal_instStructId_i@ in 'publisBySchool'
    -- (presumably a HAL structure id -- TODO confirm).
  }
  deriving (Show, Read, Eq)
33
-- | The schools known to this module, in a fixed order.
--
-- Each entry is written positionally as
-- @School shortName longName id@ (see the 'School' record).
--
-- NOTE(review): the id @"300362"@ is used by both "Télécom ParisTech" and
-- "ARMINES". Any map keyed on 'school_id' can keep only one of them --
-- confirm which identifier is wrong.
schools :: [School]
schools =
  [ School "Mines Albi-Carmaux"
           "Mines Albi-Carmaux - École nationale supérieure des Mines d'Albi‐Carmaux"
           "469216"
  , School "Mines Alès"
           "EMA - École des Mines d'Alès"
           "6279"
  , School "Mines Douai"
           "Mines Douai EMD - École des Mines de Douai"
           "224096"
  , School "Mines Lille"
           "Mines Lille - École des Mines de Lille"
           "144103"
  , School "IMT Lille Douai"
           "IMT Lille Douai"
           "497330"
  , School "Mines Nantes"
           "Mines Nantes - Mines Nantes"
           "84538"
  , School "Télécom Bretagne"
           "Télécom Bretagne"
           "301262"
  , School "IMT Atlantique"
           "IMT Atlantique - IMT Atlantique Bretagne-Pays de la Loire"
           "481355"
  , School "Mines Saint-Étienne"
           "Mines Saint-Étienne MSE - École des Mines de Saint-Étienne"
           "29212"
  , School "Télécom École de Management"
           "TEM - Télécom Ecole de Management"
           "301442"
  , School "IMT Business School"
           "IMT Business School"
           "542824"
  , School "Télécom ParisTech"
           "Télécom ParisTech"
           "300362"
  , School "Télécom SudParis"
           "TSP - Télécom SudParis"
           "352124"
  , School "ARMINES"
           "ARMINES"
           "300362"
  , School "Eurecom"
           "Eurecom"
           "421532"
  , School "Mines ParisTech"
           "MINES ParisTech - École nationale supérieure des mines de Paris"
           "301492"
  ]
100
-- | Lookup table from 'school_id' to 'school_shortName', built from 'schools'.
--
-- 'M.fromList' keeps the last value for a repeated key, so when two schools
-- share an id only the later entry of 'schools' is reachable here.
mapIdSchool :: Map Text Text
mapIdSchool =
  M.fromList [ (school_id s, school_shortName s) | s <- schools ]
104
-- | Read the HAL corpus CSV at @doc/corpus_imt/Gargantext_Corpus.csv@
-- (a relative path) through 'CSV.readCsvHal'.
--
-- A 'Left' from the reader is passed through unchanged; on 'Right' only the
-- second component of the reader's pair (the vector of rows) is kept.
hal_data :: IO (Either Prelude.String (DV.Vector CsvHal))
hal_data =
  -- Outer '<$>' maps over 'IO', inner one over 'Either'.
  (snd <$>) <$> CSV.readCsvHal "doc/corpus_imt/Gargantext_Corpus.csv"
109
-- | Set of every 'school_id' found in 'schools'.
--
-- Despite its name, this set contains identifiers, not school names; it is
-- used by 'publisBySchool' to keep only known schools.
names :: S.Set Text
names = S.fromList [ school_id s | s <- schools ]
112
-- | Translate a school identifier into its short name using 'mapIdSchool'.
--
-- An identifier that is not a key of 'mapIdSchool' is returned unchanged.
toSchoolName :: Text -> Text
toSchoolName ident = M.findWithDefault ident ident mapIdSchool
117
-- | Rank the known schools by how often their id appears in the rows
-- published in 2017.
--
-- Steps:
--
--   1. keep the rows whose @csvHal_publication_year@ equals 2017;
--   2. split each row's @csvHal_instStructId_i@ on @", "@ and pool the pieces;
--   3. tally the pieces with 'Utils.freq' (presumably an occurrence count
--      per distinct piece -- confirm against its definition);
--   4. order the tallies by descending count ('DL.reverse' of an ascending
--      'DL.sortOn', which also reverses the order of equal counts);
--   5. keep only the ids that belong to 'names' and look each one up in
--      'mapIdSchool'.
--
-- Because 'names' and the keys of 'mapIdSchool' both come from 'schools',
-- the first component of every result pair is a 'Just'.
publisBySchool :: DV.Vector CsvHal -> [(Maybe Text, Int)]
publisBySchool hal_data' =
  [ (M.lookup sid mapIdSchool, count)
  | (sid, count) <- ranked
  , S.member sid names
  ]
  where
    -- Rows restricted to the hard-coded publication year.
    rows2017 =
      DV.toList $ DV.filter (\row -> csvHal_publication_year row == 2017) hal_data'

    -- Every structure id mentioned by those rows, duplicates included.
    structIds =
      DL.concatMap (\row -> splitOn ", " (csvHal_instStructId_i row)) rows2017

    -- (id, tally) pairs, largest tally first.
    ranked =
      DL.reverse $ DL.sortOn snd $ M.toList $ Utils.freq structIds
129
130