]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Core/Ext/IMT.hs
[ngrams] implemented doc_count column
[gargantext.git] / src / Gargantext / Core / Ext / IMT.hs
1 {-# LANGUAGE DeriveAnyClass #-}
2
3 {-|
4 Module : Gargantext.API
5 Description : Server API
6 Copyright : (c) CNRS, 2017-Present
7 License : AGPL + CECILL v3
8 Maintainer : team@gargantext.org
9 Stability : experimental
10 Portability : POSIX
11 -}
12
13
14 module Gargantext.Core.Ext.IMT where
15
16 import Data.Either (Either(..))
17 import Data.Map (Map)
18 import Data.Text (Text, splitOn)
19
20 import qualified Data.Set as S
21 import qualified Data.List as DL
22 import qualified Data.Vector as DV
23 import qualified Data.Map as M
24 import qualified Prelude
25
26 import Data.Morpheus.Types (GQLType)
27 import GHC.Generics (Generic)
28
29 import Gargantext.Prelude
30
31 import Gargantext.Core.Text.Metrics.Utils as Utils
32 import Gargantext.Core.Text.Corpus.Parsers.CSV as CSV
33
34 data School = School { school_shortName :: Text
35 , school_longName :: Text
36 , school_id :: Text
37 } deriving (Generic, GQLType, Show, Read, Eq)
38
39 schools :: [School]
40 schools =
41 [ School
42 { school_shortName = "ARMINES"
43 , school_longName = "ARMINES"
44 , school_id = "300104" }
45 , School
46 { school_shortName = "Eurecom"
47 , school_longName = "Eurecom"
48 , school_id = "421532" }
49 , School
50 { school_shortName = "IMT Atlantique"
51 , school_longName = "IMT Atlantique - IMT Atlantique Bretagne-Pays de la Loire"
52 , school_id = "481355" }
53 , School
54 { school_shortName = "IMT Business School"
55 , school_longName = "IMT Business School"
56 , school_id = "542824" }
57 , School
58 { school_shortName = "IMT Lille Douai"
59 , school_longName = "IMT Lille Douai"
60 , school_id = "497330" }
61 , School
62 { school_shortName = "IP Paris - Institut Polytechnique de Paris"
63 , school_longName = "IP Paris - Institut Polytechnique de Paris"
64 , school_id = "563936" }
65 , School
66 { school_shortName = "Mines Albi-Carmaux"
67 , school_longName = "Mines Albi-Carmaux - École nationale supérieure des Mines d'Albi‐Carmaux"
68 , school_id = "469216" }
69 , School
70 { school_shortName = "Mines Alès"
71 , school_longName = "EMA - École des Mines d'Alès"
72 , school_id = "6279" }
73 , School
74 { school_shortName = "Mines Douai"
75 , school_longName = "Mines Douai EMD - École des Mines de Douai"
76 , school_id = "224096" }
77 , School
78 { school_shortName = "Mines Lille"
79 , school_longName = "Mines Lille - École des Mines de Lille"
80 , school_id = "144103" }
81 , School
82 { school_shortName = "Mines Nantes"
83 , school_longName = "Mines Nantes - Mines Nantes"
84 , school_id = "84538" }
85 , School
86 { school_shortName = "Mines ParisTech"
87 , school_longName = "MINES ParisTech - École nationale supérieure des mines de Paris"
88 , school_id = "301492" }
89 , School
90 { school_shortName = "Mines Saint-Étienne"
91 , school_longName = "Mines Saint-Étienne MSE - École des Mines de Saint-Étienne"
92 , school_id = "29212" }
93 , School
94 { school_shortName = "Télécom Bretagne"
95 , school_longName = "Télécom Bretagne"
96 , school_id = "301262" }
97 , School
98 { school_shortName = "Télécom École de Management"
99 , school_longName = "TEM - Télécom Ecole de Management"
100 , school_id = "301442" }
101 , School
102 { school_shortName = "Télécom ParisTech"
103 , school_longName = "Télécom ParisTech"
104 , school_id = "300362" }
105 , School
106 { school_shortName = "Télécom SudParis"
107 , school_longName = "TSP - Télécom SudParis"
108 , school_id = "352124" }
109 ]
110
111 mapIdSchool :: Map Text Text
112 mapIdSchool = M.fromList $ Gargantext.Prelude.map
113 (\(School { school_shortName, school_id }) -> (school_id, school_shortName)) schools
114
115 hal_data :: IO (Either Prelude.String (DV.Vector CsvHal))
116 hal_data = do
117 r <- CSV.readCsvHal "doc/corpus_imt/Gargantext_Corpus.csv"
118 pure $ snd <$> r
119
120 names :: S.Set Text
121 names = S.fromList $ Gargantext.Prelude.map (\s -> school_id s) schools
122
123 toSchoolName :: Text -> Text
124 toSchoolName t = case M.lookup t mapIdSchool of
125 Nothing -> t
126 Just t' -> t'
127
128 publisBySchool :: DV.Vector CsvHal -> [(Maybe Text, Int)]
129 publisBySchool hal_data' = Gargantext.Prelude.map (\(i,n) -> (M.lookup i mapIdSchool, n))
130 $ DL.filter (\i -> S.member (fst i) names)
131 $ DL.reverse
132 $ DL.sortOn snd
133 $ M.toList
134 $ Utils.freq
135 $ DL.concat
136 $ DV.toList
137 $ DV.map (\n -> splitOn ( ", ") (csvHal_instStructId_i n) )
138 $ DV.filter (\n -> csvHal_publication_year n == 2017) hal_data'
139
140