]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Core.hs
[openalex] add support for language filter in queries
[gargantext.git] / src / Gargantext / Core.hs
1 {-|
2 Module : Gargantext.Core
3 Description : Supported Natural language
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 -}
11
12 {-# LANGUAGE DeriveAnyClass #-}
13
14 module Gargantext.Core
15 where
16
17 import Data.Aeson
18 import Data.Either(Either(Left))
19 import Data.Hashable (Hashable)
20 import Data.Morpheus.Types (GQLType)
21 import Data.Swagger
22 import Data.Text (Text, pack)
23 import Data.Tuple.Extra (swap)
24 import GHC.Generics (Generic)
25 import Gargantext.Prelude
26 import Servant.API
27 import Test.QuickCheck
28 import qualified Data.Map as Map
29
30 ------------------------------------------------------------------------
31 -- | Language of a Text
32 -- For simplicity, we suppose text has an homogenous language
33 --
34 -- - EN == english
35 -- - FR == french
36 -- - DE == deutch
37 -- - IT == italian
38 -- - ES == spanish
39 -- - PL == polish
40 -- - ZH == chinese
41 --
42 -- ... add your language and help us to implement it (:
43
44 -- | All languages supported
45 -- NOTE: Use international country codes
46 -- https://en.wikipedia.org/wiki/List_of_ISO_3166_country_codes
47 data Lang = All
48 | DE
49 | EL
50 | EN
51 | ES
52 | FR
53 | IT
54 | PL
55 | PT
56 | RU
57 | UK
58 | ZH
59 deriving (Show, Eq, Ord, Enum, Bounded, Generic, GQLType)
60
61 instance ToJSON Lang
62 instance FromJSON Lang
63 instance ToSchema Lang where
64 declareNamedSchema = genericDeclareNamedSchemaUnrestricted defaultSchemaOptions
65 instance FromHttpApiData Lang
66 where
67 parseUrlPiece "All" = pure All
68 parseUrlPiece "DE" = pure DE
69 parseUrlPiece "EL" = pure EL
70 parseUrlPiece "EN" = pure EN
71 parseUrlPiece "ES" = pure ES
72 parseUrlPiece "FR" = pure FR
73 parseUrlPiece "IT" = pure IT
74 parseUrlPiece "PL" = pure PL
75 parseUrlPiece "PT" = pure PT
76 parseUrlPiece "RU" = pure RU
77 parseUrlPiece "UK" = pure UK
78 parseUrlPiece "ZH" = pure ZH
79 parseUrlPiece _ = Left "Unexpected value of Lang"
80 instance ToHttpApiData Lang where
81 toUrlPiece = pack . show
82 instance Hashable Lang
83 instance Arbitrary Lang where
84 arbitrary = arbitraryBoundedEnum
85
86 -- | https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes
87 toISO639Lang :: Lang -> Maybe Text
88 toISO639Lang All = Nothing
89 toISO639Lang DE = Just "de"
90 toISO639Lang EL = Just "el"
91 toISO639Lang EN = Just "en"
92 toISO639Lang ES = Just "es"
93 toISO639Lang FR = Just "fr"
94 toISO639Lang IT = Just "it"
95 toISO639Lang PL = Just "pl"
96 toISO639Lang PT = Just "pt"
97 toISO639Lang RU = Just "ru"
98 toISO639Lang UK = Just "uk"
99 toISO639Lang ZH = Just "zh"
100
101 allLangs :: [Lang]
102 allLangs = [minBound .. maxBound]
103
104 class HasDBid a where
105 toDBid :: a -> Int
106 fromDBid :: Int -> a
107
108 -- NOTE: We try to use numeric codes for countries
109 -- https://en.wikipedia.org/wiki/List_of_ISO_3166_country_codes
110 -- https://en.wikipedia.org/wiki/ISO_3166-1_numeric#004
111 dbIds :: [(Lang, Int)]
112 dbIds = [ (All, 0 )
113 , (DE , 276)
114 , (EL , 300)
115 , (EN , 2 )
116 , (ES , 724)
117 , (FR , 1 )
118 , (IT , 380)
119 , (PL , 616)
120 , (PT , 620)
121 , (RU , 643)
122 , (UK , 804)
123 , (ZH , 156)
124 ]
125
126 instance HasDBid Lang where
127 toDBid lang = case Map.lookup lang $ Map.fromList dbIds of
128 Just la -> la
129 Nothing -> panic "[G.Core] Add this lang to DB ids"
130
131 fromDBid dbId = case Map.lookup dbId $ Map.fromList $ map swap dbIds of
132 Just la -> la
133 Nothing -> panic "HasDBid lang, not implemented"
134
135 ------------------------------------------------------------------------
136 data NLPServerConfig = NLPServerConfig
137 { server :: !PosTagAlgo
138 , url :: !URI }
139 deriving (Show, Eq, Generic)
140 ------------------------------------------------------------------------
141 type Form = Text
142 type Lem = Text
143 ------------------------------------------------------------------------
144 data PosTagAlgo = CoreNLP | JohnSnowServer | Spacy
145 deriving (Show, Read, Eq, Ord, Generic, GQLType)
146
147 instance Hashable PosTagAlgo
148
149 instance HasDBid PosTagAlgo where
150 toDBid CoreNLP = 1
151 toDBid JohnSnowServer = 2
152 toDBid Spacy = 3
153 fromDBid 1 = CoreNLP
154 fromDBid 2 = JohnSnowServer
155 fromDBid 3 = Spacy
156 fromDBid _ = panic "HasDBid posTagAlgo : Not implemented"