]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Core.hs
[MERGE] Phylo
[gargantext.git] / src / Gargantext / Core.hs
1 {-|
2 Module : Gargantext.Core
3 Description : Supported Natural language
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 -}
11
12 {-# LANGUAGE DeriveAnyClass #-}
13
14 module Gargantext.Core
15 where
16
17 import Data.Aeson
18 import Data.Either(Either(Left))
19 import Data.Hashable (Hashable)
20 import Data.Morpheus.Types (GQLType)
21 import Data.Swagger
22 import Data.Text (Text, pack)
23 import Data.Tuple.Extra (swap)
24 import GHC.Generics (Generic)
25 import Gargantext.Prelude
26 import Servant.API
27 import Test.QuickCheck
28 import qualified Data.Map as Map
29
30 ------------------------------------------------------------------------
31 -- | Language of a Text
32 -- For simplicity, we suppose text has an homogenous language
33 --
34 -- - EN == english
35 -- - FR == french
36 -- - DE == deutch
37 -- - IT == italian
38 -- - ES == spanish
39 -- - PL == polish
40 -- - ZH == chinese
41 --
42 -- ... add your language and help us to implement it (:
43
44 -- | All languages supported
45 -- NOTE: Use international country codes
46 -- https://en.wikipedia.org/wiki/List_of_ISO_3166_country_codes
47 data Lang = All
48 | DE
49 | EL
50 | EN
51 | ES
52 | FR
53 | IT
54 | PL
55 | PT
56 | RU
57 | UK
58 | ZH
59 deriving (Show, Eq, Ord, Enum, Bounded, Generic, GQLType)
60
61 instance ToJSON Lang
62 instance FromJSON Lang
63 instance ToSchema Lang where
64 declareNamedSchema = genericDeclareNamedSchemaUnrestricted defaultSchemaOptions
65 instance FromHttpApiData Lang
66 where
67 parseUrlPiece "All" = pure All
68 parseUrlPiece "DE" = pure DE
69 parseUrlPiece "EL" = pure EL
70 parseUrlPiece "EN" = pure EN
71 parseUrlPiece "ES" = pure ES
72 parseUrlPiece "FR" = pure FR
73 parseUrlPiece "IT" = pure IT
74 parseUrlPiece "PL" = pure PL
75 parseUrlPiece "PT" = pure PT
76 parseUrlPiece "RU" = pure RU
77 parseUrlPiece "UK" = pure UK
78 parseUrlPiece "ZH" = pure ZH
79 parseUrlPiece _ = Left "Unexpected value of Lang"
80 instance ToHttpApiData Lang where
81 toUrlPiece = pack . show
82 instance Hashable Lang
83
84 instance Arbitrary Lang where
85 arbitrary = arbitraryBoundedEnum
86
87 allLangs :: [Lang]
88 allLangs = [minBound .. maxBound]
89
90 class HasDBid a where
91 toDBid :: a -> Int
92 fromDBid :: Int -> a
93
94 -- NOTE: We try to use numeric codes for countries
95 -- https://en.wikipedia.org/wiki/List_of_ISO_3166_country_codes
96 -- https://en.wikipedia.org/wiki/ISO_3166-1_numeric#004
97 dbIds :: [(Lang, Int)]
98 dbIds = [ (All, 0 )
99 , (DE , 276)
100 , (EL , 300)
101 , (EN , 2 )
102 , (ES , 724)
103 , (FR , 1 )
104 , (IT , 380)
105 , (PL , 616)
106 , (PT , 620)
107 , (RU , 643)
108 , (UK , 804)
109 , (ZH , 156)
110 ]
111
112 instance HasDBid Lang where
113 toDBid lang = case Map.lookup lang $ Map.fromList dbIds of
114 Just la -> la
115 Nothing -> panic "[G.Core] Add this lang to DB ids"
116
117 fromDBid dbId = case Map.lookup dbId $ Map.fromList $ map swap dbIds of
118 Just la -> la
119 Nothing -> panic "HasDBid lang, not implemented"
120
121 ------------------------------------------------------------------------
122 data NLPServerConfig = NLPServerConfig
123 { server :: !PosTagAlgo
124 , url :: !URI }
125 deriving (Show, Eq, Generic)
126 ------------------------------------------------------------------------
127 type Form = Text
128 type Lem = Text
129 ------------------------------------------------------------------------
130 data PosTagAlgo = CoreNLP | JohnSnowServer | Spacy
131 deriving (Show, Read, Eq, Ord, Generic, GQLType)
132
133 instance Hashable PosTagAlgo
134
135 instance HasDBid PosTagAlgo where
136 toDBid CoreNLP = 1
137 toDBid JohnSnowServer = 2
138 toDBid Spacy = 3
139 fromDBid 1 = CoreNLP
140 fromDBid 2 = JohnSnowServer
141 fromDBid 3 = Spacy
142 fromDBid _ = panic "HasDBid posTagAlgo : Not implemented"