]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Database/Schema/Ngrams.hs
[Community pairing] alignment step
[gargantext.git] / src / Gargantext / Database / Schema / Ngrams.hs
1 {-|
2 Module : Gargantext.Database.Schema.Ngrams
3 Description : Ngram connection to the Database
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 Ngrams connection to the Database.
11
12 -}
13
14 {-# LANGUAGE Arrows #-}
15 {-# LANGUAGE FunctionalDependencies #-}
16 {-# LANGUAGE QuasiQuotes #-}
17 {-# LANGUAGE TemplateHaskell #-}
18
19 module Gargantext.Database.Schema.Ngrams
20 where
21
22 import Codec.Serialise (Serialise())
23 import Control.Lens (makeLenses, over)
24 import Control.Monad (mzero)
25 import Data.Aeson
26 import Data.Aeson.Types (toJSONKeyText)
27 import Data.Map (Map, fromList, lookup)
28 import Data.Profunctor.Product.TH (makeAdaptorAndInstance)
29 import Data.Swagger (ToParamSchema, toParamSchema, ToSchema)
30 import Data.Text (Text, splitOn, pack)
31 import GHC.Generics (Generic)
32 import Gargantext.Core.Types (TODO(..))
33 import Gargantext.Prelude
34 import Prelude (Enum, Bounded, minBound, maxBound, Functor)
35 import Servant (FromHttpApiData, parseUrlPiece, Proxy(..))
36 import Text.Read (read)
37 import Gargantext.Database.Schema.Prelude
38 import qualified Database.PostgreSQL.Simple as PGS
39
40
41 type NgramsId = Int
42 type NgramsTerms = Text
43 type Size = Int
44
45 data NgramsPoly id terms n = NgramsDb { _ngrams_id :: !id
46 , _ngrams_terms :: !terms
47 , _ngrams_n :: !n
48 } deriving (Show)
49
50 type NgramsWrite = NgramsPoly (Maybe (Column PGInt4))
51 (Column PGText)
52 (Column PGInt4)
53
54 type NgramsRead = NgramsPoly (Column PGInt4)
55 (Column PGText)
56 (Column PGInt4)
57
58 type NgramsReadNull = NgramsPoly (Column (Nullable PGInt4))
59 (Column (Nullable PGText))
60 (Column (Nullable PGInt4))
61
62 type NgramsDb = NgramsPoly Int Text Int
63
64 $(makeAdaptorAndInstance "pNgramsDb" ''NgramsPoly)
65 makeLenses ''NgramsPoly
66
67
68 ngramsTable :: Table NgramsWrite NgramsRead
69 ngramsTable = Table "ngrams" (pNgramsDb NgramsDb { _ngrams_id = optional "id"
70 , _ngrams_terms = required "terms"
71 , _ngrams_n = required "n"
72 }
73 )
74
75
76
77 -- | Main Ngrams Types
78 -- | Typed Ngrams
79 -- Typed Ngrams localize the context of the ngrams
80 -- ngrams in source field of document has Sources Type
81 -- ngrams in authors field of document has Authors Type
82 -- ngrams in text (title or abstract) of documents has Terms Type
83 data NgramsType = Authors | Institutes | Sources | NgramsTerms
84 deriving (Eq, Show, Read, Ord, Enum, Bounded, Generic)
85
86 instance Serialise NgramsType
87
88 ngramsTypes :: [NgramsType]
89 ngramsTypes = [minBound..]
90
91 instance ToSchema NgramsType
92 {- where
93 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_nre_")
94 --}
95
96 newtype NgramsTypeId = NgramsTypeId Int
97 deriving (Eq, Show, Ord, Num)
98
99 instance ToField NgramsTypeId where
100 toField (NgramsTypeId n) = toField n
101
102 instance FromField NgramsTypeId where
103 fromField fld mdata = do
104 n <- fromField fld mdata
105 if (n :: Int) > 0 then return $ NgramsTypeId n
106 else mzero
107
108 instance FromJSON NgramsType
109 instance FromJSONKey NgramsType where
110 fromJSONKey = FromJSONKeyTextParser (parseJSON . String)
111 instance ToJSON NgramsType
112 instance ToJSONKey NgramsType where
113 toJSONKey = toJSONKeyText (pack . show)
114
115 instance FromHttpApiData NgramsType where
116 parseUrlPiece n = pure $ (read . cs) n
117
118 instance ToParamSchema NgramsType where
119 toParamSchema _ = toParamSchema (Proxy :: Proxy TODO)
120
121
122 instance QueryRunnerColumnDefault (Nullable PGInt4) NgramsTypeId
123 where
124 queryRunnerColumnDefault = fieldQueryRunnerColumn
125
126 pgNgramsType :: NgramsType -> Column PGInt4
127 pgNgramsType = pgNgramsTypeId . ngramsTypeId
128
129 pgNgramsTypeId :: NgramsTypeId -> Column PGInt4
130 pgNgramsTypeId (NgramsTypeId n) = pgInt4 n
131
132 ngramsTypeId :: NgramsType -> NgramsTypeId
133 ngramsTypeId Authors = 1
134 ngramsTypeId Institutes = 2
135 ngramsTypeId Sources = 3
136 ngramsTypeId NgramsTerms = 4
137
138 fromNgramsTypeId :: NgramsTypeId -> Maybe NgramsType
139 fromNgramsTypeId id = lookup id
140 $ fromList [ (ngramsTypeId nt,nt)
141 | nt <- [minBound .. maxBound] :: [NgramsType]
142 ]
143
144 ------------------------------------------------------------------------
145 -- | TODO put it in Gargantext.Text.Ngrams
146 data Ngrams = Ngrams { _ngramsTerms :: Text
147 , _ngramsSize :: Int
148 } deriving (Generic, Show, Eq, Ord)
149
150 makeLenses ''Ngrams
151 instance PGS.ToRow Ngrams where
152 toRow (Ngrams t s) = [toField t, toField s]
153
154 text2ngrams :: Text -> Ngrams
155 text2ngrams txt = Ngrams txt $ length $ splitOn " " txt
156
157 -------------------------------------------------------------------------
158 -- | TODO put it in Gargantext.Text.Ngrams
159 -- Named entity are typed ngrams of Terms Ngrams
160 data NgramsT a =
161 NgramsT { _ngramsType :: NgramsType
162 , _ngramsT :: a
163 } deriving (Generic, Show, Eq, Ord)
164
165 makeLenses ''NgramsT
166
167 instance Functor NgramsT where
168 fmap = over ngramsT
169 -----------------------------------------------------------------------
170 data NgramsIndexed =
171 NgramsIndexed
172 { _ngrams :: Ngrams
173 , _ngramsId :: NgramsId
174 } deriving (Show, Generic, Eq, Ord)
175
176 makeLenses ''NgramsIndexed
177 ------------------------------------------------------------------------
178 data NgramIds =
179 NgramIds
180 { ngramId :: Int
181 , ngramTerms :: Text
182 } deriving (Show, Generic, Eq, Ord)
183
184 instance PGS.FromRow NgramIds where
185 fromRow = NgramIds <$> field <*> field
186
187 ----------------------
188 withMap :: Map NgramsTerms NgramsId -> NgramsTerms -> NgramsId
189 withMap m n = maybe (panic "withMap: should not happen") identity (lookup n m)
190
191 indexNgramsT :: Map NgramsTerms NgramsId -> NgramsT Ngrams -> NgramsT NgramsIndexed
192 indexNgramsT = fmap . indexNgramsWith . withMap
193
194 indexNgrams :: Map NgramsTerms NgramsId -> Ngrams -> NgramsIndexed
195 indexNgrams = indexNgramsWith . withMap
196
197 -- NP: not sure we need it anymore
198 indexNgramsTWith :: (NgramsTerms -> NgramsId) -> NgramsT Ngrams -> NgramsT NgramsIndexed
199 indexNgramsTWith = fmap . indexNgramsWith
200
201 indexNgramsWith :: (NgramsTerms -> NgramsId) -> Ngrams -> NgramsIndexed
202 indexNgramsWith f n = NgramsIndexed n (f $ _ngramsTerms n)
203
204
205
206