2 Module : Gargantext.Database.Schema.Ngrams
3 Description : Ngram connection to the Database
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
10 Ngrams connection to the Database.
14 {-# LANGUAGE Arrows #-}
15 {-# LANGUAGE DeriveGeneric #-}
16 {-# LANGUAGE FlexibleContexts #-}
17 {-# LANGUAGE FlexibleInstances #-}
18 {-# LANGUAGE FunctionalDependencies #-}
19 {-# LANGUAGE GeneralizedNewtypeDeriving #-}
20 {-# LANGUAGE MultiParamTypeClasses #-}
21 {-# LANGUAGE NoImplicitPrelude #-}
22 {-# LANGUAGE OverloadedStrings #-}
23 {-# LANGUAGE QuasiQuotes #-}
24 {-# LANGUAGE RankNTypes #-}
25 {-# LANGUAGE TemplateHaskell #-}
27 module Gargantext.Database.Schema.Ngrams
30 import Codec.Serialise (Serialise())
31 import Control.Lens (makeLenses, over)
32 import Control.Monad (mzero)
34 import Data.Aeson.Types (toJSONKeyText)
35 import Data.Map (Map, fromList, lookup)
36 import Data.Profunctor.Product.TH (makeAdaptorAndInstance)
37 import Data.Swagger (ToParamSchema, toParamSchema, ToSchema)
38 import Data.Text (Text, splitOn, pack)
39 import GHC.Generics (Generic)
40 import Gargantext.Core.Types (TODO(..))
41 import Gargantext.Prelude
42 import Prelude (Enum, Bounded, minBound, maxBound, Functor)
43 import Servant (FromHttpApiData, parseUrlPiece, Proxy(..))
44 import Text.Read (read)
45 import Gargantext.Database.Schema.Prelude
46 import qualified Database.PostgreSQL.Simple as PGS
50 type NgramsTerms = Text
53 data NgramsPoly id terms n = NgramsDb { _ngrams_id :: !id
54 , _ngrams_terms :: !terms
58 type NgramsWrite = NgramsPoly (Maybe (Column PGInt4))
62 type NgramsRead = NgramsPoly (Column PGInt4)
66 type NgramsReadNull = NgramsPoly (Column (Nullable PGInt4))
67 (Column (Nullable PGText))
68 (Column (Nullable PGInt4))
70 type NgramsDb = NgramsPoly Int Text Int
72 $(makeAdaptorAndInstance "pNgramsDb" ''NgramsPoly)
73 makeLenses ''NgramsPoly
76 ngramsTable :: Table NgramsWrite NgramsRead
77 ngramsTable = Table "ngrams" (pNgramsDb NgramsDb { _ngrams_id = optional "id"
78 , _ngrams_terms = required "terms"
79 , _ngrams_n = required "n"
85 -- | Main Ngrams Types
87 -- Typed Ngrams localize the context of the ngrams
88 -- ngrams in source field of document has Sources Type
89 -- ngrams in authors field of document has Authors Type
90 -- ngrams in text (title or abstract) of documents has Terms Type
91 data NgramsType = Authors | Institutes | Sources | NgramsTerms
92 deriving (Eq, Show, Read, Ord, Enum, Bounded, Generic)
94 instance Serialise NgramsType
96 ngramsTypes :: [NgramsType]
97 ngramsTypes = [minBound..]
99 instance ToSchema NgramsType
101 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_nre_")
104 newtype NgramsTypeId = NgramsTypeId Int
105 deriving (Eq, Show, Ord, Num)
107 instance ToField NgramsTypeId where
108 toField (NgramsTypeId n) = toField n
110 instance FromField NgramsTypeId where
111 fromField fld mdata = do
112 n <- fromField fld mdata
113 if (n :: Int) > 0 then return $ NgramsTypeId n
116 instance FromJSON NgramsType
117 instance FromJSONKey NgramsType where
118 fromJSONKey = FromJSONKeyTextParser (parseJSON . String)
119 instance ToJSON NgramsType
120 instance ToJSONKey NgramsType where
121 toJSONKey = toJSONKeyText (pack . show)
123 instance FromHttpApiData NgramsType where
124 parseUrlPiece n = pure $ (read . cs) n
126 instance ToParamSchema NgramsType where
127 toParamSchema _ = toParamSchema (Proxy :: Proxy TODO)
130 instance QueryRunnerColumnDefault (Nullable PGInt4) NgramsTypeId
132 queryRunnerColumnDefault = fieldQueryRunnerColumn
134 pgNgramsType :: NgramsType -> Column PGInt4
135 pgNgramsType = pgNgramsTypeId . ngramsTypeId
137 pgNgramsTypeId :: NgramsTypeId -> Column PGInt4
138 pgNgramsTypeId (NgramsTypeId n) = pgInt4 n
140 ngramsTypeId :: NgramsType -> NgramsTypeId
141 ngramsTypeId Authors = 1
142 ngramsTypeId Institutes = 2
143 ngramsTypeId Sources = 3
144 ngramsTypeId NgramsTerms = 4
146 fromNgramsTypeId :: NgramsTypeId -> Maybe NgramsType
147 fromNgramsTypeId id = lookup id
148 $ fromList [ (ngramsTypeId nt,nt)
149 | nt <- [minBound .. maxBound] :: [NgramsType]
152 ------------------------------------------------------------------------
153 -- | TODO put it in Gargantext.Text.Ngrams
154 data Ngrams = Ngrams { _ngramsTerms :: Text
156 } deriving (Generic, Show, Eq, Ord)
159 instance PGS.ToRow Ngrams where
160 toRow (Ngrams t s) = [toField t, toField s]
162 text2ngrams :: Text -> Ngrams
163 text2ngrams txt = Ngrams txt $ length $ splitOn " " txt
165 -------------------------------------------------------------------------
166 -- | TODO put it in Gargantext.Text.Ngrams
167 -- Named entity are typed ngrams of Terms Ngrams
169 NgramsT { _ngramsType :: NgramsType
171 } deriving (Generic, Show, Eq, Ord)
175 instance Functor NgramsT where
177 -----------------------------------------------------------------------
181 , _ngramsId :: NgramsId
182 } deriving (Show, Generic, Eq, Ord)
184 makeLenses ''NgramsIndexed
185 ------------------------------------------------------------------------
190 } deriving (Show, Generic, Eq, Ord)
192 instance PGS.FromRow NgramIds where
193 fromRow = NgramIds <$> field <*> field
195 ----------------------
196 withMap :: Map NgramsTerms NgramsId -> NgramsTerms -> NgramsId
197 withMap m n = maybe (panic "withMap: should not happen") identity (lookup n m)
199 indexNgramsT :: Map NgramsTerms NgramsId -> NgramsT Ngrams -> NgramsT NgramsIndexed
200 indexNgramsT = fmap . indexNgramsWith . withMap
202 indexNgrams :: Map NgramsTerms NgramsId -> Ngrams -> NgramsIndexed
203 indexNgrams = indexNgramsWith . withMap
205 -- NP: not sure we need it anymore
206 indexNgramsTWith :: (NgramsTerms -> NgramsId) -> NgramsT Ngrams -> NgramsT NgramsIndexed
207 indexNgramsTWith = fmap . indexNgramsWith
209 indexNgramsWith :: (NgramsTerms -> NgramsId) -> Ngrams -> NgramsIndexed
210 indexNgramsWith f n = NgramsIndexed n (f $ _ngramsTerms n)