2 Module : Gargantext.Database.Schema.Ngrams
3 Description : Ngram connection to the Database
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
10 Ngrams connection to the Database.
14 {-# LANGUAGE Arrows #-}
15 {-# LANGUAGE FunctionalDependencies #-}
16 {-# LANGUAGE QuasiQuotes #-}
17 {-# LANGUAGE TemplateHaskell #-}
19 module Gargantext.Database.Schema.Ngrams
22 import Codec.Serialise (Serialise())
23 import Control.Lens (makeLenses, over)
24 import Control.Monad (mzero)
26 import Data.Aeson.Types (toJSONKeyText)
27 import Data.Map (Map, fromList, lookup)
28 import Data.Profunctor.Product.TH (makeAdaptorAndInstance)
29 import Data.Swagger (ToParamSchema, toParamSchema, ToSchema)
30 import Data.Text (Text, splitOn, pack)
31 import GHC.Generics (Generic)
32 import Gargantext.Core.Types (TODO(..))
33 import Gargantext.Prelude
34 import Prelude (Enum, Bounded, minBound, maxBound, Functor)
35 import Servant (FromHttpApiData, parseUrlPiece, Proxy(..))
36 import Text.Read (read)
37 import Gargantext.Database.Schema.Prelude
38 import qualified Database.PostgreSQL.Simple as PGS
42 type NgramsTerms = Text
45 data NgramsPoly id terms n = NgramsDb { _ngrams_id :: !id
46 , _ngrams_terms :: !terms
50 type NgramsWrite = NgramsPoly (Maybe (Column PGInt4))
54 type NgramsRead = NgramsPoly (Column PGInt4)
58 type NgramsReadNull = NgramsPoly (Column (Nullable PGInt4))
59 (Column (Nullable PGText))
60 (Column (Nullable PGInt4))
62 type NgramsDb = NgramsPoly Int Text Int
64 $(makeAdaptorAndInstance "pNgramsDb" ''NgramsPoly)
65 makeLenses ''NgramsPoly
68 ngramsTable :: Table NgramsWrite NgramsRead
69 ngramsTable = Table "ngrams" (pNgramsDb NgramsDb { _ngrams_id = optional "id"
70 , _ngrams_terms = required "terms"
71 , _ngrams_n = required "n"
77 -- | Main Ngrams Types
79 -- Typed Ngrams localize the context of the ngrams
80 -- ngrams in source field of document has Sources Type
81 -- ngrams in authors field of document has Authors Type
82 -- ngrams in text (title or abstract) of documents has Terms Type
83 data NgramsType = Authors | Institutes | Sources | NgramsTerms
84 deriving (Eq, Show, Read, Ord, Enum, Bounded, Generic)
86 instance Serialise NgramsType
88 ngramsTypes :: [NgramsType]
89 ngramsTypes = [minBound..]
91 instance ToSchema NgramsType
93 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_nre_")
96 newtype NgramsTypeId = NgramsTypeId Int
97 deriving (Eq, Show, Ord, Num)
99 instance ToField NgramsTypeId where
100 toField (NgramsTypeId n) = toField n
102 instance FromField NgramsTypeId where
103 fromField fld mdata = do
104 n <- fromField fld mdata
105 if (n :: Int) > 0 then return $ NgramsTypeId n
108 instance FromJSON NgramsType
109 instance FromJSONKey NgramsType where
110 fromJSONKey = FromJSONKeyTextParser (parseJSON . String)
111 instance ToJSON NgramsType
112 instance ToJSONKey NgramsType where
113 toJSONKey = toJSONKeyText (pack . show)
115 instance FromHttpApiData NgramsType where
116 parseUrlPiece n = pure $ (read . cs) n
118 instance ToParamSchema NgramsType where
119 toParamSchema _ = toParamSchema (Proxy :: Proxy TODO)
122 instance QueryRunnerColumnDefault (Nullable PGInt4) NgramsTypeId
124 queryRunnerColumnDefault = fieldQueryRunnerColumn
126 pgNgramsType :: NgramsType -> Column PGInt4
127 pgNgramsType = pgNgramsTypeId . ngramsTypeId
129 pgNgramsTypeId :: NgramsTypeId -> Column PGInt4
130 pgNgramsTypeId (NgramsTypeId n) = pgInt4 n
132 ngramsTypeId :: NgramsType -> NgramsTypeId
133 ngramsTypeId Authors = 1
134 ngramsTypeId Institutes = 2
135 ngramsTypeId Sources = 3
136 ngramsTypeId NgramsTerms = 4
138 fromNgramsTypeId :: NgramsTypeId -> Maybe NgramsType
139 fromNgramsTypeId id = lookup id
140 $ fromList [ (ngramsTypeId nt,nt)
141 | nt <- [minBound .. maxBound] :: [NgramsType]
144 ------------------------------------------------------------------------
145 -- | TODO put it in Gargantext.Text.Ngrams
146 data Ngrams = Ngrams { _ngramsTerms :: Text
148 } deriving (Generic, Show, Eq, Ord)
151 instance PGS.ToRow Ngrams where
152 toRow (Ngrams t s) = [toField t, toField s]
154 text2ngrams :: Text -> Ngrams
155 text2ngrams txt = Ngrams txt $ length $ splitOn " " txt
157 -------------------------------------------------------------------------
158 -- | TODO put it in Gargantext.Text.Ngrams
159 -- Named entity are typed ngrams of Terms Ngrams
161 NgramsT { _ngramsType :: NgramsType
163 } deriving (Generic, Show, Eq, Ord)
167 instance Functor NgramsT where
169 -----------------------------------------------------------------------
173 , _ngramsId :: NgramsId
174 } deriving (Show, Generic, Eq, Ord)
176 makeLenses ''NgramsIndexed
177 ------------------------------------------------------------------------
182 } deriving (Show, Generic, Eq, Ord)
184 instance PGS.FromRow NgramIds where
185 fromRow = NgramIds <$> field <*> field
187 ----------------------
188 withMap :: Map NgramsTerms NgramsId -> NgramsTerms -> NgramsId
189 withMap m n = maybe (panic "withMap: should not happen") identity (lookup n m)
191 indexNgramsT :: Map NgramsTerms NgramsId -> NgramsT Ngrams -> NgramsT NgramsIndexed
192 indexNgramsT = fmap . indexNgramsWith . withMap
194 indexNgrams :: Map NgramsTerms NgramsId -> Ngrams -> NgramsIndexed
195 indexNgrams = indexNgramsWith . withMap
197 -- NP: not sure we need it anymore
198 indexNgramsTWith :: (NgramsTerms -> NgramsId) -> NgramsT Ngrams -> NgramsT NgramsIndexed
199 indexNgramsTWith = fmap . indexNgramsWith
201 indexNgramsWith :: (NgramsTerms -> NgramsId) -> Ngrams -> NgramsIndexed
202 indexNgramsWith f n = NgramsIndexed n (f $ _ngramsTerms n)