2 Module : Gargantext.Database.Schema.Ngrams
3 Description : Ngram connection to the Database
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
10 Ngrams connection to the Database.
14 {-# LANGUAGE Arrows #-}
15 {-# LANGUAGE DeriveGeneric #-}
16 {-# LANGUAGE FlexibleContexts #-}
17 {-# LANGUAGE FlexibleInstances #-}
18 {-# LANGUAGE FunctionalDependencies #-}
19 {-# LANGUAGE GeneralizedNewtypeDeriving #-}
20 {-# LANGUAGE MultiParamTypeClasses #-}
21 {-# LANGUAGE NoImplicitPrelude #-}
22 {-# LANGUAGE OverloadedStrings #-}
23 {-# LANGUAGE QuasiQuotes #-}
24 {-# LANGUAGE RankNTypes #-}
25 {-# LANGUAGE TemplateHaskell #-}
27 module Gargantext.Database.Schema.Ngrams
30 import Control.Lens (makeLenses, over)
31 import Control.Monad (mzero)
33 import Data.Aeson.Types (toJSONKeyText)
34 import Data.ByteString.Internal (ByteString)
35 import Data.Map (Map, fromList, lookup)
36 import Data.Profunctor.Product.TH (makeAdaptorAndInstance)
37 import Data.Swagger (ToParamSchema, toParamSchema, ToSchema)
38 import Data.Text (Text, splitOn, pack)
39 import GHC.Generics (Generic)
40 import Gargantext.Core.Types (TODO(..))
41 import Gargantext.Prelude
42 import Prelude (Enum, Bounded, minBound, maxBound, Functor)
43 import Servant (FromHttpApiData, parseUrlPiece, Proxy(..))
44 import Text.Read (read)
45 import Gargantext.Database.Schema.Prelude
46 import qualified Database.PostgreSQL.Simple as PGS
50 type NgramsTerms = Text
53 data NgramsPoly id terms n = NgramsDb { _ngrams_id :: !id
54 , _ngrams_terms :: !terms
58 type NgramsWrite = NgramsPoly (Maybe (Column PGInt4))
62 type NgramsRead = NgramsPoly (Column PGInt4)
66 type NgramsReadNull = NgramsPoly (Column (Nullable PGInt4))
67 (Column (Nullable PGText))
68 (Column (Nullable PGInt4))
70 type NgramsDb = NgramsPoly Int Text Int
72 $(makeAdaptorAndInstance "pNgramsDb" ''NgramsPoly)
73 makeLenses ''NgramsPoly
76 ngramsTable :: Table NgramsWrite NgramsRead
77 ngramsTable = Table "ngrams" (pNgramsDb NgramsDb { _ngrams_id = optional "id"
78 , _ngrams_terms = required "terms"
79 , _ngrams_n = required "n"
85 -- | Main Ngrams Types
87 -- Typed Ngrams localize the context of the ngrams
88 -- ngrams in source field of document has Sources Type
89 -- ngrams in authors field of document has Authors Type
90 -- ngrams in text (title or abstract) of documents has Terms Type
91 data NgramsType = Authors | Institutes | Sources | NgramsTerms
92 deriving (Eq, Show, Read, Ord, Enum, Bounded, Generic)
94 ngramsTypes :: [NgramsType]
95 ngramsTypes = [minBound..]
97 instance ToSchema NgramsType
99 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_nre_")
102 newtype NgramsTypeId = NgramsTypeId Int
103 deriving (Eq, Show, Ord, Num)
105 instance ToField NgramsTypeId where
106 toField (NgramsTypeId n) = toField n
108 instance FromField NgramsTypeId where
109 fromField fld mdata = do
110 n <- fromField fld mdata
111 if (n :: Int) > 0 then return $ NgramsTypeId n
114 instance FromJSON NgramsType
115 instance FromJSONKey NgramsType where
116 fromJSONKey = FromJSONKeyTextParser (parseJSON . String)
117 instance ToJSON NgramsType
118 instance ToJSONKey NgramsType where
119 toJSONKey = toJSONKeyText (pack . show)
121 instance FromHttpApiData NgramsType where
122 parseUrlPiece n = pure $ (read . cs) n
124 instance ToParamSchema NgramsType where
125 toParamSchema _ = toParamSchema (Proxy :: Proxy TODO)
128 instance QueryRunnerColumnDefault (Nullable PGInt4) NgramsTypeId
130 queryRunnerColumnDefault = fieldQueryRunnerColumn
132 pgNgramsType :: NgramsType -> Column PGInt4
133 pgNgramsType = pgNgramsTypeId . ngramsTypeId
135 pgNgramsTypeId :: NgramsTypeId -> Column PGInt4
136 pgNgramsTypeId (NgramsTypeId n) = pgInt4 n
138 ngramsTypeId :: NgramsType -> NgramsTypeId
139 ngramsTypeId Authors = 1
140 ngramsTypeId Institutes = 2
141 ngramsTypeId Sources = 3
142 ngramsTypeId NgramsTerms = 4
144 fromNgramsTypeId :: NgramsTypeId -> Maybe NgramsType
145 fromNgramsTypeId id = lookup id
146 $ fromList [ (ngramsTypeId nt,nt)
147 | nt <- [minBound .. maxBound] :: [NgramsType]
150 ------------------------------------------------------------------------
151 -- | TODO put it in Gargantext.Text.Ngrams
152 data Ngrams = Ngrams { _ngramsTerms :: Text
154 } deriving (Generic, Show, Eq, Ord)
157 instance PGS.ToRow Ngrams where
158 toRow (Ngrams t s) = [toField t, toField s]
160 text2ngrams :: Text -> Ngrams
161 text2ngrams txt = Ngrams txt $ length $ splitOn " " txt
163 -------------------------------------------------------------------------
164 -- | TODO put it in Gargantext.Text.Ngrams
165 -- Named entity are typed ngrams of Terms Ngrams
167 NgramsT { _ngramsType :: NgramsType
169 } deriving (Generic, Show, Eq, Ord)
173 instance Functor NgramsT where
175 -----------------------------------------------------------------------
179 , _ngramsId :: NgramsId
180 } deriving (Show, Generic, Eq, Ord)
182 makeLenses ''NgramsIndexed
183 ------------------------------------------------------------------------
188 } deriving (Show, Generic, Eq, Ord)
190 instance PGS.FromRow NgramIds where
191 fromRow = NgramIds <$> field <*> field
193 ----------------------
194 withMap :: Map NgramsTerms NgramsId -> NgramsTerms -> NgramsId
195 withMap m n = maybe (panic "withMap: should not happen") identity (lookup n m)
197 indexNgramsT :: Map NgramsTerms NgramsId -> NgramsT Ngrams -> NgramsT NgramsIndexed
198 indexNgramsT = fmap . indexNgramsWith . withMap
200 indexNgrams :: Map NgramsTerms NgramsId -> Ngrams -> NgramsIndexed
201 indexNgrams = indexNgramsWith . withMap
203 -- NP: not sure we need it anymore
204 indexNgramsTWith :: (NgramsTerms -> NgramsId) -> NgramsT Ngrams -> NgramsT NgramsIndexed
205 indexNgramsTWith = fmap . indexNgramsWith
207 indexNgramsWith :: (NgramsTerms -> NgramsId) -> Ngrams -> NgramsIndexed
208 indexNgramsWith f n = NgramsIndexed n (f $ _ngramsTerms n)