2 Module : Gargantext.Database.Schema.Ngrams
3 Description : Ngram connection to the Database
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
10 Ngrams connection to the Database.
14 {-# LANGUAGE Arrows #-}
15 {-# LANGUAGE DeriveGeneric #-}
16 {-# LANGUAGE FlexibleContexts #-}
17 {-# LANGUAGE FlexibleInstances #-}
18 {-# LANGUAGE FunctionalDependencies #-}
19 {-# LANGUAGE GeneralizedNewtypeDeriving #-}
20 {-# LANGUAGE MultiParamTypeClasses #-}
21 {-# LANGUAGE NoImplicitPrelude #-}
22 {-# LANGUAGE OverloadedStrings #-}
23 {-# LANGUAGE QuasiQuotes #-}
24 {-# LANGUAGE RankNTypes #-}
25 {-# LANGUAGE TemplateHaskell #-}
27 module Gargantext.Database.Schema.Ngrams
30 import Control.Lens (makeLenses, over)
31 import Control.Monad (mzero)
33 import Data.Aeson.Types (toJSONKeyText)
34 import Data.Map (Map, fromList, lookup)
35 import Data.Profunctor.Product.TH (makeAdaptorAndInstance)
36 import Data.Swagger (ToParamSchema, toParamSchema, ToSchema)
37 import Data.Text (Text, splitOn, pack)
38 import GHC.Generics (Generic)
39 import Gargantext.Core.Types (TODO(..))
40 import Gargantext.Prelude
41 import Prelude (Enum, Bounded, minBound, maxBound, Functor)
42 import Servant (FromHttpApiData, parseUrlPiece, Proxy(..))
43 import Text.Read (read)
44 import Gargantext.Database.Schema.Prelude
45 import qualified Database.PostgreSQL.Simple as PGS
49 type NgramsTerms = Text
52 data NgramsPoly id terms n = NgramsDb { _ngrams_id :: !id
53 , _ngrams_terms :: !terms
57 type NgramsWrite = NgramsPoly (Maybe (Column PGInt4))
61 type NgramsRead = NgramsPoly (Column PGInt4)
65 type NgramsReadNull = NgramsPoly (Column (Nullable PGInt4))
66 (Column (Nullable PGText))
67 (Column (Nullable PGInt4))
69 type NgramsDb = NgramsPoly Int Text Int
71 $(makeAdaptorAndInstance "pNgramsDb" ''NgramsPoly)
72 makeLenses ''NgramsPoly
75 ngramsTable :: Table NgramsWrite NgramsRead
76 ngramsTable = Table "ngrams" (pNgramsDb NgramsDb { _ngrams_id = optional "id"
77 , _ngrams_terms = required "terms"
78 , _ngrams_n = required "n"
84 -- | Main Ngrams Types
86 -- Typed Ngrams localize the context of the ngrams
87 -- ngrams in source field of document has Sources Type
88 -- ngrams in authors field of document has Authors Type
89 -- ngrams in text (title or abstract) of documents has Terms Type
90 data NgramsType = Authors | Institutes | Sources | NgramsTerms
91 deriving (Eq, Show, Read, Ord, Enum, Bounded, Generic)
93 ngramsTypes :: [NgramsType]
94 ngramsTypes = [minBound..]
96 instance ToSchema NgramsType
98 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_nre_")
101 newtype NgramsTypeId = NgramsTypeId Int
102 deriving (Eq, Show, Ord, Num)
104 instance ToField NgramsTypeId where
105 toField (NgramsTypeId n) = toField n
107 instance FromField NgramsTypeId where
108 fromField fld mdata = do
109 n <- fromField fld mdata
110 if (n :: Int) > 0 then return $ NgramsTypeId n
113 instance FromJSON NgramsType
114 instance FromJSONKey NgramsType where
115 fromJSONKey = FromJSONKeyTextParser (parseJSON . String)
116 instance ToJSON NgramsType
117 instance ToJSONKey NgramsType where
118 toJSONKey = toJSONKeyText (pack . show)
120 instance FromHttpApiData NgramsType where
121 parseUrlPiece n = pure $ (read . cs) n
123 instance ToParamSchema NgramsType where
124 toParamSchema _ = toParamSchema (Proxy :: Proxy TODO)
127 instance QueryRunnerColumnDefault (Nullable PGInt4) NgramsTypeId
129 queryRunnerColumnDefault = fieldQueryRunnerColumn
131 pgNgramsType :: NgramsType -> Column PGInt4
132 pgNgramsType = pgNgramsTypeId . ngramsTypeId
134 pgNgramsTypeId :: NgramsTypeId -> Column PGInt4
135 pgNgramsTypeId (NgramsTypeId n) = pgInt4 n
137 ngramsTypeId :: NgramsType -> NgramsTypeId
138 ngramsTypeId Authors = 1
139 ngramsTypeId Institutes = 2
140 ngramsTypeId Sources = 3
141 ngramsTypeId NgramsTerms = 4
143 fromNgramsTypeId :: NgramsTypeId -> Maybe NgramsType
144 fromNgramsTypeId id = lookup id
145 $ fromList [ (ngramsTypeId nt,nt)
146 | nt <- [minBound .. maxBound] :: [NgramsType]
149 ------------------------------------------------------------------------
150 -- | TODO put it in Gargantext.Text.Ngrams
151 data Ngrams = Ngrams { _ngramsTerms :: Text
153 } deriving (Generic, Show, Eq, Ord)
156 instance PGS.ToRow Ngrams where
157 toRow (Ngrams t s) = [toField t, toField s]
159 text2ngrams :: Text -> Ngrams
160 text2ngrams txt = Ngrams txt $ length $ splitOn " " txt
162 -------------------------------------------------------------------------
163 -- | TODO put it in Gargantext.Text.Ngrams
164 -- Named entity are typed ngrams of Terms Ngrams
166 NgramsT { _ngramsType :: NgramsType
168 } deriving (Generic, Show, Eq, Ord)
172 instance Functor NgramsT where
174 -----------------------------------------------------------------------
178 , _ngramsId :: NgramsId
179 } deriving (Show, Generic, Eq, Ord)
181 makeLenses ''NgramsIndexed
182 ------------------------------------------------------------------------
187 } deriving (Show, Generic, Eq, Ord)
189 instance PGS.FromRow NgramIds where
190 fromRow = NgramIds <$> field <*> field
192 ----------------------
193 withMap :: Map NgramsTerms NgramsId -> NgramsTerms -> NgramsId
194 withMap m n = maybe (panic "withMap: should not happen") identity (lookup n m)
196 indexNgramsT :: Map NgramsTerms NgramsId -> NgramsT Ngrams -> NgramsT NgramsIndexed
197 indexNgramsT = fmap . indexNgramsWith . withMap
199 indexNgrams :: Map NgramsTerms NgramsId -> Ngrams -> NgramsIndexed
200 indexNgrams = indexNgramsWith . withMap
202 -- NP: not sure we need it anymore
203 indexNgramsTWith :: (NgramsTerms -> NgramsId) -> NgramsT Ngrams -> NgramsT NgramsIndexed
204 indexNgramsTWith = fmap . indexNgramsWith
206 indexNgramsWith :: (NgramsTerms -> NgramsId) -> Ngrams -> NgramsIndexed
207 indexNgramsWith f n = NgramsIndexed n (f $ _ngramsTerms n)