2 Module : Gargantext.Database.Schema.Ngrams
3 Description : Ngram connection to the Database
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
10 Ngrams connection to the Database.
14 {-# LANGUAGE Arrows #-}
15 {-# LANGUAGE FunctionalDependencies #-}
16 {-# LANGUAGE QuasiQuotes #-}
17 {-# LANGUAGE TemplateHaskell #-}
19 module Gargantext.Database.Schema.Ngrams
22 import Codec.Serialise (Serialise())
23 import Control.Lens (over)
24 import Control.Monad (mzero)
26 import Data.Aeson.Types (toJSONKeyText)
27 import Data.Map (Map, fromList, lookup)
28 import Data.Text (Text, splitOn, pack)
29 import Gargantext.Core.Types (TODO(..))
30 import Gargantext.Prelude
31 import Prelude (Functor)
32 import Servant (FromHttpApiData, parseUrlPiece, Proxy(..))
33 import Text.Read (read)
34 import Gargantext.Database.Schema.Prelude
35 import qualified Database.PostgreSQL.Simple as PGS
39 type NgramsTerms = Text
42 data NgramsPoly id terms n = NgramsDB { _ngrams_id :: !id
43 , _ngrams_terms :: !terms
47 type NgramsWrite = NgramsPoly (Maybe (Column PGInt4))
51 type NgramsRead = NgramsPoly (Column PGInt4)
55 type NgramsReadNull = NgramsPoly (Column (Nullable PGInt4))
56 (Column (Nullable PGText))
57 (Column (Nullable PGInt4))
59 type NgramsDB = NgramsPoly Int Text Int
61 $(makeAdaptorAndInstance "pNgramsDb" ''NgramsPoly)
62 makeLenses ''NgramsPoly
65 ngramsTable :: Table NgramsWrite NgramsRead
66 ngramsTable = Table "ngrams" (pNgramsDb NgramsDB { _ngrams_id = optional "id"
67 , _ngrams_terms = required "terms"
68 , _ngrams_n = required "n"
74 -- | Main Ngrams Types
76 -- Typed Ngrams localize the context of the ngrams
77 -- ngrams in source field of document has Sources Type
78 -- ngrams in authors field of document has Authors Type
79 -- ngrams in text (title or abstract) of documents has Terms Type
80 data NgramsType = Authors | Institutes | Sources | NgramsTerms
81 deriving (Eq, Show, Read, Ord, Enum, Bounded, Generic)
83 instance Serialise NgramsType
85 ngramsTypes :: [NgramsType]
86 ngramsTypes = [minBound..]
88 instance ToSchema NgramsType
90 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_nre_")
93 newtype NgramsTypeId = NgramsTypeId Int
94 deriving (Eq, Show, Ord, Num)
96 instance ToField NgramsTypeId where
97 toField (NgramsTypeId n) = toField n
99 instance FromField NgramsTypeId where
100 fromField fld mdata = do
101 n <- fromField fld mdata
102 if (n :: Int) > 0 then return $ NgramsTypeId n
105 instance FromJSON NgramsType
106 instance FromJSONKey NgramsType where
107 fromJSONKey = FromJSONKeyTextParser (parseJSON . String)
108 instance ToJSON NgramsType
109 instance ToJSONKey NgramsType where
110 toJSONKey = toJSONKeyText (pack . show)
112 instance FromHttpApiData NgramsType where
113 parseUrlPiece n = pure $ (read . cs) n
115 instance ToParamSchema NgramsType where
116 toParamSchema _ = toParamSchema (Proxy :: Proxy TODO)
119 instance QueryRunnerColumnDefault (Nullable PGInt4) NgramsTypeId
121 queryRunnerColumnDefault = fieldQueryRunnerColumn
123 pgNgramsType :: NgramsType -> Column PGInt4
124 pgNgramsType = pgNgramsTypeId . ngramsTypeId
126 pgNgramsTypeId :: NgramsTypeId -> Column PGInt4
127 pgNgramsTypeId (NgramsTypeId n) = pgInt4 n
129 ngramsTypeId :: NgramsType -> NgramsTypeId
130 ngramsTypeId Authors = 1
131 ngramsTypeId Institutes = 2
132 ngramsTypeId Sources = 3
133 ngramsTypeId NgramsTerms = 4
135 fromNgramsTypeId :: NgramsTypeId -> Maybe NgramsType
136 fromNgramsTypeId id = lookup id
137 $ fromList [ (ngramsTypeId nt,nt)
138 | nt <- [minBound .. maxBound] :: [NgramsType]
141 ------------------------------------------------------------------------
142 -- | TODO put it in Gargantext.Core.Text.Ngrams
143 data Ngrams = Ngrams { _ngramsTerms :: Text
145 } deriving (Generic, Show, Eq, Ord)
148 instance PGS.ToRow Ngrams where
149 toRow (Ngrams t s) = [toField t, toField s]
151 text2ngrams :: Text -> Ngrams
152 text2ngrams txt = Ngrams txt $ length $ splitOn " " txt
154 -------------------------------------------------------------------------
155 -- | TODO put it in Gargantext.Core.Text.Ngrams
156 -- Named entity are typed ngrams of Terms Ngrams
158 NgramsT { _ngramsType :: NgramsType
160 } deriving (Generic, Show, Eq, Ord)
164 instance Functor NgramsT where
166 -----------------------------------------------------------------------
170 , _ngramsId :: NgramsId
171 } deriving (Show, Generic, Eq, Ord)
173 makeLenses ''NgramsIndexed
174 ------------------------------------------------------------------------
179 } deriving (Show, Generic, Eq, Ord)
181 instance PGS.FromRow NgramIds where
182 fromRow = NgramIds <$> field <*> field
184 ----------------------
185 withMap :: Map NgramsTerms NgramsId -> NgramsTerms -> NgramsId
186 withMap m n = maybe (panic "withMap: should not happen") identity (lookup n m)
188 indexNgramsT :: Map NgramsTerms NgramsId -> NgramsT Ngrams -> NgramsT NgramsIndexed
189 indexNgramsT = fmap . indexNgramsWith . withMap
191 indexNgrams :: Map NgramsTerms NgramsId -> Ngrams -> NgramsIndexed
192 indexNgrams = indexNgramsWith . withMap
194 -- NP: not sure we need it anymore
195 indexNgramsTWith :: (NgramsTerms -> NgramsId) -> NgramsT Ngrams -> NgramsT NgramsIndexed
196 indexNgramsTWith = fmap . indexNgramsWith
198 indexNgramsWith :: (NgramsTerms -> NgramsId) -> Ngrams -> NgramsIndexed
199 indexNgramsWith f n = NgramsIndexed n (f $ _ngramsTerms n)