2 Module : Gargantext.Database.Schema.NgramsPostag
3 Description : Ngram connection to the Database
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
10 Ngrams connection to the Database.
14 {-# LANGUAGE Arrows #-}
15 {-# LANGUAGE FunctionalDependencies #-}
16 {-# LANGUAGE QuasiQuotes #-}
17 {-# LANGUAGE TemplateHaskell #-}
19 module Gargantext.Database.Schema.Ngrams
22 import Data.HashMap.Strict (HashMap)
23 import Data.Hashable (Hashable)
24 import Codec.Serialise (Serialise())
25 import Control.Lens (over)
26 import Control.Monad (mzero)
28 import Data.Aeson.Types (toJSONKeyText)
29 import Data.Map (Map, fromList, lookup)
30 import Data.Text (Text, splitOn, pack, strip)
31 import Gargantext.Core.Types (TODO(..), Typed(..))
32 import Gargantext.Prelude
33 import Prelude (Functor)
34 import Servant (FromHttpApiData, parseUrlPiece, Proxy(..))
35 import Text.Read (read)
36 import Gargantext.Database.Types
37 import Gargantext.Database.Schema.Prelude
38 import qualified Database.PostgreSQL.Simple as PGS
39 import qualified Data.HashMap.Strict as HashMap
45 data NgramsPoly id terms n = NgramsDB { _ngrams_id :: !id
46 , _ngrams_terms :: !terms
50 type NgramsWrite = NgramsPoly (Maybe (Column PGInt4))
54 type NgramsRead = NgramsPoly (Column PGInt4)
58 type NgramsReadNull = NgramsPoly (Column (Nullable PGInt4))
59 (Column (Nullable PGText))
60 (Column (Nullable PGInt4))
62 type NgramsDB = NgramsPoly Int Text Int
64 $(makeAdaptorAndInstance "pNgramsDb" ''NgramsPoly)
65 makeLenses ''NgramsPoly
68 ngramsTable :: Table NgramsWrite NgramsRead
69 ngramsTable = Table "ngrams" (pNgramsDb NgramsDB { _ngrams_id = optional "id"
70 , _ngrams_terms = required "terms"
71 , _ngrams_n = required "n"
75 -- | Main Ngrams Types
77 -- Typed Ngrams localize the context of the ngrams
78 -- ngrams in source field of document has Sources Type
79 -- ngrams in authors field of document has Authors Type
80 -- ngrams in text fields of documents has Terms Type (i.e. either title or abstract)
81 data NgramsType = Authors | Institutes | Sources | NgramsTerms
82 deriving (Eq, Show, Read, Ord, Enum, Bounded, Generic)
84 instance Serialise NgramsType
86 ngramsTypes :: [NgramsType]
87 ngramsTypes = [minBound..]
89 instance ToSchema NgramsType
91 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_nre_")
94 newtype NgramsTypeId = NgramsTypeId Int
95 deriving (Eq, Show, Ord, Num)
97 instance ToField NgramsTypeId where
98 toField (NgramsTypeId n) = toField n
100 instance FromField NgramsTypeId where
101 fromField fld mdata = do
102 n <- fromField fld mdata
103 if (n :: Int) > 0 then return $ NgramsTypeId n
106 instance FromJSON NgramsType
107 instance FromJSONKey NgramsType where
108 fromJSONKey = FromJSONKeyTextParser (parseJSON . String)
110 instance ToJSON NgramsType
111 instance ToJSONKey NgramsType where
112 toJSONKey = toJSONKeyText (pack . show)
114 instance FromHttpApiData NgramsType where
115 parseUrlPiece n = pure $ (read . cs) n
117 instance ToParamSchema NgramsType where
118 toParamSchema _ = toParamSchema (Proxy :: Proxy TODO)
121 instance QueryRunnerColumnDefault (Nullable PGInt4) NgramsTypeId
123 queryRunnerColumnDefault = fieldQueryRunnerColumn
125 pgNgramsType :: NgramsType -> Column PGInt4
126 pgNgramsType = pgNgramsTypeId . ngramsTypeId
128 pgNgramsTypeId :: NgramsTypeId -> Column PGInt4
129 pgNgramsTypeId (NgramsTypeId n) = pgInt4 n
131 ngramsTypeId :: NgramsType -> NgramsTypeId
132 ngramsTypeId Authors = 1
133 ngramsTypeId Institutes = 2
134 ngramsTypeId Sources = 3
135 ngramsTypeId NgramsTerms = 4
137 fromNgramsTypeId :: NgramsTypeId -> Maybe NgramsType
138 fromNgramsTypeId id = lookup id
139 $ fromList [ (ngramsTypeId nt,nt)
140 | nt <- [minBound .. maxBound] :: [NgramsType]
143 ------------------------------------------------------------------------
144 ------------------------------------------------------------------------
145 -- | TODO put it in Gargantext.Core.Text.Ngrams
146 data Ngrams = UnsafeNgrams { _ngramsTerms :: Text
149 deriving (Generic, Show, Eq, Ord)
151 instance Hashable Ngrams
154 instance PGS.ToRow Ngrams where
155 toRow (UnsafeNgrams t s) = [toField t, toField s]
157 instance FromField Ngrams where
158 fromField fld mdata = do
159 x <- fromField fld mdata
162 text2ngrams :: Text -> Ngrams
163 text2ngrams txt = UnsafeNgrams txt' $ length $ splitOn " " txt'
168 ------------------------------------------------------------------------
169 -------------------------------------------------------------------------
170 -- | TODO put it in Gargantext.Core.Text.Ngrams
171 -- Named entity are typed ngrams of Terms Ngrams
173 NgramsT { _ngramsType :: NgramsType
175 } deriving (Generic, Show, Eq, Ord)
179 instance Functor NgramsT where
182 -----------------------------------------------------------------------
183 withMap :: HashMap Text NgramsId -> Text -> NgramsId
184 withMap m n = maybe (panic "withMap: should not happen") identity (HashMap.lookup n m)
186 indexNgramsT :: HashMap Text NgramsId -> NgramsT Ngrams -> NgramsT (Indexed Int Ngrams)
187 indexNgramsT = fmap . indexNgramsWith . withMap
189 -- | TODO replace NgramsT whith Typed NgramsType Ngrams
190 indexTypedNgrams :: HashMap Text NgramsId
191 -> Typed NgramsType Ngrams
192 -> Typed NgramsType (Indexed Int Ngrams)
193 indexTypedNgrams = fmap . indexNgramsWith . withMap
195 indexNgrams :: HashMap Text NgramsId -> Ngrams -> Indexed Int Ngrams
196 indexNgrams = indexNgramsWith . withMap
198 indexNgramsWith :: (Text -> NgramsId) -> Ngrams -> Indexed Int Ngrams
199 indexNgramsWith f n = Indexed (f $ _ngramsTerms n) n