2 Module : Gargantext.Database.Schema.Ngrams
3 Description : Ngram connection to the Database
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
10 Ngrams connection to the Database.
14 {-# LANGUAGE Arrows #-}
15 {-# LANGUAGE FunctionalDependencies #-}
16 {-# LANGUAGE QuasiQuotes #-}
17 {-# LANGUAGE TemplateHaskell #-}
19 module Gargantext.Database.Schema.Ngrams
22 import Data.Hashable (Hashable)
23 import Codec.Serialise (Serialise())
24 import Control.Lens (over)
25 import Control.Monad (mzero)
27 import Data.Aeson.Types (toJSONKeyText)
28 import Data.Map (Map, fromList, lookup)
29 import Data.Text (Text, splitOn, pack, strip)
30 import Gargantext.Core.Types (TODO(..))
31 import Gargantext.Prelude
32 import Prelude (Functor)
33 import Servant (FromHttpApiData, parseUrlPiece, Proxy(..))
34 import Text.Read (read)
35 import Gargantext.Database.Schema.Prelude
36 import qualified Database.PostgreSQL.Simple as PGS
40 type NgramsTerms = Text
43 data NgramsPoly id terms n = NgramsDB { _ngrams_id :: !id
44 , _ngrams_terms :: !terms
48 type NgramsWrite = NgramsPoly (Maybe (Column PGInt4))
52 type NgramsRead = NgramsPoly (Column PGInt4)
56 type NgramsReadNull = NgramsPoly (Column (Nullable PGInt4))
57 (Column (Nullable PGText))
58 (Column (Nullable PGInt4))
60 type NgramsDB = NgramsPoly Int Text Int
62 $(makeAdaptorAndInstance "pNgramsDb" ''NgramsPoly)
63 makeLenses ''NgramsPoly
66 ngramsTable :: Table NgramsWrite NgramsRead
67 ngramsTable = Table "ngrams" (pNgramsDb NgramsDB { _ngrams_id = optional "id"
68 , _ngrams_terms = required "terms"
69 , _ngrams_n = required "n"
75 -- | Main Ngrams Types
77 -- Typed Ngrams localize the context of the ngrams
78 -- ngrams in source field of document has Sources Type
79 -- ngrams in authors field of document has Authors Type
80 -- ngrams in text (title or abstract) of documents has Terms Type
81 data NgramsType = Authors | Institutes | Sources | NgramsTerms
82 deriving (Eq, Show, Read, Ord, Enum, Bounded, Generic)
84 instance Serialise NgramsType
85 instance Hashable NgramsType
87 ngramsTypes :: [NgramsType]
88 ngramsTypes = [minBound..]
90 instance ToSchema NgramsType
92 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_nre_")
95 newtype NgramsTypeId = NgramsTypeId Int
96 deriving (Eq, Show, Ord, Num)
98 instance ToField NgramsTypeId where
99 toField (NgramsTypeId n) = toField n
101 instance FromField NgramsTypeId where
102 fromField fld mdata = do
103 n <- fromField fld mdata
104 if (n :: Int) > 0 then return $ NgramsTypeId n
107 instance FromJSON NgramsType
108 instance FromJSONKey NgramsType where
109 fromJSONKey = FromJSONKeyTextParser (parseJSON . String)
110 instance ToJSON NgramsType
111 instance ToJSONKey NgramsType where
112 toJSONKey = toJSONKeyText (pack . show)
114 instance FromHttpApiData NgramsType where
115 parseUrlPiece n = pure $ (read . cs) n
117 instance ToParamSchema NgramsType where
118 toParamSchema _ = toParamSchema (Proxy :: Proxy TODO)
121 instance QueryRunnerColumnDefault (Nullable PGInt4) NgramsTypeId
123 queryRunnerColumnDefault = fieldQueryRunnerColumn
125 pgNgramsType :: NgramsType -> Column PGInt4
126 pgNgramsType = pgNgramsTypeId . ngramsTypeId
128 pgNgramsTypeId :: NgramsTypeId -> Column PGInt4
129 pgNgramsTypeId (NgramsTypeId n) = pgInt4 n
131 ngramsTypeId :: NgramsType -> NgramsTypeId
132 ngramsTypeId Authors = 1
133 ngramsTypeId Institutes = 2
134 ngramsTypeId Sources = 3
135 ngramsTypeId NgramsTerms = 4
137 fromNgramsTypeId :: NgramsTypeId -> Maybe NgramsType
138 fromNgramsTypeId id = lookup id
139 $ fromList [ (ngramsTypeId nt,nt)
140 | nt <- [minBound .. maxBound] :: [NgramsType]
143 ------------------------------------------------------------------------
144 -- | TODO put it in Gargantext.Core.Text.Ngrams
145 data Ngrams = UnsafeNgrams { _ngramsTerms :: Text
147 } deriving (Generic, Show, Eq, Ord)
150 instance PGS.ToRow Ngrams where
151 toRow (UnsafeNgrams t s) = [toField t, toField s]
153 text2ngrams :: Text -> Ngrams
154 text2ngrams txt = UnsafeNgrams txt' $ length $ splitOn " " txt'
159 -------------------------------------------------------------------------
160 -- | TODO put it in Gargantext.Core.Text.Ngrams
161 -- Named entity are typed ngrams of Terms Ngrams
163 NgramsT { _ngramsType :: NgramsType
165 } deriving (Generic, Show, Eq, Ord)
169 instance Functor NgramsT where
171 -----------------------------------------------------------------------
175 , _ngramsId :: NgramsId
176 } deriving (Show, Generic, Eq, Ord)
178 makeLenses ''NgramsIndexed
179 ------------------------------------------------------------------------
184 } deriving (Show, Generic, Eq, Ord)
186 instance PGS.FromRow NgramIds where
187 fromRow = NgramIds <$> field <*> field
189 ----------------------
190 withMap :: Map NgramsTerms NgramsId -> NgramsTerms -> NgramsId
191 withMap m n = maybe (panic "withMap: should not happen") identity (lookup n m)
193 indexNgramsT :: Map NgramsTerms NgramsId -> NgramsT Ngrams -> NgramsT NgramsIndexed
194 indexNgramsT = fmap . indexNgramsWith . withMap
196 indexNgrams :: Map NgramsTerms NgramsId -> Ngrams -> NgramsIndexed
197 indexNgrams = indexNgramsWith . withMap
199 -- NP: not sure we need it anymore
200 indexNgramsTWith :: (NgramsTerms -> NgramsId) -> NgramsT Ngrams -> NgramsT NgramsIndexed
201 indexNgramsTWith = fmap . indexNgramsWith
203 indexNgramsWith :: (NgramsTerms -> NgramsId) -> Ngrams -> NgramsIndexed
204 indexNgramsWith f n = NgramsIndexed n (f $ _ngramsTerms n)