2 Module : Gargantext.Database.Schema.NgramsPostag
3 Description : Ngram connection to the Database
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
10 Ngrams connection to the Database.
14 {-# OPTIONS_GHC -fno-warn-orphans #-}
15 {-# LANGUAGE Arrows #-}
16 {-# LANGUAGE FunctionalDependencies #-}
17 {-# LANGUAGE QuasiQuotes #-}
18 {-# LANGUAGE TemplateHaskell #-}
20 module Gargantext.Database.Schema.Ngrams
23 import Data.Maybe (fromMaybe)
24 import Data.HashMap.Strict (HashMap)
25 import Data.Hashable (Hashable)
26 import Codec.Serialise (Serialise())
27 import Control.Lens (over)
28 import Control.Monad (mzero)
30 import Data.Aeson.Types (toJSONKeyText)
31 import Data.Map (fromList, lookup)
32 import Data.Text (Text, splitOn, pack, strip)
33 import Gargantext.Core.Types (TODO(..), Typed(..))
34 import Gargantext.Prelude
35 import Servant (FromHttpApiData(..), Proxy(..), ToHttpApiData(..))
36 import Text.Read (read)
37 import Gargantext.Core (HasDBid(..))
38 import Gargantext.Database.Types
39 import Gargantext.Database.Schema.Prelude
40 import qualified Database.PostgreSQL.Simple as PGS
41 import qualified Data.HashMap.Strict as HashMap
47 data NgramsPoly id terms n = NgramsDB { _ngrams_id :: !id
48 , _ngrams_terms :: !terms
52 type NgramsWrite = NgramsPoly (Maybe (Column SqlInt4))
56 type NgramsRead = NgramsPoly (Column SqlInt4)
60 type NgramsReadNull = NgramsPoly (Column (Nullable SqlInt4))
61 (Column (Nullable SqlText))
62 (Column (Nullable SqlInt4))
64 type NgramsDB = NgramsPoly Int Text Int
66 $(makeAdaptorAndInstance "pNgramsDb" ''NgramsPoly)
67 makeLenses ''NgramsPoly
70 ngramsTable :: Table NgramsWrite NgramsRead
71 ngramsTable = Table "ngrams" (pNgramsDb NgramsDB { _ngrams_id = optionalTableField "id"
72 , _ngrams_terms = requiredTableField "terms"
73 , _ngrams_n = requiredTableField "n"
77 -- | Main Ngrams Types
79 -- Typed Ngrams localize the context of the ngrams
80 -- ngrams in source field of document has Sources Type
81 -- ngrams in authors field of document has Authors Type
82 -- ngrams in text fields of documents has Terms Type (i.e. either title or abstract)
83 data NgramsType = Authors | Institutes | Sources | NgramsTerms
84 deriving (Eq, Show, Read, Ord, Enum, Bounded, Generic)
86 instance Serialise NgramsType
89 ngramsTypes :: [NgramsType]
90 ngramsTypes = [minBound..]
92 instance ToSchema NgramsType
94 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_nre_")
97 newtype NgramsTypeId = NgramsTypeId Int
98 deriving (Eq, Show, Ord, Num)
100 instance ToField NgramsTypeId where
101 toField (NgramsTypeId n) = toField n
103 instance FromField NgramsTypeId where
104 fromField fld mdata = do
105 n <- fromField fld mdata
106 if (n :: Int) > 0 then return $ NgramsTypeId n
109 instance FromJSON NgramsType
110 instance FromJSONKey NgramsType where
111 fromJSONKey = FromJSONKeyTextParser (parseJSON . String)
113 instance ToJSON NgramsType
114 instance ToJSONKey NgramsType where
115 toJSONKey = toJSONKeyText (pack . show)
117 instance FromHttpApiData NgramsType where
118 parseUrlPiece n = pure $ (read . cs) n
119 instance ToHttpApiData NgramsType where
120 toUrlPiece = pack . show
122 instance ToParamSchema NgramsType where
123 toParamSchema _ = toParamSchema (Proxy :: Proxy TODO)
126 instance DefaultFromField (Nullable SqlInt4) NgramsTypeId
128 defaultFromField = fromPGSFromField
130 pgNgramsType :: NgramsType -> Column SqlInt4
131 pgNgramsType = pgNgramsTypeId . ngramsTypeId
133 pgNgramsTypeId :: NgramsTypeId -> Column SqlInt4
134 pgNgramsTypeId (NgramsTypeId n) = sqlInt4 n
136 ngramsTypeId :: NgramsType -> NgramsTypeId
137 ngramsTypeId Authors = 1
138 ngramsTypeId Institutes = 2
139 ngramsTypeId Sources = 3
140 ngramsTypeId NgramsTerms = 4
142 fromNgramsTypeId :: NgramsTypeId -> Maybe NgramsType
143 fromNgramsTypeId id = lookup id
144 $ fromList [ (ngramsTypeId nt,nt)
145 | nt <- [minBound .. maxBound] :: [NgramsType]
148 unNgramsTypeId :: NgramsTypeId -> Int
149 unNgramsTypeId (NgramsTypeId i) = i
151 toNgramsTypeId :: Int -> NgramsTypeId
152 toNgramsTypeId i = NgramsTypeId i
154 instance HasDBid NgramsType where
155 toDBid = unNgramsTypeId . ngramsTypeId
156 fromDBid = fromMaybe (panic "NgramsType id not indexed") . fromNgramsTypeId . toNgramsTypeId
158 ------------------------------------------------------------------------
159 ------------------------------------------------------------------------
160 -- | TODO put it in Gargantext.Core.Text.Ngrams
161 data Ngrams = UnsafeNgrams { _ngramsTerms :: Text
164 deriving (Generic, Show, Eq, Ord)
166 instance Hashable Ngrams
169 instance PGS.ToRow Ngrams where
170 toRow (UnsafeNgrams t s) = [toField t, toField s]
172 instance FromField Ngrams where
173 fromField fld mdata = do
174 x <- fromField fld mdata
177 instance PGS.ToRow Text where
178 toRow t = [toField t]
180 text2ngrams :: Text -> Ngrams
181 text2ngrams txt = UnsafeNgrams txt' $ length $ splitOn " " txt'
186 ------------------------------------------------------------------------
187 -------------------------------------------------------------------------
188 -- | TODO put it in Gargantext.Core.Text.Ngrams
189 -- Named entity are typed ngrams of Terms Ngrams
191 NgramsT { _ngramsType :: NgramsType
193 } deriving (Generic, Show, Eq, Ord)
197 instance Functor NgramsT where
200 -----------------------------------------------------------------------
201 withMap :: HashMap Text NgramsId -> Text -> NgramsId
202 withMap m n = maybe (panic $ "[G.D.S.Ngrams.withMap] Should not happen" <> (cs $ show n))
203 identity (HashMap.lookup n m)
205 indexNgramsT :: HashMap Text NgramsId -> NgramsT Ngrams -> NgramsT (Indexed Int Ngrams)
206 indexNgramsT = fmap . indexNgramsWith . withMap
208 -- | TODO replace NgramsT whith Typed NgramsType Ngrams
209 indexTypedNgrams :: HashMap Text NgramsId
210 -> Typed NgramsType Ngrams
211 -> Typed NgramsType (Indexed Int Ngrams)
212 indexTypedNgrams = fmap . indexNgramsWith . withMap
214 indexNgrams :: HashMap Text NgramsId -> Ngrams -> Indexed Int Ngrams
215 indexNgrams = indexNgramsWith . withMap
217 indexNgramsWith :: (Text -> NgramsId) -> Ngrams -> Indexed Int Ngrams
218 indexNgramsWith f n = Indexed (f $ _ngramsTerms n) n