2 Module : Gargantext.Database.Schema.NgramsPostag
3 Description : Ngram connection to the Database
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
10 Ngrams connection to the Database.
14 {-# OPTIONS_GHC -fno-warn-orphans #-}
15 {-# LANGUAGE Arrows #-}
16 {-# LANGUAGE FunctionalDependencies #-}
17 {-# LANGUAGE QuasiQuotes #-}
18 {-# LANGUAGE TemplateHaskell #-}
20 module Gargantext.Database.Schema.Ngrams
23 import Codec.Serialise (Serialise())
24 import Control.Lens (over)
25 import Control.Monad (mzero)
27 import Data.Aeson.Types (toJSONKeyText)
28 import Data.HashMap.Strict (HashMap)
29 import Data.Hashable (Hashable)
30 import Data.Map (fromList, lookup)
31 import Data.Maybe (fromMaybe)
32 import Data.Text (Text, splitOn, pack, strip)
33 import Database.PostgreSQL.Simple.FromField (returnError, ResultError(..))
34 import Gargantext.Core (HasDBid(..))
35 import Gargantext.Core.Types (TODO(..), Typed(..))
36 import Gargantext.Database.Schema.Prelude
37 import Gargantext.Database.Types
38 import Gargantext.Prelude
39 import Servant (FromHttpApiData(..), Proxy(..), ToHttpApiData(..))
40 import Test.QuickCheck (elements)
41 import Text.Read (read)
42 import qualified Data.ByteString.Char8 as B
43 import qualified Data.HashMap.Strict as HashMap
44 import qualified Database.PostgreSQL.Simple as PGS
50 data NgramsPoly id terms n = NgramsDB { _ngrams_id :: !id
51 , _ngrams_terms :: !terms
55 type NgramsWrite = NgramsPoly (Maybe (Column SqlInt4))
59 type NgramsRead = NgramsPoly (Column SqlInt4)
63 type NgramsReadNull = NgramsPoly (Column (Nullable SqlInt4))
64 (Column (Nullable SqlText))
65 (Column (Nullable SqlInt4))
67 type NgramsDB = NgramsPoly Int Text Int
69 $(makeAdaptorAndInstance "pNgramsDb" ''NgramsPoly)
70 makeLenses ''NgramsPoly
73 ngramsTable :: Table NgramsWrite NgramsRead
74 ngramsTable = Table "ngrams" (pNgramsDb NgramsDB { _ngrams_id = optionalTableField "id"
75 , _ngrams_terms = requiredTableField "terms"
76 , _ngrams_n = requiredTableField "n"
80 -- | Main Ngrams Types
82 -- Typed Ngrams localize the context of the ngrams
83 -- ngrams in source field of document has Sources Type
84 -- ngrams in authors field of document has Authors Type
85 -- ngrams in text fields of documents has Terms Type (i.e. either title or abstract)
86 data NgramsType = Authors | Institutes | Sources | NgramsTerms
87 deriving (Eq, Show, Read, Ord, Enum, Bounded, Generic)
89 instance Serialise NgramsType
90 instance FromJSON NgramsType
92 parseJSON (String "Authors") = pure Authors
93 parseJSON (String "Institutes") = pure Institutes
94 parseJSON (String "Sources") = pure Sources
95 parseJSON (String "Terms") = pure NgramsTerms
98 instance FromJSONKey NgramsType where
99 fromJSONKey = FromJSONKeyTextParser (parseJSON . String)
101 instance ToJSON NgramsType
103 toJSON Authors = String "Authors"
104 toJSON Institutes = String "Institutes"
105 toJSON Sources = String "Sources"
106 toJSON NgramsTerms = String "Terms"
108 instance ToJSONKey NgramsType where
109 toJSONKey = toJSONKeyText (pack . show)
110 instance FromHttpApiData NgramsType where
111 parseUrlPiece n = pure $ (read . cs) n
112 instance ToHttpApiData NgramsType where
113 toUrlPiece = pack . show
114 instance ToParamSchema NgramsType where
115 toParamSchema _ = toParamSchema (Proxy :: Proxy TODO)
116 instance Arbitrary NgramsType where
117 arbitrary = elements [ minBound .. maxBound ]
119 -- map NgramsType to its assigned id
120 instance FromField NgramsType where
121 fromField fld mdata =
122 case B.unpack `fmap` mdata of
123 Nothing -> returnError UnexpectedNull fld ""
125 n <- fromField fld mdata
126 if (n :: Int) > 0 then
127 case fromNgramsTypeId (NgramsTypeId n) of
128 Nothing -> returnError ConversionFailed fld dat
131 returnError ConversionFailed fld dat
132 instance ToField NgramsType where
133 toField nt = toField $ ngramsTypeId nt
136 ngramsTypes :: [NgramsType]
137 ngramsTypes = [minBound..]
139 instance ToSchema NgramsType
141 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_nre_")
144 newtype NgramsTypeId = NgramsTypeId Int
145 deriving (Eq, Show, Ord, Num)
146 instance ToField NgramsTypeId where
147 toField (NgramsTypeId n) = toField n
148 instance FromField NgramsTypeId where
149 fromField fld mdata = do
150 n <- fromField fld mdata
151 if (n :: Int) > 0 then return $ NgramsTypeId n
153 instance DefaultFromField (Nullable SqlInt4) NgramsTypeId
155 defaultFromField = fromPGSFromField
157 pgNgramsType :: NgramsType -> Column SqlInt4
158 pgNgramsType = pgNgramsTypeId . ngramsTypeId
160 pgNgramsTypeId :: NgramsTypeId -> Column SqlInt4
161 pgNgramsTypeId (NgramsTypeId n) = sqlInt4 n
163 ngramsTypeId :: NgramsType -> NgramsTypeId
164 ngramsTypeId Authors = 1
165 ngramsTypeId Institutes = 2
166 ngramsTypeId Sources = 3
167 ngramsTypeId NgramsTerms = 4
169 fromNgramsTypeId :: NgramsTypeId -> Maybe NgramsType
170 fromNgramsTypeId id = lookup id
171 $ fromList [ (ngramsTypeId nt,nt)
172 | nt <- [minBound .. maxBound] :: [NgramsType]
175 unNgramsTypeId :: NgramsTypeId -> Int
176 unNgramsTypeId (NgramsTypeId i) = i
178 toNgramsTypeId :: Int -> NgramsTypeId
179 toNgramsTypeId i = NgramsTypeId i
181 instance HasDBid NgramsType where
182 toDBid = unNgramsTypeId . ngramsTypeId
183 fromDBid = fromMaybe (panic "NgramsType id not indexed") . fromNgramsTypeId . toNgramsTypeId
185 ------------------------------------------------------------------------
186 ------------------------------------------------------------------------
187 -- | TODO put it in Gargantext.Core.Text.Ngrams
188 data Ngrams = UnsafeNgrams { _ngramsTerms :: Text
191 deriving (Generic, Show, Eq, Ord)
193 instance Hashable Ngrams
196 instance PGS.ToRow Ngrams where
197 toRow (UnsafeNgrams t s) = [toField t, toField s]
199 instance FromField Ngrams where
200 fromField fld mdata = do
201 x <- fromField fld mdata
204 instance PGS.ToRow Text where
205 toRow t = [toField t]
207 text2ngrams :: Text -> Ngrams
208 text2ngrams txt = UnsafeNgrams txt' $ length $ splitOn " " txt'
213 ------------------------------------------------------------------------
214 -------------------------------------------------------------------------
215 -- | TODO put it in Gargantext.Core.Text.Ngrams
216 -- Named entity are typed ngrams of Terms Ngrams
218 NgramsT { _ngramsType :: NgramsType
220 } deriving (Generic, Show, Eq, Ord)
224 instance Functor NgramsT where
227 -----------------------------------------------------------------------
228 withMap :: HashMap Text NgramsId -> Text -> NgramsId
229 withMap m n = maybe (panic $ "[G.D.S.Ngrams.withMap] Should not happen" <> (cs $ show n))
230 identity (HashMap.lookup n m)
232 indexNgramsT :: HashMap Text NgramsId -> NgramsT Ngrams -> NgramsT (Indexed Int Ngrams)
233 indexNgramsT = fmap . indexNgramsWith . withMap
235 -- | TODO replace NgramsT whith Typed NgramsType Ngrams
236 indexTypedNgrams :: HashMap Text NgramsId
237 -> Typed NgramsType Ngrams
238 -> Typed NgramsType (Indexed Int Ngrams)
239 indexTypedNgrams = fmap . indexNgramsWith . withMap
241 indexNgrams :: HashMap Text NgramsId -> Ngrams -> Indexed Int Ngrams
242 indexNgrams = indexNgramsWith . withMap
244 indexNgramsWith :: (Text -> NgramsId) -> Ngrams -> Indexed Int Ngrams
245 indexNgramsWith f n = Indexed (f $ _ngramsTerms n) n