2 Module : Gargantext.Database.Schema.NgramsPostag
3 Description : Ngram connection to the Database
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
10 Ngrams connection to the Database.
14 {-# OPTIONS_GHC -fno-warn-orphans #-}
15 {-# LANGUAGE Arrows #-}
16 {-# LANGUAGE FunctionalDependencies #-}
17 {-# LANGUAGE QuasiQuotes #-}
18 {-# LANGUAGE TemplateHaskell #-}
20 module Gargantext.Database.Schema.Ngrams
23 import Codec.Serialise (Serialise())
24 import Control.Lens (over)
25 import Control.Monad (mzero)
27 import Data.Aeson.Types (toJSONKeyText)
28 import Data.HashMap.Strict (HashMap)
29 import Data.Hashable (Hashable)
30 import Data.Map.Strict (fromList, lookup)
31 import Data.Maybe (fromMaybe)
32 import Data.Text (Text, splitOn, pack, strip)
33 import Database.PostgreSQL.Simple.FromField (returnError, ResultError(..))
34 import Gargantext.Core (HasDBid(..))
35 import Gargantext.Core.Types (TODO(..), Typed(..))
36 import Gargantext.Database.Schema.Prelude hiding (over)
37 import Gargantext.Database.Types
38 import Gargantext.Prelude
39 import Servant (FromHttpApiData(..), Proxy(..), ToHttpApiData(..))
40 import Test.QuickCheck (elements)
41 import Text.Read (read)
42 import qualified Data.ByteString.Char8 as B
43 import qualified Data.HashMap.Strict as HashMap
44 import qualified Database.PostgreSQL.Simple as PGS
50 data NgramsPoly id terms n = NgramsDB { _ngrams_id :: !id
51 , _ngrams_terms :: !terms
55 type NgramsWrite = NgramsPoly (Maybe (Field SqlInt4))
59 type NgramsRead = NgramsPoly (Field SqlInt4)
63 type NgramsDB = NgramsPoly Int Text Int
65 $(makeAdaptorAndInstance "pNgramsDb" ''NgramsPoly)
66 makeLenses ''NgramsPoly
69 ngramsTable :: Table NgramsWrite NgramsRead
70 ngramsTable = Table "ngrams" (pNgramsDb NgramsDB { _ngrams_id = optionalTableField "id"
71 , _ngrams_terms = requiredTableField "terms"
72 , _ngrams_n = requiredTableField "n"
76 -- | Main Ngrams Types
78 -- Typed Ngrams localize the context of the ngrams
79 -- ngrams in source field of document has Sources Type
80 -- ngrams in authors field of document has Authors Type
81 -- ngrams in text fields of documents has Terms Type (i.e. either title or abstract)
82 data NgramsType = Authors | Institutes | Sources | NgramsTerms
83 deriving (Eq, Show, Read, Ord, Enum, Bounded, Generic)
85 instance Serialise NgramsType
86 instance FromJSON NgramsType
88 parseJSON (String "Authors") = pure Authors
89 parseJSON (String "Institutes") = pure Institutes
90 parseJSON (String "Sources") = pure Sources
91 parseJSON (String "Terms") = pure NgramsTerms
92 parseJSON (String "NgramsTerms") = pure NgramsTerms
95 instance FromJSONKey NgramsType where
96 fromJSONKey = FromJSONKeyTextParser (parseJSON . String)
98 instance ToJSON NgramsType
100 toJSON Authors = String "Authors"
101 toJSON Institutes = String "Institutes"
102 toJSON Sources = String "Sources"
103 toJSON NgramsTerms = String "Terms"
105 instance ToJSONKey NgramsType where
106 toJSONKey = toJSONKeyText (pack . show)
107 instance FromHttpApiData NgramsType where
108 parseUrlPiece n = pure $ (read . cs) n
109 instance ToHttpApiData NgramsType where
110 toUrlPiece = pack . show
111 instance ToParamSchema NgramsType where
112 toParamSchema _ = toParamSchema (Proxy :: Proxy TODO)
113 instance Arbitrary NgramsType where
114 arbitrary = elements [ minBound .. maxBound ]
116 -- map NgramsType to its assigned id
117 instance FromField NgramsType where
118 fromField fld mdata =
119 case B.unpack `fmap` mdata of
120 Nothing -> returnError UnexpectedNull fld ""
122 n <- fromField fld mdata
123 if (n :: Int) > 0 then
124 case fromNgramsTypeId (NgramsTypeId n) of
125 Nothing -> returnError ConversionFailed fld dat
128 returnError ConversionFailed fld dat
129 instance ToField NgramsType where
130 toField nt = toField $ ngramsTypeId nt
133 ngramsTypes :: [NgramsType]
134 ngramsTypes = [minBound..]
136 instance ToSchema NgramsType
138 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_nre_")
141 newtype NgramsTypeId = NgramsTypeId Int
142 deriving (Eq, Show, Ord, Num)
143 instance ToField NgramsTypeId where
144 toField (NgramsTypeId n) = toField n
145 instance FromField NgramsTypeId where
146 fromField fld mdata = do
147 n <- fromField fld mdata
148 if (n :: Int) > 0 then return $ NgramsTypeId n
150 instance DefaultFromField (Nullable SqlInt4) NgramsTypeId
152 defaultFromField = fromPGSFromField
154 pgNgramsType :: NgramsType -> Field SqlInt4
155 pgNgramsType = pgNgramsTypeId . ngramsTypeId
157 pgNgramsTypeId :: NgramsTypeId -> Field SqlInt4
158 pgNgramsTypeId (NgramsTypeId n) = sqlInt4 n
160 ngramsTypeId :: NgramsType -> NgramsTypeId
161 ngramsTypeId Authors = 1
162 ngramsTypeId Institutes = 2
163 ngramsTypeId Sources = 3
164 ngramsTypeId NgramsTerms = 4
166 fromNgramsTypeId :: NgramsTypeId -> Maybe NgramsType
167 fromNgramsTypeId id = lookup id
168 $ fromList [ (ngramsTypeId nt,nt)
169 | nt <- [minBound .. maxBound] :: [NgramsType]
172 unNgramsTypeId :: NgramsTypeId -> Int
173 unNgramsTypeId (NgramsTypeId i) = i
175 toNgramsTypeId :: Int -> NgramsTypeId
176 toNgramsTypeId i = NgramsTypeId i
178 instance HasDBid NgramsType where
179 toDBid = unNgramsTypeId . ngramsTypeId
180 fromDBid = fromMaybe (panic "NgramsType id not indexed") . fromNgramsTypeId . toNgramsTypeId
182 ------------------------------------------------------------------------
183 ------------------------------------------------------------------------
184 -- | TODO put it in Gargantext.Core.Text.Ngrams
185 data Ngrams = UnsafeNgrams { _ngramsTerms :: Text
188 deriving (Generic, Show, Eq, Ord)
190 instance Hashable Ngrams
193 instance PGS.ToRow Ngrams where
194 toRow (UnsafeNgrams t s) = [toField t, toField s]
196 instance FromField Ngrams where
197 fromField fld mdata = do
198 x <- fromField fld mdata
201 instance PGS.ToRow Text where
202 toRow t = [toField t]
204 text2ngrams :: Text -> Ngrams
205 text2ngrams txt = UnsafeNgrams txt' $ length $ splitOn " " txt'
210 ------------------------------------------------------------------------
211 -------------------------------------------------------------------------
212 -- | TODO put it in Gargantext.Core.Text.Ngrams
213 -- Named entity are typed ngrams of Terms Ngrams
215 NgramsT { _ngramsType :: NgramsType
217 } deriving (Generic, Show, Eq, Ord)
221 instance Functor NgramsT where
224 -----------------------------------------------------------------------
225 withMap :: HashMap Text NgramsId -> Text -> NgramsId
226 withMap m n = maybe (panic $ "[G.D.S.Ngrams.withMap] Should not happen" <> (cs $ show n))
227 identity (HashMap.lookup n m)
229 indexNgramsT :: HashMap Text NgramsId -> NgramsT Ngrams -> NgramsT (Indexed Int Ngrams)
230 indexNgramsT = fmap . indexNgramsWith . withMap
232 -- | TODO replace NgramsT whith Typed NgramsType Ngrams
233 indexTypedNgrams :: HashMap Text NgramsId
234 -> Typed NgramsType Ngrams
235 -> Typed NgramsType (Indexed Int Ngrams)
236 indexTypedNgrams = fmap . indexNgramsWith . withMap
238 indexNgrams :: HashMap Text NgramsId -> Ngrams -> Indexed Int Ngrams
239 indexNgrams = indexNgramsWith . withMap
241 indexNgramsWith :: (Text -> NgramsId) -> Ngrams -> Indexed Int Ngrams
242 indexNgramsWith f n = Indexed (f $ _ngramsTerms n) n