2 Module : Gargantext.Database.Schema.NgramsPostag
3 Description : Ngram connection to the Database
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
10 Ngrams connection to the Database.
14 {-# OPTIONS_GHC -fno-warn-orphans #-}
15 {-# LANGUAGE Arrows #-}
16 {-# LANGUAGE FunctionalDependencies #-}
17 {-# LANGUAGE QuasiQuotes #-}
18 {-# LANGUAGE TemplateHaskell #-}
20 module Gargantext.Database.Schema.Ngrams
23 import Codec.Serialise (Serialise())
24 import Control.Lens (over)
25 import Control.Monad (mzero)
27 import Data.Aeson.Types (toJSONKeyText)
28 import Data.HashMap.Strict (HashMap)
29 import Data.Hashable (Hashable)
30 import Data.Map.Strict (fromList, lookup)
31 import Data.Maybe (fromMaybe)
32 import Data.Text (Text, splitOn, pack, strip)
33 import Database.PostgreSQL.Simple.FromField (returnError, ResultError(..))
34 import Gargantext.Core (HasDBid(..))
35 import Gargantext.Core.Types (TODO(..), Typed(..))
36 import Gargantext.Database.Schema.Prelude
37 import Gargantext.Database.Types
38 import Gargantext.Prelude
39 import Servant (FromHttpApiData(..), Proxy(..), ToHttpApiData(..))
40 import Test.QuickCheck (elements)
41 import Text.Read (read)
42 import qualified Data.ByteString.Char8 as B
43 import qualified Data.HashMap.Strict as HashMap
44 import qualified Database.PostgreSQL.Simple as PGS
50 data NgramsPoly id terms n = NgramsDB { _ngrams_id :: !id
51 , _ngrams_terms :: !terms
55 type NgramsWrite = NgramsPoly (Maybe (Column SqlInt4))
59 type NgramsRead = NgramsPoly (Column SqlInt4)
63 type NgramsReadNull = NgramsPoly (Column (Nullable SqlInt4))
64 (Column (Nullable SqlText))
65 (Column (Nullable SqlInt4))
67 type NgramsDB = NgramsPoly Int Text Int
69 $(makeAdaptorAndInstance "pNgramsDb" ''NgramsPoly)
70 makeLenses ''NgramsPoly
73 ngramsTable :: Table NgramsWrite NgramsRead
74 ngramsTable = Table "ngrams" (pNgramsDb NgramsDB { _ngrams_id = optionalTableField "id"
75 , _ngrams_terms = requiredTableField "terms"
76 , _ngrams_n = requiredTableField "n"
80 -- | Main Ngrams Types
82 -- Typed Ngrams localize the context of the ngrams
83 -- ngrams in source field of document has Sources Type
84 -- ngrams in authors field of document has Authors Type
85 -- ngrams in text fields of documents has Terms Type (i.e. either title or abstract)
86 data NgramsType = Authors | Institutes | Sources | NgramsTerms
87 deriving (Eq, Show, Read, Ord, Enum, Bounded, Generic)
89 instance Serialise NgramsType
90 instance FromJSON NgramsType
92 parseJSON (String "Authors") = pure Authors
93 parseJSON (String "Institutes") = pure Institutes
94 parseJSON (String "Sources") = pure Sources
95 parseJSON (String "Terms") = pure NgramsTerms
96 parseJSON (String "NgramsTerms") = pure NgramsTerms
99 instance FromJSONKey NgramsType where
100 fromJSONKey = FromJSONKeyTextParser (parseJSON . String)
102 instance ToJSON NgramsType
104 toJSON Authors = String "Authors"
105 toJSON Institutes = String "Institutes"
106 toJSON Sources = String "Sources"
107 toJSON NgramsTerms = String "Terms"
109 instance ToJSONKey NgramsType where
110 toJSONKey = toJSONKeyText (pack . show)
111 instance FromHttpApiData NgramsType where
112 parseUrlPiece n = pure $ (read . cs) n
113 instance ToHttpApiData NgramsType where
114 toUrlPiece = pack . show
115 instance ToParamSchema NgramsType where
116 toParamSchema _ = toParamSchema (Proxy :: Proxy TODO)
117 instance Arbitrary NgramsType where
118 arbitrary = elements [ minBound .. maxBound ]
120 -- map NgramsType to its assigned id
121 instance FromField NgramsType where
122 fromField fld mdata =
123 case B.unpack `fmap` mdata of
124 Nothing -> returnError UnexpectedNull fld ""
126 n <- fromField fld mdata
127 if (n :: Int) > 0 then
128 case fromNgramsTypeId (NgramsTypeId n) of
129 Nothing -> returnError ConversionFailed fld dat
132 returnError ConversionFailed fld dat
133 instance ToField NgramsType where
134 toField nt = toField $ ngramsTypeId nt
137 ngramsTypes :: [NgramsType]
138 ngramsTypes = [minBound..]
140 instance ToSchema NgramsType
142 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_nre_")
145 newtype NgramsTypeId = NgramsTypeId Int
146 deriving (Eq, Show, Ord, Num)
147 instance ToField NgramsTypeId where
148 toField (NgramsTypeId n) = toField n
149 instance FromField NgramsTypeId where
150 fromField fld mdata = do
151 n <- fromField fld mdata
152 if (n :: Int) > 0 then return $ NgramsTypeId n
154 instance DefaultFromField (Nullable SqlInt4) NgramsTypeId
156 defaultFromField = fromPGSFromField
158 pgNgramsType :: NgramsType -> Column SqlInt4
159 pgNgramsType = pgNgramsTypeId . ngramsTypeId
161 pgNgramsTypeId :: NgramsTypeId -> Column SqlInt4
162 pgNgramsTypeId (NgramsTypeId n) = sqlInt4 n
164 ngramsTypeId :: NgramsType -> NgramsTypeId
165 ngramsTypeId Authors = 1
166 ngramsTypeId Institutes = 2
167 ngramsTypeId Sources = 3
168 ngramsTypeId NgramsTerms = 4
170 fromNgramsTypeId :: NgramsTypeId -> Maybe NgramsType
171 fromNgramsTypeId id = lookup id
172 $ fromList [ (ngramsTypeId nt,nt)
173 | nt <- [minBound .. maxBound] :: [NgramsType]
176 unNgramsTypeId :: NgramsTypeId -> Int
177 unNgramsTypeId (NgramsTypeId i) = i
179 toNgramsTypeId :: Int -> NgramsTypeId
180 toNgramsTypeId i = NgramsTypeId i
182 instance HasDBid NgramsType where
183 toDBid = unNgramsTypeId . ngramsTypeId
184 fromDBid = fromMaybe (panic "NgramsType id not indexed") . fromNgramsTypeId . toNgramsTypeId
186 ------------------------------------------------------------------------
187 ------------------------------------------------------------------------
188 -- | TODO put it in Gargantext.Core.Text.Ngrams
189 data Ngrams = UnsafeNgrams { _ngramsTerms :: Text
192 deriving (Generic, Show, Eq, Ord)
194 instance Hashable Ngrams
197 instance PGS.ToRow Ngrams where
198 toRow (UnsafeNgrams t s) = [toField t, toField s]
200 instance FromField Ngrams where
201 fromField fld mdata = do
202 x <- fromField fld mdata
205 instance PGS.ToRow Text where
206 toRow t = [toField t]
208 text2ngrams :: Text -> Ngrams
209 text2ngrams txt = UnsafeNgrams txt' $ length $ splitOn " " txt'
214 ------------------------------------------------------------------------
215 -------------------------------------------------------------------------
216 -- | TODO put it in Gargantext.Core.Text.Ngrams
217 -- Named entity are typed ngrams of Terms Ngrams
219 NgramsT { _ngramsType :: NgramsType
221 } deriving (Generic, Show, Eq, Ord)
225 instance Functor NgramsT where
228 -----------------------------------------------------------------------
229 withMap :: HashMap Text NgramsId -> Text -> NgramsId
230 withMap m n = maybe (panic $ "[G.D.S.Ngrams.withMap] Should not happen" <> (cs $ show n))
231 identity (HashMap.lookup n m)
233 indexNgramsT :: HashMap Text NgramsId -> NgramsT Ngrams -> NgramsT (Indexed Int Ngrams)
234 indexNgramsT = fmap . indexNgramsWith . withMap
236 -- | TODO replace NgramsT whith Typed NgramsType Ngrams
237 indexTypedNgrams :: HashMap Text NgramsId
238 -> Typed NgramsType Ngrams
239 -> Typed NgramsType (Indexed Int Ngrams)
240 indexTypedNgrams = fmap . indexNgramsWith . withMap
242 indexNgrams :: HashMap Text NgramsId -> Ngrams -> Indexed Int Ngrams
243 indexNgrams = indexNgramsWith . withMap
245 indexNgramsWith :: (Text -> NgramsId) -> Ngrams -> Indexed Int Ngrams
246 indexNgramsWith f n = Indexed (f $ _ngramsTerms n) n