2 Module : Gargantext.Database.Schema.NgramsPostag
3 Description : Ngram connection to the Database
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
10 Ngrams connection to the Database.
14 {-# OPTIONS_GHC -fno-warn-orphans #-}
15 {-# LANGUAGE Arrows #-}
16 {-# LANGUAGE FunctionalDependencies #-}
17 {-# LANGUAGE QuasiQuotes #-}
18 {-# LANGUAGE TemplateHaskell #-}
20 module Gargantext.Database.Schema.Ngrams
23 import Codec.Serialise (Serialise())
24 import Control.Lens (over)
25 import Control.Monad (mzero)
26 import Data.Maybe (fromMaybe)
27 import Data.HashMap.Strict (HashMap)
28 import Data.Hashable (Hashable)
30 import Data.Aeson.Types (toJSONKeyText)
31 import Data.Map (fromList, lookup)
32 import Database.PostgreSQL.Simple.FromField (returnError, ResultError(..))
33 import Data.Text (Text, splitOn, pack, strip)
34 import Gargantext.Core.Types (TODO(..), Typed(..))
35 import Gargantext.Prelude
36 import Servant (FromHttpApiData(..), Proxy(..), ToHttpApiData(..))
37 import Gargantext.Core (HasDBid(..))
38 import Gargantext.Database.Types
39 import Gargantext.Database.Schema.Prelude
40 import Text.Read (read)
41 import qualified Data.ByteString.Char8 as B
42 import qualified Data.HashMap.Strict as HashMap
43 import qualified Database.PostgreSQL.Simple as PGS
49 data NgramsPoly id terms n = NgramsDB { _ngrams_id :: !id
50 , _ngrams_terms :: !terms
54 type NgramsWrite = NgramsPoly (Maybe (Column SqlInt4))
58 type NgramsRead = NgramsPoly (Column SqlInt4)
62 type NgramsReadNull = NgramsPoly (Column (Nullable SqlInt4))
63 (Column (Nullable SqlText))
64 (Column (Nullable SqlInt4))
66 type NgramsDB = NgramsPoly Int Text Int
68 $(makeAdaptorAndInstance "pNgramsDb" ''NgramsPoly)
69 makeLenses ''NgramsPoly
72 ngramsTable :: Table NgramsWrite NgramsRead
73 ngramsTable = Table "ngrams" (pNgramsDb NgramsDB { _ngrams_id = optionalTableField "id"
74 , _ngrams_terms = requiredTableField "terms"
75 , _ngrams_n = requiredTableField "n"
79 -- | Main Ngrams Types
81 -- Typed Ngrams localize the context of the ngrams
82 -- ngrams in source field of document has Sources Type
83 -- ngrams in authors field of document has Authors Type
84 -- ngrams in text fields of documents has Terms Type (i.e. either title or abstract)
85 data NgramsType = Authors | Institutes | Sources | NgramsTerms
86 deriving (Eq, Show, Read, Ord, Enum, Bounded, Generic)
87 instance Serialise NgramsType
88 instance FromJSON NgramsType
89 instance FromJSONKey NgramsType where
90 fromJSONKey = FromJSONKeyTextParser (parseJSON . String)
91 instance ToJSON NgramsType
92 instance ToJSONKey NgramsType where
93 toJSONKey = toJSONKeyText (pack . show)
94 instance FromHttpApiData NgramsType where
95 parseUrlPiece n = pure $ (read . cs) n
96 instance ToHttpApiData NgramsType where
97 toUrlPiece = pack . show
98 instance ToParamSchema NgramsType where
99 toParamSchema _ = toParamSchema (Proxy :: Proxy TODO)
100 -- map NgramsType to its assigned id
101 instance FromField NgramsType where
102 fromField fld mdata =
103 case B.unpack `fmap` mdata of
104 Nothing -> returnError UnexpectedNull fld ""
106 n <- fromField fld mdata
107 if (n :: Int) > 0 then
108 case fromNgramsTypeId (NgramsTypeId n) of
109 Nothing -> returnError ConversionFailed fld dat
112 returnError ConversionFailed fld dat
113 instance ToField NgramsType where
114 toField nt = toField $ ngramsTypeId nt
117 ngramsTypes :: [NgramsType]
118 ngramsTypes = [minBound..]
120 instance ToSchema NgramsType
122 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_nre_")
125 newtype NgramsTypeId = NgramsTypeId Int
126 deriving (Eq, Show, Ord, Num)
127 instance ToField NgramsTypeId where
128 toField (NgramsTypeId n) = toField n
129 instance FromField NgramsTypeId where
130 fromField fld mdata = do
131 n <- fromField fld mdata
132 if (n :: Int) > 0 then return $ NgramsTypeId n
134 instance DefaultFromField (Nullable SqlInt4) NgramsTypeId
136 defaultFromField = fromPGSFromField
138 pgNgramsType :: NgramsType -> Column SqlInt4
139 pgNgramsType = pgNgramsTypeId . ngramsTypeId
141 pgNgramsTypeId :: NgramsTypeId -> Column SqlInt4
142 pgNgramsTypeId (NgramsTypeId n) = sqlInt4 n
144 ngramsTypeId :: NgramsType -> NgramsTypeId
145 ngramsTypeId Authors = 1
146 ngramsTypeId Institutes = 2
147 ngramsTypeId Sources = 3
148 ngramsTypeId NgramsTerms = 4
150 fromNgramsTypeId :: NgramsTypeId -> Maybe NgramsType
151 fromNgramsTypeId id = lookup id
152 $ fromList [ (ngramsTypeId nt,nt)
153 | nt <- [minBound .. maxBound] :: [NgramsType]
156 unNgramsTypeId :: NgramsTypeId -> Int
157 unNgramsTypeId (NgramsTypeId i) = i
159 toNgramsTypeId :: Int -> NgramsTypeId
160 toNgramsTypeId i = NgramsTypeId i
162 instance HasDBid NgramsType where
163 toDBid = unNgramsTypeId . ngramsTypeId
164 fromDBid = fromMaybe (panic "NgramsType id not indexed") . fromNgramsTypeId . toNgramsTypeId
166 ------------------------------------------------------------------------
167 ------------------------------------------------------------------------
168 -- | TODO put it in Gargantext.Core.Text.Ngrams
169 data Ngrams = UnsafeNgrams { _ngramsTerms :: Text
172 deriving (Generic, Show, Eq, Ord)
174 instance Hashable Ngrams
177 instance PGS.ToRow Ngrams where
178 toRow (UnsafeNgrams t s) = [toField t, toField s]
180 instance FromField Ngrams where
181 fromField fld mdata = do
182 x <- fromField fld mdata
185 instance PGS.ToRow Text where
186 toRow t = [toField t]
188 text2ngrams :: Text -> Ngrams
189 text2ngrams txt = UnsafeNgrams txt' $ length $ splitOn " " txt'
194 ------------------------------------------------------------------------
195 -------------------------------------------------------------------------
196 -- | TODO put it in Gargantext.Core.Text.Ngrams
197 -- Named entity are typed ngrams of Terms Ngrams
199 NgramsT { _ngramsType :: NgramsType
201 } deriving (Generic, Show, Eq, Ord)
205 instance Functor NgramsT where
208 -----------------------------------------------------------------------
209 withMap :: HashMap Text NgramsId -> Text -> NgramsId
210 withMap m n = maybe (panic $ "[G.D.S.Ngrams.withMap] Should not happen" <> (cs $ show n))
211 identity (HashMap.lookup n m)
213 indexNgramsT :: HashMap Text NgramsId -> NgramsT Ngrams -> NgramsT (Indexed Int Ngrams)
214 indexNgramsT = fmap . indexNgramsWith . withMap
216 -- | TODO replace NgramsT whith Typed NgramsType Ngrams
217 indexTypedNgrams :: HashMap Text NgramsId
218 -> Typed NgramsType Ngrams
219 -> Typed NgramsType (Indexed Int Ngrams)
220 indexTypedNgrams = fmap . indexNgramsWith . withMap
222 indexNgrams :: HashMap Text NgramsId -> Ngrams -> Indexed Int Ngrams
223 indexNgrams = indexNgramsWith . withMap
225 indexNgramsWith :: (Text -> NgramsId) -> Ngrams -> Indexed Int Ngrams
226 indexNgramsWith f n = Indexed (f $ _ngramsTerms n) n