]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Database/Schema/Ngrams.hs
Merge branch 'dev' into 97-dev-istex-search
[gargantext.git] / src / Gargantext / Database / Schema / Ngrams.hs
1 {-|
2 Module : Gargantext.Database.Schema.NgramsPostag
3 Description : Ngram connection to the Database
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 Ngrams connection to the Database.
11
12 -}
13
14 {-# OPTIONS_GHC -fno-warn-orphans #-}
15 {-# LANGUAGE Arrows #-}
16 {-# LANGUAGE FunctionalDependencies #-}
17 {-# LANGUAGE QuasiQuotes #-}
18 {-# LANGUAGE TemplateHaskell #-}
19
20 module Gargantext.Database.Schema.Ngrams
21 where
22
23 import Data.Maybe (fromMaybe)
24 import Data.HashMap.Strict (HashMap)
25 import Data.Hashable (Hashable)
26 import Codec.Serialise (Serialise())
27 import Control.Lens (over)
28 import Control.Monad (mzero)
29 import Data.Aeson
30 import Data.Aeson.Types (toJSONKeyText)
31 import Data.Map (fromList, lookup)
32 import Data.Text (Text, splitOn, pack, strip)
33 import Gargantext.Core.Types (TODO(..), Typed(..))
34 import Gargantext.Prelude
35 import Servant (FromHttpApiData(..), Proxy(..), ToHttpApiData(..))
36 import Text.Read (read)
37 import Gargantext.Core (HasDBid(..))
38 import Gargantext.Database.Types
39 import Gargantext.Database.Schema.Prelude
40 import qualified Database.PostgreSQL.Simple as PGS
41 import qualified Data.HashMap.Strict as HashMap
42
43
44 type NgramsId = Int
45 type Size = Int
46
47 data NgramsPoly id terms n = NgramsDB { _ngrams_id :: !id
48 , _ngrams_terms :: !terms
49 , _ngrams_n :: !n
50 } deriving (Show)
51
52 type NgramsWrite = NgramsPoly (Maybe (Column SqlInt4))
53 (Column SqlText)
54 (Column SqlInt4)
55
56 type NgramsRead = NgramsPoly (Column SqlInt4)
57 (Column SqlText)
58 (Column SqlInt4)
59
60 type NgramsReadNull = NgramsPoly (Column (Nullable SqlInt4))
61 (Column (Nullable SqlText))
62 (Column (Nullable SqlInt4))
63
64 type NgramsDB = NgramsPoly Int Text Int
65
66 $(makeAdaptorAndInstance "pNgramsDb" ''NgramsPoly)
67 makeLenses ''NgramsPoly
68
69
70 ngramsTable :: Table NgramsWrite NgramsRead
71 ngramsTable = Table "ngrams" (pNgramsDb NgramsDB { _ngrams_id = optionalTableField "id"
72 , _ngrams_terms = requiredTableField "terms"
73 , _ngrams_n = requiredTableField "n"
74 }
75 )
76
77 -- | Main Ngrams Types
78 -- | Typed Ngrams
79 -- Typed Ngrams localize the context of the ngrams
80 -- ngrams in source field of document has Sources Type
81 -- ngrams in authors field of document has Authors Type
82 -- ngrams in text fields of documents has Terms Type (i.e. either title or abstract)
83 data NgramsType = Authors | Institutes | Sources | NgramsTerms
84 deriving (Eq, Show, Read, Ord, Enum, Bounded, Generic)
85
86 instance Serialise NgramsType
87
88
89 ngramsTypes :: [NgramsType]
90 ngramsTypes = [minBound..]
91
92 instance ToSchema NgramsType
93 {- where
94 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_nre_")
95 --}
96
97 newtype NgramsTypeId = NgramsTypeId Int
98 deriving (Eq, Show, Ord, Num)
99
100 instance ToField NgramsTypeId where
101 toField (NgramsTypeId n) = toField n
102
103 instance FromField NgramsTypeId where
104 fromField fld mdata = do
105 n <- fromField fld mdata
106 if (n :: Int) > 0 then return $ NgramsTypeId n
107 else mzero
108
109 instance FromJSON NgramsType
110 instance FromJSONKey NgramsType where
111 fromJSONKey = FromJSONKeyTextParser (parseJSON . String)
112
113 instance ToJSON NgramsType
114 instance ToJSONKey NgramsType where
115 toJSONKey = toJSONKeyText (pack . show)
116
117 instance FromHttpApiData NgramsType where
118 parseUrlPiece n = pure $ (read . cs) n
119 instance ToHttpApiData NgramsType where
120 toUrlPiece = pack . show
121
122 instance ToParamSchema NgramsType where
123 toParamSchema _ = toParamSchema (Proxy :: Proxy TODO)
124
125
126 instance DefaultFromField (Nullable SqlInt4) NgramsTypeId
127 where
128 defaultFromField = fromPGSFromField
129
130 pgNgramsType :: NgramsType -> Column SqlInt4
131 pgNgramsType = pgNgramsTypeId . ngramsTypeId
132
133 pgNgramsTypeId :: NgramsTypeId -> Column SqlInt4
134 pgNgramsTypeId (NgramsTypeId n) = sqlInt4 n
135
136 ngramsTypeId :: NgramsType -> NgramsTypeId
137 ngramsTypeId Authors = 1
138 ngramsTypeId Institutes = 2
139 ngramsTypeId Sources = 3
140 ngramsTypeId NgramsTerms = 4
141
142 fromNgramsTypeId :: NgramsTypeId -> Maybe NgramsType
143 fromNgramsTypeId id = lookup id
144 $ fromList [ (ngramsTypeId nt,nt)
145 | nt <- [minBound .. maxBound] :: [NgramsType]
146 ]
147
148 unNgramsTypeId :: NgramsTypeId -> Int
149 unNgramsTypeId (NgramsTypeId i) = i
150
151 toNgramsTypeId :: Int -> NgramsTypeId
152 toNgramsTypeId i = NgramsTypeId i
153
154 instance HasDBid NgramsType where
155 toDBid = unNgramsTypeId . ngramsTypeId
156 fromDBid = fromMaybe (panic "NgramsType id not indexed") . fromNgramsTypeId . toNgramsTypeId
157
158 ------------------------------------------------------------------------
159 ------------------------------------------------------------------------
160 -- | TODO put it in Gargantext.Core.Text.Ngrams
161 data Ngrams = UnsafeNgrams { _ngramsTerms :: Text
162 , _ngramsSize :: Int
163 }
164 deriving (Generic, Show, Eq, Ord)
165
166 instance Hashable Ngrams
167
168 makeLenses ''Ngrams
169 instance PGS.ToRow Ngrams where
170 toRow (UnsafeNgrams t s) = [toField t, toField s]
171
172 instance FromField Ngrams where
173 fromField fld mdata = do
174 x <- fromField fld mdata
175 pure $ text2ngrams x
176
177 instance PGS.ToRow Text where
178 toRow t = [toField t]
179
180 text2ngrams :: Text -> Ngrams
181 text2ngrams txt = UnsafeNgrams txt' $ length $ splitOn " " txt'
182 where
183 txt' = strip txt
184
185
186 ------------------------------------------------------------------------
187 -------------------------------------------------------------------------
188 -- | TODO put it in Gargantext.Core.Text.Ngrams
189 -- Named entity are typed ngrams of Terms Ngrams
190 data NgramsT a =
191 NgramsT { _ngramsType :: NgramsType
192 , _ngramsT :: a
193 } deriving (Generic, Show, Eq, Ord)
194
195 makeLenses ''NgramsT
196
197 instance Functor NgramsT where
198 fmap = over ngramsT
199
200 -----------------------------------------------------------------------
201 withMap :: HashMap Text NgramsId -> Text -> NgramsId
202 withMap m n = maybe (panic $ "[G.D.S.Ngrams.withMap] Should not happen" <> (cs $ show n))
203 identity (HashMap.lookup n m)
204
205 indexNgramsT :: HashMap Text NgramsId -> NgramsT Ngrams -> NgramsT (Indexed Int Ngrams)
206 indexNgramsT = fmap . indexNgramsWith . withMap
207
208 -- | TODO replace NgramsT whith Typed NgramsType Ngrams
209 indexTypedNgrams :: HashMap Text NgramsId
210 -> Typed NgramsType Ngrams
211 -> Typed NgramsType (Indexed Int Ngrams)
212 indexTypedNgrams = fmap . indexNgramsWith . withMap
213
214 indexNgrams :: HashMap Text NgramsId -> Ngrams -> Indexed Int Ngrams
215 indexNgrams = indexNgramsWith . withMap
216
217 indexNgramsWith :: (Text -> NgramsId) -> Ngrams -> Indexed Int Ngrams
218 indexNgramsWith f n = Indexed (f $ _ngramsTerms n) n