]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Database/Schema/Ngrams.hs
[REFACT/CLEAN] TextFlow
[gargantext.git] / src / Gargantext / Database / Schema / Ngrams.hs
1 {-|
2 Module : Gargantext.Database.Schema.NgramsPostag
3 Description : Ngram connection to the Database
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 Ngrams connection to the Database.
11
12 -}
13
14 {-# LANGUAGE Arrows #-}
15 {-# LANGUAGE FunctionalDependencies #-}
16 {-# LANGUAGE QuasiQuotes #-}
17 {-# LANGUAGE TemplateHaskell #-}
18
19 module Gargantext.Database.Schema.Ngrams
20 where
21
22 import Data.Hashable (Hashable)
23 import Codec.Serialise (Serialise())
24 import Control.Lens (over)
25 import Control.Monad (mzero)
26 import Data.Aeson
27 import Data.Aeson.Types (toJSONKeyText)
28 import Data.Map (Map, fromList, lookup)
29 import Data.Text (Text, splitOn, pack, strip)
30 import Gargantext.Core.Types (TODO(..), Typed(..))
31 import Gargantext.Prelude
32 import Prelude (Functor)
33 import Servant (FromHttpApiData, parseUrlPiece, Proxy(..))
34 import Text.Read (read)
35 import Gargantext.Database.Types
36 import Gargantext.Database.Schema.Prelude
37 import qualified Database.PostgreSQL.Simple as PGS
38
39
40 type NgramsId = Int
41 type Size = Int
42
43 data NgramsPoly id terms n = NgramsDB { _ngrams_id :: !id
44 , _ngrams_terms :: !terms
45 , _ngrams_n :: !n
46 } deriving (Show)
47
48 type NgramsWrite = NgramsPoly (Maybe (Column PGInt4))
49 (Column PGText)
50 (Column PGInt4)
51
52 type NgramsRead = NgramsPoly (Column PGInt4)
53 (Column PGText)
54 (Column PGInt4)
55
56 type NgramsReadNull = NgramsPoly (Column (Nullable PGInt4))
57 (Column (Nullable PGText))
58 (Column (Nullable PGInt4))
59
60 type NgramsDB = NgramsPoly Int Text Int
61
62 $(makeAdaptorAndInstance "pNgramsDb" ''NgramsPoly)
63 makeLenses ''NgramsPoly
64
65
66 ngramsTable :: Table NgramsWrite NgramsRead
67 ngramsTable = Table "ngrams" (pNgramsDb NgramsDB { _ngrams_id = optional "id"
68 , _ngrams_terms = required "terms"
69 , _ngrams_n = required "n"
70 }
71 )
72
73 -- | Main Ngrams Types
74 -- | Typed Ngrams
75 -- Typed Ngrams localize the context of the ngrams
76 -- ngrams in source field of document has Sources Type
77 -- ngrams in authors field of document has Authors Type
78 -- ngrams in text fields of documents has Terms Type (i.e. either title or abstract)
79 data NgramsType = Authors | Institutes | Sources | NgramsTerms
80 deriving (Eq, Show, Read, Ord, Enum, Bounded, Generic)
81
82 instance Serialise NgramsType
83
84 ngramsTypes :: [NgramsType]
85 ngramsTypes = [minBound..]
86
87 instance ToSchema NgramsType
88 {- where
89 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_nre_")
90 --}
91
92 newtype NgramsTypeId = NgramsTypeId Int
93 deriving (Eq, Show, Ord, Num)
94
95 instance ToField NgramsTypeId where
96 toField (NgramsTypeId n) = toField n
97
98 instance FromField NgramsTypeId where
99 fromField fld mdata = do
100 n <- fromField fld mdata
101 if (n :: Int) > 0 then return $ NgramsTypeId n
102 else mzero
103
104 instance FromJSON NgramsType
105 instance FromJSONKey NgramsType where
106 fromJSONKey = FromJSONKeyTextParser (parseJSON . String)
107
108 instance ToJSON NgramsType
109 instance ToJSONKey NgramsType where
110 toJSONKey = toJSONKeyText (pack . show)
111
112 instance FromHttpApiData NgramsType where
113 parseUrlPiece n = pure $ (read . cs) n
114
115 instance ToParamSchema NgramsType where
116 toParamSchema _ = toParamSchema (Proxy :: Proxy TODO)
117
118
119 instance QueryRunnerColumnDefault (Nullable PGInt4) NgramsTypeId
120 where
121 queryRunnerColumnDefault = fieldQueryRunnerColumn
122
123 pgNgramsType :: NgramsType -> Column PGInt4
124 pgNgramsType = pgNgramsTypeId . ngramsTypeId
125
126 pgNgramsTypeId :: NgramsTypeId -> Column PGInt4
127 pgNgramsTypeId (NgramsTypeId n) = pgInt4 n
128
129 ngramsTypeId :: NgramsType -> NgramsTypeId
130 ngramsTypeId Authors = 1
131 ngramsTypeId Institutes = 2
132 ngramsTypeId Sources = 3
133 ngramsTypeId NgramsTerms = 4
134
135 fromNgramsTypeId :: NgramsTypeId -> Maybe NgramsType
136 fromNgramsTypeId id = lookup id
137 $ fromList [ (ngramsTypeId nt,nt)
138 | nt <- [minBound .. maxBound] :: [NgramsType]
139 ]
140
141 ------------------------------------------------------------------------
142 ------------------------------------------------------------------------
143 -- | TODO put it in Gargantext.Core.Text.Ngrams
144 data Ngrams = UnsafeNgrams { _ngramsTerms :: Text
145 , _ngramsSize :: Int
146 }
147 deriving (Generic, Show, Eq, Ord)
148
149 makeLenses ''Ngrams
150 instance PGS.ToRow Ngrams where
151 toRow (UnsafeNgrams t s) = [toField t, toField s]
152
153 instance FromField Ngrams where
154 fromField fld mdata = do
155 x <- fromField fld mdata
156 pure $ text2ngrams x
157
158 text2ngrams :: Text -> Ngrams
159 text2ngrams txt = UnsafeNgrams txt' $ length $ splitOn " " txt'
160 where
161 txt' = strip txt
162
163
164 ------------------------------------------------------------------------
165 -------------------------------------------------------------------------
166 -- | TODO put it in Gargantext.Core.Text.Ngrams
167 -- Named entity are typed ngrams of Terms Ngrams
168 data NgramsT a =
169 NgramsT { _ngramsType :: NgramsType
170 , _ngramsT :: a
171 } deriving (Generic, Show, Eq, Ord)
172
173 makeLenses ''NgramsT
174
175 instance Functor NgramsT where
176 fmap = over ngramsT
177
178 -----------------------------------------------------------------------
179 withMap :: Map Text NgramsId -> Text -> NgramsId
180 withMap m n = maybe (panic "withMap: should not happen") identity (lookup n m)
181
182 indexNgramsT :: Map Text NgramsId -> NgramsT Ngrams -> NgramsT (Indexed Int Ngrams)
183 indexNgramsT = fmap . indexNgramsWith . withMap
184
185 -- | TODO replace NgramsT whith Typed NgramsType Ngrams
186 indexTypedNgrams :: Map Text NgramsId
187 -> Typed NgramsType Ngrams
188 -> Typed NgramsType (Indexed Int Ngrams)
189 indexTypedNgrams = fmap . indexNgramsWith . withMap
190
191 indexNgrams :: Map Text NgramsId -> Ngrams -> Indexed Int Ngrams
192 indexNgrams = indexNgramsWith . withMap
193
194 indexNgramsWith :: (Text -> NgramsId) -> Ngrams -> Indexed Int Ngrams
195 indexNgramsWith f n = Indexed (f $ _ngramsTerms n) n