]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Database/Ngram.hs
[FLOW][DB][NGRAMS] group inserted in db.
[gargantext.git] / src / Gargantext / Database / Ngram.hs
1 {-|
2 Module : Gargantext.Database.Ngram
3 Description : Ngram connection to the Database
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 Ngrams connection to the Database.
11
12 -}
13
14 {-# LANGUAGE Arrows #-}
15 {-# LANGUAGE DeriveGeneric #-}
16 {-# LANGUAGE FlexibleInstances #-}
17 {-# LANGUAGE FunctionalDependencies #-}
18 {-# LANGUAGE MultiParamTypeClasses #-}
19 {-# LANGUAGE NoImplicitPrelude #-}
20 {-# LANGUAGE OverloadedStrings #-}
21 {-# LANGUAGE QuasiQuotes #-}
22 {-# LANGUAGE TemplateHaskell #-}
23
24 module Gargantext.Database.Ngram where
25
26 -- import Opaleye
27 import Control.Lens (makeLenses)
28 import Data.ByteString.Internal (ByteString)
29 import Data.Map (Map, fromList, lookup)
30 import Data.Text (Text)
31 import Database.PostgreSQL.Simple.FromRow (fromRow, field)
32 import Database.PostgreSQL.Simple.SqlQQ (sql)
33 import Database.PostgreSQL.Simple.ToField (toField)
34 import Database.PostgreSQL.Simple.ToRow (toRow)
35 import Database.PostgreSQL.Simple.Types (Values(..), QualifiedIdentifier(..))
36 import GHC.Generics (Generic)
37 import Gargantext.Database.Node (mkCmd, Cmd(..))
38 import Gargantext.Prelude
39 import qualified Database.PostgreSQL.Simple as DPS
40
41
42 --data NgramPoly id terms n = NgramDb { ngram_id :: id
43 -- , ngram_terms :: terms
44 -- , ngram_n :: n
45 -- } deriving (Show)
46 --
47 --type NgramWrite = NgramPoly (Maybe (Column PGInt4))
48 -- (Column PGText)
49 -- (Column PGInt4)
50 --
51 --type NgramRead = NgramPoly (Column PGInt4)
52 -- (Column PGText)
53 -- (Column PGInt4)
54 --
55 ----type Ngram = NgramPoly Int Text Int
56 --
57 -- $(makeAdaptorAndInstance "pNgram" ''NgramPoly)
58 -- $(makeLensesWith abbreviatedFields ''NgramPoly)
59 --
60 --ngramTable :: Table NgramWrite NgramRead
61 --ngramTable = Table "ngrams" (pNgram NgramDb { ngram_id = optional "id"
62 -- , ngram_terms = required "terms"
63 -- , ngram_n = required "n"
64 -- }
65 -- )
66 --
67 --queryNgramTable :: Query NgramRead
68 --queryNgramTable = queryTable ngramTable
69 --
70 --dbGetNgrams :: PGS.Connection -> IO [NgramDb]
71 --dbGetNgrams conn = runQuery conn queryNgramTable
72
73 -- | Main Ngrams Types
74 -- | Typed Ngrams
75 -- Typed Ngrams localize the context of the ngrams
76 -- ngrams in source field of document has Sources Type
77 -- ngrams in authors field of document has Authors Type
78 -- ngrams in text (title or abstract) of documents has Terms Type
79 data NgramsType = Sources | Authors | Terms
80
81 ngramsTypeId :: NgramsType -> Int
82 ngramsTypeId Terms = 1
83 ngramsTypeId Authors = 2
84 ngramsTypeId Sources = 3
85
86 type NgramsTerms = Text
87 type NgramsId = Int
88 type Size = Int
89
90 ------------------------------------------------------------------------
91 -- | TODO put it in Gargantext.Text.Ngrams
92 data Ngrams = Ngrams { _ngramsTerms :: Text
93 , _ngramsSize :: Int
94 } deriving (Generic)
95 instance Eq Ngrams where
96 (==) = (==)
97 instance Ord Ngrams where
98 compare = compare
99 makeLenses ''Ngrams
100 instance DPS.ToRow Ngrams where
101 toRow (Ngrams t s) = [toField t, toField s]
102
103 -------------------------------------------------------------------------
104 -- | TODO put it in Gargantext.Text.Ngrams
105 -- Named entity are typed ngrams of Terms Ngrams
106 data NgramsT a =
107 NgramsT { _ngramsType :: NgramsType
108 , _ngramsT :: a
109 } deriving (Generic)
110
111 instance Eq (NgramsT a) where (==) = (==)
112 instance Ord (NgramsT a) where compare = compare
113 makeLenses ''NgramsT
114 -----------------------------------------------------------------------
115 data NgramsIndexed =
116 NgramsIndexed
117 { _ngrams :: Ngrams
118 , _ngramsId :: NgramsId
119 } deriving (Generic)
120
121 instance Eq NgramsIndexed where
122 (==) = (==)
123 instance Ord NgramsIndexed where
124 compare = compare
125 makeLenses ''NgramsIndexed
126
127 ------------------------------------------------------------------------
128 data NgramIds =
129 NgramIds
130 { ngramId :: Int
131 , ngramTerms :: Text
132 } deriving (Show, Generic)
133
134 instance DPS.FromRow NgramIds where
135 fromRow = NgramIds <$> field <*> field
136
137 ----------------------
138 indexNgramsT :: Map NgramsTerms NgramsId -> NgramsT Ngrams -> NgramsT NgramsIndexed
139 indexNgramsT m ngrId = indexNgramsTWith f ngrId
140 where
141 f n = maybe (panic "indexNgramsT: should not happen") identity (lookup n m)
142
143 indexNgramsTWith :: (NgramsTerms -> NgramsId) -> NgramsT Ngrams-> NgramsT NgramsIndexed
144 indexNgramsTWith f (NgramsT t n) = NgramsT t (NgramsIndexed n ((f . _ngramsTerms) n))
145
146 insertNgrams :: [Ngrams] -> Cmd (Map NgramsTerms NgramsId)
147 insertNgrams ns = fromList <$> map (\(NgramIds i t) -> (t, i)) <$> (insertNgrams' ns)
148
149 insertNgrams' :: [Ngrams] -> Cmd [NgramIds]
150 insertNgrams' ns = mkCmd $ \conn -> DPS.query conn queryInsertNgrams (DPS.Only $ Values fields ns)
151 where
152 fields = map (\t -> QualifiedIdentifier Nothing t) ["text", "int4"]
153
154
155 insertNgrams_Debug :: [(NgramsTerms, Size)] -> Cmd ByteString
156 insertNgrams_Debug ns = mkCmd $ \conn -> DPS.formatQuery conn queryInsertNgrams (DPS.Only $ Values fields ns)
157 where
158 fields = map (\t -> QualifiedIdentifier Nothing t) ["text", "int4"]
159
160 ----------------------
161 queryInsertNgrams :: DPS.Query
162 queryInsertNgrams = [sql|
163 WITH input_rows(terms,n) AS (?)
164 , ins AS (
165 INSERT INTO ngrams (terms,n)
166 SELECT * FROM input_rows
167 ON CONFLICT (terms) DO NOTHING -- unique index created here
168 RETURNING id,terms
169 )
170
171 SELECT id, terms
172 FROM ins
173 UNION ALL
174 SELECT c.id, terms
175 FROM input_rows
176 JOIN ngrams c USING (terms); -- columns of unique index
177 |]