2 Module : Gargantext.Database.Ngrams
3 Description : Ngram connection to the Database
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
10 Ngrams connection to the Database.
14 {-# LANGUAGE Arrows #-}
15 {-# LANGUAGE DeriveGeneric #-}
16 {-# LANGUAGE FlexibleInstances #-}
17 {-# LANGUAGE FunctionalDependencies #-}
18 {-# LANGUAGE MultiParamTypeClasses #-}
19 {-# LANGUAGE NoImplicitPrelude #-}
20 {-# LANGUAGE OverloadedStrings #-}
21 {-# LANGUAGE QuasiQuotes #-}
22 {-# LANGUAGE TemplateHaskell #-}
24 module Gargantext.Database.Ngrams where
27 import Control.Lens (makeLenses)
28 import Data.ByteString.Internal (ByteString)
29 import Data.Map (Map, fromList, lookup)
30 import Data.Text (Text, splitOn)
31 import Database.PostgreSQL.Simple.FromRow (fromRow, field)
32 import Database.PostgreSQL.Simple.SqlQQ (sql)
33 import Database.PostgreSQL.Simple.ToField (toField)
34 import Database.PostgreSQL.Simple.ToRow (toRow)
35 import Database.PostgreSQL.Simple.Types (Values(..), QualifiedIdentifier(..))
36 import GHC.Generics (Generic)
37 import Gargantext.Database.Node (mkCmd, Cmd(..))
38 import Gargantext.Prelude
39 import qualified Database.PostgreSQL.Simple as DPS
42 --data NgramPoly id terms n = NgramDb { ngram_id :: id
43 -- , ngram_terms :: terms
47 --type NgramWrite = NgramPoly (Maybe (Column PGInt4))
51 --type NgramRead = NgramPoly (Column PGInt4)
55 ----type Ngram = NgramPoly Int Text Int
57 -- $(makeAdaptorAndInstance "pNgram" ''NgramPoly)
58 -- $(makeLensesWith abbreviatedFields ''NgramPoly)
60 --ngramTable :: Table NgramWrite NgramRead
61 --ngramTable = Table "ngrams" (pNgram NgramDb { ngram_id = optional "id"
62 -- , ngram_terms = required "terms"
63 -- , ngram_n = required "n"
67 --queryNgramTable :: Query NgramRead
68 --queryNgramTable = queryTable ngramTable
70 --dbGetNgrams :: PGS.Connection -> IO [NgramDb]
71 --dbGetNgrams conn = runQuery conn queryNgramTable
73 -- | Main Ngrams Types
75 -- Typed Ngrams localize the context of the ngrams
76 -- ngrams in source field of document has Sources Type
77 -- ngrams in authors field of document has Authors Type
78 -- ngrams in text (title or abstract) of documents has Terms Type
79 data NgramsType = Authors | Institutes | Sources | Terms
82 ngramsTypeId :: NgramsType -> Int
83 ngramsTypeId Authors = 1
84 ngramsTypeId Institutes = 2
85 ngramsTypeId Sources = 3
86 ngramsTypeId Terms = 4
88 type NgramsTerms = Text
92 ------------------------------------------------------------------------
93 -- | TODO put it in Gargantext.Text.Ngrams
94 data Ngrams = Ngrams { _ngramsTerms :: Text
96 } deriving (Generic, Show)
97 instance Eq Ngrams where
99 instance Ord Ngrams where
102 instance DPS.ToRow Ngrams where
103 toRow (Ngrams t s) = [toField t, toField s]
105 text2ngrams :: Text -> Ngrams
106 text2ngrams txt = Ngrams txt $ length $ splitOn " " txt
108 -------------------------------------------------------------------------
109 -- | TODO put it in Gargantext.Text.Ngrams
110 -- Named entity are typed ngrams of Terms Ngrams
112 NgramsT { _ngramsType :: NgramsType
114 } deriving (Generic, Show)
116 instance Eq (NgramsT a)
122 instance Ord (NgramsT a) where compare = compare
126 -----------------------------------------------------------------------
130 , _ngramsId :: NgramsId
131 } deriving (Show, Generic)
133 instance Eq NgramsIndexed where
135 instance Ord NgramsIndexed where
137 makeLenses ''NgramsIndexed
139 ------------------------------------------------------------------------
144 } deriving (Show, Generic)
146 instance DPS.FromRow NgramIds where
147 fromRow = NgramIds <$> field <*> field
149 ----------------------
150 indexNgramsT :: Map NgramsTerms NgramsId -> NgramsT Ngrams -> NgramsT NgramsIndexed
151 indexNgramsT m ngrId = indexNgramsTWith f ngrId
153 f n = maybe (panic "indexNgramsT: should not happen") identity (lookup n m)
155 indexNgramsTWith :: (NgramsTerms -> NgramsId) -> NgramsT Ngrams-> NgramsT NgramsIndexed
156 indexNgramsTWith f (NgramsT t n) = NgramsT t (NgramsIndexed n ((f . _ngramsTerms) n))
158 insertNgrams :: [Ngrams] -> Cmd (Map NgramsTerms NgramsId)
159 insertNgrams ns = fromList <$> map (\(NgramIds i t) -> (t, i)) <$> (insertNgrams' ns)
161 insertNgrams' :: [Ngrams] -> Cmd [NgramIds]
162 insertNgrams' ns = mkCmd $ \conn -> DPS.query conn queryInsertNgrams (DPS.Only $ Values fields ns)
164 fields = map (\t -> QualifiedIdentifier Nothing t) ["text", "int4"]
167 insertNgrams_Debug :: [(NgramsTerms, Size)] -> Cmd ByteString
168 insertNgrams_Debug ns = mkCmd $ \conn -> DPS.formatQuery conn queryInsertNgrams (DPS.Only $ Values fields ns)
170 fields = map (\t -> QualifiedIdentifier Nothing t) ["text", "int4"]
172 ----------------------
173 queryInsertNgrams :: DPS.Query
174 queryInsertNgrams = [sql|
175 WITH input_rows(terms,n) AS (?)
177 INSERT INTO ngrams (terms,n)
178 SELECT * FROM input_rows
179 ON CONFLICT (terms) DO NOTHING -- unique index created here
188 JOIN ngrams c USING (terms); -- columns of unique index