]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Database/Schema/NodeNgram.hs
[FEAT] Ngrams extraction unsupervized (doc, type, function).
[gargantext.git] / src / Gargantext / Database / Schema / NodeNgram.hs
1 {-|
2 Module : Gargantext.Database.Schema.NodeNgrams
3 Description : NodeNgram for Ngram indexation or Lists
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 NodeNgram: relation between a Node and a Ngrams
11
12 if Node is a Document then it is indexing
13 if Node is a List then it is listing (either Stop, Candidate or Map)
14
15 -}
16
17 {-# OPTIONS_GHC -fno-warn-orphans #-}
18
19 {-# LANGUAGE Arrows #-}
20 {-# LANGUAGE FlexibleInstances #-}
21 {-# LANGUAGE FunctionalDependencies #-}
22 {-# LANGUAGE GeneralizedNewtypeDeriving #-}
23 {-# LANGUAGE MultiParamTypeClasses #-}
24 {-# LANGUAGE NoImplicitPrelude #-}
25 {-# LANGUAGE OverloadedStrings #-}
26 {-# LANGUAGE QuasiQuotes #-}
27 {-# LANGUAGE RankNTypes #-}
28 {-# LANGUAGE TemplateHaskell #-}
29
30
31 -- TODO NodeNgrams
32 module Gargantext.Database.Schema.NodeNgram where
33
34 import Data.ByteString (ByteString)
35 import Data.Text (Text)
36 import Control.Lens.TH (makeLenses)
37 import Control.Monad (void)
38 import Data.Profunctor.Product.TH (makeAdaptorAndInstance)
39 import Database.PostgreSQL.Simple.Types (Values(..), QualifiedIdentifier(..))
40 import Database.PostgreSQL.Simple.SqlQQ (sql)
41 import Gargantext.Database.Utils (mkCmd, Cmd, execPGSQuery)
42 import Gargantext.Core.Types.Main (ListTypeId)
43 import Gargantext.Database.Types.Node (NodeId, ListId)
44 import Gargantext.Database.Schema.Node (pgNodeId)
45 import Gargantext.Database.Schema.Ngrams (NgramsTypeId, pgNgramsTypeId)
46 import Gargantext.Prelude
47 import Gargantext.Database.Utils (formatPGSQuery)
48 import Opaleye
49 import qualified Database.PostgreSQL.Simple as DPS
50
51 -- | TODO : remove id
52 data NodeNgramPoly node_id ngrams_id parent_id ngrams_type list_type weight
53 = NodeNgram { nng_node_id :: node_id
54 , nng_ngrams_id :: ngrams_id
55 , nng_parent_id :: parent_id
56
57 , nng_ngramsType :: ngrams_type
58 , nng_listType :: list_type
59 , nng_weight :: weight
60 } deriving (Show)
61
62 type NodeNgramWrite =
63 NodeNgramPoly
64 (Column PGInt4 )
65 (Column PGInt4 )
66 (Maybe (Column PGInt4))
67
68 (Column PGInt4 )
69 (Column PGInt4 )
70 (Column PGFloat8)
71
72 type NodeNgramRead =
73 NodeNgramPoly
74 (Column PGInt4 )
75 (Column PGInt4 )
76 (Column PGInt4 )
77
78 (Column PGInt4 )
79 (Column PGInt4 )
80 (Column PGFloat8)
81
82 type NodeNgramReadNull =
83 NodeNgramPoly
84 (Column (Nullable PGInt4 ))
85 (Column (Nullable PGInt4 ))
86 (Column (Nullable PGInt4 ))
87
88 (Column (Nullable PGInt4 ))
89 (Column (Nullable PGInt4 ))
90 (Column (Nullable PGFloat8))
91
92 type NodeNgram =
93 NodeNgramPoly NodeId Int (Maybe NgramsParentId) NgramsTypeId Int Double
94
95 newtype NgramsParentId = NgramsParentId Int
96 deriving (Show, Eq, Num)
97
98 pgNgramsParentId :: NgramsParentId -> Column PGInt4
99 pgNgramsParentId (NgramsParentId n) = pgInt4 n
100
101 $(makeAdaptorAndInstance "pNodeNgram" ''NodeNgramPoly)
102 makeLenses ''NodeNgramPoly
103
104 nodeNgramTable :: Table NodeNgramWrite NodeNgramRead
105 nodeNgramTable = Table "nodes_ngrams"
106 ( pNodeNgram NodeNgram
107 { nng_node_id = required "node_id"
108 , nng_ngrams_id = required "ngrams_id"
109 , nng_parent_id = optional "parent_id"
110 , nng_ngramsType = required "ngrams_type"
111 , nng_listType = required "list_type"
112 , nng_weight = required "weight"
113 }
114 )
115
116 queryNodeNgramTable :: Query NodeNgramRead
117 queryNodeNgramTable = queryTable nodeNgramTable
118
119 --{-
120 insertNodeNgrams :: [NodeNgram] -> Cmd err Int
121 insertNodeNgrams = insertNodeNgramW
122 . map (\(NodeNgram n g p ngt lt w) ->
123 NodeNgram (pgNodeId n)
124 (pgInt4 g)
125 (pgNgramsParentId <$> p)
126 (pgNgramsTypeId ngt)
127 (pgInt4 lt)
128 (pgDouble w)
129 )
130 insertNodeNgramW :: [NodeNgramWrite] -> Cmd err Int
131 insertNodeNgramW nns =
132 mkCmd $ \c -> fromIntegral <$> runInsert_ c insertNothing
133 where
134 insertNothing = (Insert { iTable = nodeNgramTable
135 , iRows = nns
136 , iReturning = rCount
137 , iOnConflict = (Just DoNothing)
138 })
139 --}
140 type NgramsText = Text
141
142 updateNodeNgrams' :: ListId -> [(NgramsTypeId, NgramsText, ListTypeId)] -> Cmd err ()
143 updateNodeNgrams' _ [] = pure ()
144 updateNodeNgrams' listId input = void $ execPGSQuery updateQuery (DPS.Only $ Values fields input')
145 where
146 fields = map (\t-> QualifiedIdentifier Nothing t) ["int4","int4","text","int4"]
147 input' = map (\(nt,t,lt) -> (listId, nt, t, lt)) input
148
149 updateNodeNgrams'_debug :: ListId -> [(NgramsTypeId, NgramsText, ListTypeId)] -> Cmd err ByteString
150 updateNodeNgrams'_debug listId input = formatPGSQuery updateQuery (DPS.Only $ Values fields input')
151 where
152 fields = map (\t-> QualifiedIdentifier Nothing t) ["int4","int4","text","int4"]
153 input' = map (\(nt,t,lt) -> (listId, nt, t, lt)) input
154
155 updateQuery :: DPS.Query
156 updateQuery = [sql|
157 WITH new(node_id,ngrams_type,terms,typeList) as (?)
158
159 INSERT into nodes_ngrams (node_id,ngrams_id,ngrams_type,list_type,weight)
160
161 SELECT node_id,ngrams.id,ngrams_type,typeList,1 FROM new
162 JOIN ngrams ON ngrams.terms = new.terms
163 ON CONFLICT (node_id, ngrams_id, ngrams_type) DO
164 -- DO NOTHING
165
166 UPDATE SET list_type = excluded.list_type
167 ;
168 |]