]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Database/Action/Flow/Pairing.hs
[Community] Type Design (WIP)
[gargantext.git] / src / Gargantext / Database / Action / Flow / Pairing.hs
1 {-|
2 Module : Gargantext.Database.Flow
3 Description : Database Flow
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 # Spécifications for pairing
11
12 database:
13
14 add NodeType Community (instead of texts, contacts)
15
16 nodes_nodes
17 corpusId_communitId
18
19 get defaultList Id of each (for now)
20
21 corpusId_docId
22 listId_ngramsId (authors)
23
24 listId_docId_[ngrams]
25 listId_contactId_[ngramsId']
26
27
28 if isSame ngramsId ngramsId'
29 then
30 insert listId_docId_contactId
31 else
32 nothing
33
34
35 -}
36
37 {-# LANGUAGE QuasiQuotes #-}
38 -- {-# LANGUAGE Arrows #-}
39
40 module Gargantext.Database.Action.Flow.Pairing
41 (pairing)
42 where
43
44 import Data.Set (Set)
45 import Control.Lens (_Just, (^.))
46 import Data.Map (Map, fromList, fromListWith)
47 import Data.Maybe (catMaybes)
48 import Data.Text (Text, toLower)
49 import Database.PostgreSQL.Simple.SqlQQ (sql)
50 import Gargantext.Core.Types (TableResult(..))
51 import Gargantext.Database.Action.Flow.Utils
52 import Gargantext.Database.Admin.Types.Node (AnnuaireId, CorpusId, ListId{-, DocId, ContactId-})
53 import Gargantext.Database.Prelude (Cmd, runPGSQuery)
54 import Gargantext.Database.Query.Table.Node.Children (getAllContacts)
55 import Gargantext.Database.Admin.Types.Hyperdata -- (HyperdataContact(..))
56 import Gargantext.Database.Schema.Ngrams -- (NgramsType(..))
57 import Gargantext.Prelude hiding (sum)
58 import Safe (lastMay)
59 import qualified Data.Map as DM
60 import qualified Data.Text as DT
61 import qualified Data.Set as Set
62
63 -- TODO mv this type in Types Main
64 type Terms = Text
65
66 {-
67 pairing'' :: (CorpusId, CorpusId) -> (DocId -> DocId)
68 pairing'' = undefined
69
70 pairing' :: (CorpusId, AnnuaireId) -> (DocId -> ContactId)
71 pairing' = undefined
72 -}
73
74 -- | TODO : add paring policy as parameter
75 pairing :: CorpusId -- (CorpusId, ListId) -- Pair (Either CorpusId AnnuaireId) ListId
76 -> AnnuaireId -- (AnnuaireId, ListId) -- Pair (Either CorpusId AnnuaireId) ListId
77 -> ListId
78 -> Cmd err Int
79 pairing cId aId lId = do
80 contacts' <- getAllContacts aId
81 let contactsMap = pairingPolicyToMap toLower
82 $ toMaps extractNgramsT (tr_docs contacts')
83
84 ngramsMap' <- getNgramsTindexed cId Authors
85 let ngramsMap = pairingPolicyToMap lastName ngramsMap'
86
87 let indexedNgrams = pairMaps contactsMap ngramsMap
88
89 insertDocNgrams lId indexedNgrams
90
91 -- TODO: this method is dangerous (maybe equalities of the result are
92 -- not taken into account emergency demo plan...)
93 pairingPolicyToMap :: (Terms -> Terms)
94 -> Map (NgramsT Ngrams) a
95 -> Map (NgramsT Ngrams) a
96 pairingPolicyToMap f = DM.mapKeys (pairingPolicy f)
97
98 lastName :: Terms -> Terms
99 lastName texte = DT.toLower
100 $ maybe texte (\x -> if DT.length x > 3 then x else texte)
101 (lastName' texte)
102 where
103 lastName' = lastMay . DT.splitOn " "
104
105
106 pairingPolicy :: (Terms -> Terms)
107 -> NgramsT Ngrams
108 -> NgramsT Ngrams
109 pairingPolicy f (NgramsT nt (Ngrams ng _)) = (NgramsT nt (Ngrams (f ng) 1))
110
111 -- | TODO : use Occurrences in place of Int
112 extractNgramsT :: HyperdataContact
113 -> Map (NgramsT Ngrams) Int
114 extractNgramsT contact = fromList [(NgramsT Authors a' , 1)| a' <- authors ]
115 where
116 authors = map text2ngrams
117 $ catMaybes [ contact^.(hc_who . _Just . cw_lastName) ]
118
119
120 pairMaps :: Map (NgramsT Ngrams) a
121 -> Map (NgramsT Ngrams) NgramsId
122 -> Map NgramsIndexed (Map NgramsType a)
123 pairMaps m1 m2 =
124 DM.fromList
125 [ (NgramsIndexed ng nId, DM.singleton nt n2i)
126 | (k@(NgramsT nt ng),n2i) <- DM.toList m1
127 , Just nId <- [DM.lookup k m2]
128 ]
129
130 -----------------------------------------------------------------------
131 getNgramsTindexed :: CorpusId
132 -> NgramsType
133 -> Cmd err (Map (NgramsT Ngrams) NgramsId)
134 getNgramsTindexed corpusId ngramsType' = fromList
135 <$> map (\(ngramsId',t,n) -> (NgramsT ngramsType' (Ngrams t n),ngramsId'))
136 <$> selectNgramsTindexed corpusId ngramsType'
137 where
138 selectNgramsTindexed :: CorpusId
139 -> NgramsType
140 -> Cmd err [(NgramsId, Terms, Int)]
141 selectNgramsTindexed corpusId' ngramsType'' = runPGSQuery selectQuery (corpusId', ngramsTypeId ngramsType'')
142 where
143 selectQuery = [sql| SELECT n.id,n.terms,n.n from ngrams n
144 JOIN node_node_ngrams occ ON occ.ngrams_id = n.id
145 -- JOIN node_node_ngrams2 occ ON occ.ngrams_id = n.id
146 JOIN nodes_nodes nn ON nn.node2_id = occ.node2_id
147
148 WHERE nn.node1_id = ?
149 AND occ.ngrams_type = ?
150 AND occ.node2_id = nn.node2_id
151 GROUP BY n.id;
152 |]
153
154 ------------------------------------------------------------------------
155
156
157 -- resultPairing ::
158
159 type ContactName = Text
160 type DocAuthor = Text
161
162 data ToProject = ContactName | DocAuthor
163
164 type Projected = Text
165
166
167 type Projection a = Map a Projected
168
169
170 projection :: Set ToProject -> (ToProject -> Projected) -> Projection ToProject
171 projection = undefined
172
173 align :: Projection ContactName -> Projection DocAuthor
174 -> Map ContactName [ContactId] -> Map DocAuthor [DocId]
175 -> Map ContactId (Set DocId)
176 align = undefined
177
178 -- insert ContactId_DocId as NodeNode
179 -- then each ContactId could become a corpus with its DocIds
180
181
182 ------------------------------------------------------------------------
183
184 getNgramsContactId :: AnnuaireId
185 -> ListId
186 -- -> ContactType
187 -> Cmd err (Map Text [Int])
188 getNgramsContactId = undefined
189
190 -- | TODO
191 -- filter Trash / map Authors
192 -- Indexing all ngramsType like Authors
193 getNgramsDocId :: CorpusId
194 -> ListId
195 -> NgramsType
196 -> Cmd err (Map Text [Int])
197 getNgramsDocId corpusId listId ngramsType
198 = fromListWith (<>)
199 <$> map (\(t,nId) -> (t,[nId]))
200 <$> selectNgramsDocId corpusId listId ngramsType
201
202 selectNgramsDocId :: CorpusId
203 -> ListId
204 -> NgramsType
205 -> Cmd err [(Text, Int)]
206 selectNgramsDocId corpusId' listId' ngramsType' =
207 runPGSQuery selectQuery (corpusId', listId', ngramsTypeId ngramsType')
208 where
209 selectQuery = [sql| SELECT ng.terms,nnng.node2_id from ngrams ng
210 JOIN node_node_ngrams nnng ON nnng.ngrams_id = ng.id
211 JOIN nodes_nodes nn ON nn.node2_id = nnng.node2_id
212
213 WHERE nn.node1_id = ?
214 AND nnng.node1_id = ?
215 AND nnng.ngrams_type = ?
216 ;
217 |]
218
219
220
221
222
223 {- | TODO more typed SQL queries
224 selectNgramsTindexed :: CorpusId -> NgramsType -> Query NgramsRead
225 selectNgramsTindexed corpusId ngramsType = proc () -> do
226 nodeNode <- queryNodeNodeTable -< ()
227 nodeNgrams <- queryNodesNgramsTable -< ()
228 ngrams <- queryNgramsTable -< ()
229
230 restrict -< node1_id nodeNode .== pgInt4 corpusId
231 restrict -< node2_id nodeNode .== node_id nodeNgrams
232 restrict -< ngrams_id ngrams .== node_ngrams nodeNgrams
233
234 result <- aggregate groupBy (ngrams_id ngrams)
235 returnA -< result
236 --}