2 Module : Gargantext.Database.Flow
3 Description : Database Flow
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
10 # Spécifications for pairing
14 add NodeType Community (instead of texts, contacts)
19 get defaultList Id of each (for now)
22 listId_ngramsId (authors)
25 listId_contactId_[ngramsId']
28 if isSame ngramsId ngramsId'
30 insert listId_docId_contactId
37 {-# LANGUAGE QuasiQuotes #-}
38 -- {-# LANGUAGE Arrows #-}
40 module Gargantext.Database.Action.Flow.Pairing
45 import Control.Lens (_Just, (^.))
46 import Data.Map (Map, fromList, fromListWith)
47 import Data.Maybe (catMaybes)
48 import Data.Text (Text, toLower)
49 import Database.PostgreSQL.Simple.SqlQQ (sql)
50 import Gargantext.Core.Types (TableResult(..))
51 import Gargantext.Database.Action.Flow.Utils
52 import Gargantext.Database.Admin.Types.Node (AnnuaireId, CorpusId, ListId{-, DocId, ContactId-})
53 import Gargantext.Database.Prelude (Cmd, runPGSQuery)
54 import Gargantext.Database.Query.Table.Node.Children (getAllContacts)
55 import Gargantext.Database.Admin.Types.Hyperdata -- (HyperdataContact(..))
56 import Gargantext.Database.Schema.Ngrams -- (NgramsType(..))
57 import Gargantext.Prelude hiding (sum)
59 import qualified Data.Map as DM
60 import qualified Data.Text as DT
61 import qualified Data.Set as Set
63 -- TODO mv this type in Types Main
67 pairing'' :: (CorpusId, CorpusId) -> (DocId -> DocId)
70 pairing' :: (CorpusId, AnnuaireId) -> (DocId -> ContactId)
74 -- | TODO : add paring policy as parameter
75 pairing :: CorpusId -- (CorpusId, ListId) -- Pair (Either CorpusId AnnuaireId) ListId
76 -> AnnuaireId -- (AnnuaireId, ListId) -- Pair (Either CorpusId AnnuaireId) ListId
79 pairing cId aId lId = do
80 contacts' <- getAllContacts aId
81 let contactsMap = pairingPolicyToMap toLower
82 $ toMaps extractNgramsT (tr_docs contacts')
84 ngramsMap' <- getNgramsTindexed cId Authors
85 let ngramsMap = pairingPolicyToMap lastName ngramsMap'
87 let indexedNgrams = pairMaps contactsMap ngramsMap
89 insertDocNgrams lId indexedNgrams
91 -- TODO: this method is dangerous (maybe equalities of the result are
92 -- not taken into account emergency demo plan...)
93 pairingPolicyToMap :: (Terms -> Terms)
94 -> Map (NgramsT Ngrams) a
95 -> Map (NgramsT Ngrams) a
96 pairingPolicyToMap f = DM.mapKeys (pairingPolicy f)
98 lastName :: Terms -> Terms
99 lastName texte = DT.toLower
100 $ maybe texte (\x -> if DT.length x > 3 then x else texte)
103 lastName' = lastMay . DT.splitOn " "
106 pairingPolicy :: (Terms -> Terms)
109 pairingPolicy f (NgramsT nt (Ngrams ng _)) = (NgramsT nt (Ngrams (f ng) 1))
111 -- | TODO : use Occurrences in place of Int
112 extractNgramsT :: HyperdataContact
113 -> Map (NgramsT Ngrams) Int
114 extractNgramsT contact = fromList [(NgramsT Authors a' , 1)| a' <- authors ]
116 authors = map text2ngrams
117 $ catMaybes [ contact^.(hc_who . _Just . cw_lastName) ]
120 pairMaps :: Map (NgramsT Ngrams) a
121 -> Map (NgramsT Ngrams) NgramsId
122 -> Map NgramsIndexed (Map NgramsType a)
125 [ (NgramsIndexed ng nId, DM.singleton nt n2i)
126 | (k@(NgramsT nt ng),n2i) <- DM.toList m1
127 , Just nId <- [DM.lookup k m2]
130 -----------------------------------------------------------------------
131 getNgramsTindexed :: CorpusId
133 -> Cmd err (Map (NgramsT Ngrams) NgramsId)
134 getNgramsTindexed corpusId ngramsType' = fromList
135 <$> map (\(ngramsId',t,n) -> (NgramsT ngramsType' (Ngrams t n),ngramsId'))
136 <$> selectNgramsTindexed corpusId ngramsType'
138 selectNgramsTindexed :: CorpusId
140 -> Cmd err [(NgramsId, Terms, Int)]
141 selectNgramsTindexed corpusId' ngramsType'' = runPGSQuery selectQuery (corpusId', ngramsTypeId ngramsType'')
143 selectQuery = [sql| SELECT n.id,n.terms,n.n from ngrams n
144 JOIN node_node_ngrams occ ON occ.ngrams_id = n.id
145 -- JOIN node_node_ngrams2 occ ON occ.ngrams_id = n.id
146 JOIN nodes_nodes nn ON nn.node2_id = occ.node2_id
148 WHERE nn.node1_id = ?
149 AND occ.ngrams_type = ?
150 AND occ.node2_id = nn.node2_id
154 ------------------------------------------------------------------------
159 type ContactName = Text
160 type DocAuthor = Text
162 data ToProject = ContactName | DocAuthor
164 type Projected = Text
167 type Projection a = Map a Projected
170 projection :: Set ToProject -> (ToProject -> Projected) -> Projection ToProject
171 projection = undefined
173 align :: Projection ContactName -> Projection DocAuthor
174 -> Map ContactName [ContactId] -> Map DocAuthor [DocId]
175 -> Map ContactId (Set DocId)
178 -- insert ContactId_DocId as NodeNode
179 -- then each ContactId could become a corpus with its DocIds
182 ------------------------------------------------------------------------
184 getNgramsContactId :: AnnuaireId
187 -> Cmd err (Map Text [Int])
188 getNgramsContactId = undefined
191 -- filter Trash / map Authors
192 -- Indexing all ngramsType like Authors
193 getNgramsDocId :: CorpusId
196 -> Cmd err (Map Text [Int])
197 getNgramsDocId corpusId listId ngramsType
199 <$> map (\(t,nId) -> (t,[nId]))
200 <$> selectNgramsDocId corpusId listId ngramsType
202 selectNgramsDocId :: CorpusId
205 -> Cmd err [(Text, Int)]
206 selectNgramsDocId corpusId' listId' ngramsType' =
207 runPGSQuery selectQuery (corpusId', listId', ngramsTypeId ngramsType')
209 selectQuery = [sql| SELECT ng.terms,nnng.node2_id from ngrams ng
210 JOIN node_node_ngrams nnng ON nnng.ngrams_id = ng.id
211 JOIN nodes_nodes nn ON nn.node2_id = nnng.node2_id
213 WHERE nn.node1_id = ?
214 AND nnng.node1_id = ?
215 AND nnng.ngrams_type = ?
223 {- | TODO more typed SQL queries
224 selectNgramsTindexed :: CorpusId -> NgramsType -> Query NgramsRead
225 selectNgramsTindexed corpusId ngramsType = proc () -> do
226 nodeNode <- queryNodeNodeTable -< ()
227 nodeNgrams <- queryNodesNgramsTable -< ()
228 ngrams <- queryNgramsTable -< ()
230 restrict -< node1_id nodeNode .== pgInt4 corpusId
231 restrict -< node2_id nodeNode .== node_id nodeNgrams
232 restrict -< ngrams_id ngrams .== node_ngrams nodeNgrams
234 result <- aggregate groupBy (ngrams_id ngrams)