2 Module : Gargantext.Database.Flow
3 Description : Database Flow
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
12 {-# LANGUAGE QuasiQuotes #-}
13 {-# LANGUAGE NoImplicitPrelude #-}
14 {-# LANGUAGE OverloadedStrings #-}
15 {-# LANGUAGE RankNTypes #-}
16 -- {-# LANGUAGE Arrows #-}
18 module Gargantext.Database.Flow.Pairing
22 --import Debug.Trace (trace)
23 import Control.Lens (_Just, (^.))
24 import Database.PostgreSQL.Simple.SqlQQ (sql)
26 -- import Opaleye.Aggregate
27 -- import Control.Arrow (returnA)
28 import Data.Maybe (catMaybes)
29 import Data.Map (Map, fromList)
31 import qualified Data.Map as DM
32 import Data.Text (Text, toLower)
33 import qualified Data.Text as DT
34 import Gargantext.Prelude hiding (sum)
35 import Gargantext.Core.Types (TableResult(..))
36 import Gargantext.Database.Schema.Ngrams -- (NgramsType(..))
37 import Gargantext.Database.Node.Contact -- (HyperdataContact(..))
38 import Gargantext.Database.Flow.Utils
39 import Gargantext.Database.Utils (Cmd, runPGSQuery)
40 import Gargantext.Database.Types.Node (AnnuaireId, CorpusId, ListId{-, DocId, ContactId-})
41 import Gargantext.Database.Node.Children (getAllContacts)
43 -- TODO mv this type in Types Main
47 pairing'' :: (CorpusId, CorpusId) -> (DocId -> DocId)
50 pairing' :: (CorpusId, AnnuaireId) -> (DocId -> ContactId)
54 -- | TODO : add paring policy as parameter
55 pairing :: CorpusId -- (CorpusId, ListId) -- Pair (Either CorpusId AnnuaireId) ListId
56 -> AnnuaireId -- (AnnuaireId, ListId) -- Pair (Either CorpusId AnnuaireId) ListId
59 pairing cId aId lId = do
60 contacts' <- getAllContacts aId
61 let contactsMap = pairingPolicyToMap toLower
62 $ toMaps extractNgramsT (tr_docs contacts')
64 ngramsMap' <- getNgramsTindexed cId Authors
65 let ngramsMap = pairingPolicyToMap lastName ngramsMap'
67 let indexedNgrams = pairMaps contactsMap ngramsMap
69 insertDocNgrams lId indexedNgrams
71 lastName :: Terms -> Terms
72 lastName texte = DT.toLower
73 $ maybe texte (\x -> if DT.length x > 3 then x else texte) (lastName' texte)
75 lastName' = lastMay . DT.splitOn " "
77 -- TODO: this method is dangerous (maybe equalities of the result are not taken into account
78 -- emergency demo plan...)
79 pairingPolicyToMap :: (Terms -> Terms)
80 -> Map (NgramsT Ngrams) a
81 -> Map (NgramsT Ngrams) a
82 pairingPolicyToMap f = DM.mapKeys (pairingPolicy f)
84 pairingPolicy :: (Terms -> Terms)
87 pairingPolicy f (NgramsT nt (Ngrams ng _)) = (NgramsT nt (Ngrams (f ng) 1))
89 -- | TODO : use Occurrences in place of Int
90 extractNgramsT :: HyperdataContact
91 -> Map (NgramsT Ngrams) Int
92 extractNgramsT contact = fromList [(NgramsT Authors a' , 1)| a' <- authors ]
94 authors = map text2ngrams
95 $ catMaybes [ contact^.(hc_who . _Just . cw_lastName) ]
98 pairMaps :: Map (NgramsT Ngrams) a
99 -> Map (NgramsT Ngrams) NgramsId
100 -> Map NgramsIndexed (Map NgramsType a)
103 [ (NgramsIndexed ng nId, DM.singleton nt n2i)
104 | (k@(NgramsT nt ng),n2i) <- DM.toList m1
105 , Just nId <- [DM.lookup k m2]
108 -----------------------------------------------------------------------
109 getNgramsTindexed :: CorpusId
111 -> Cmd err (Map (NgramsT Ngrams) NgramsId)
112 getNgramsTindexed corpusId ngramsType' = fromList
113 <$> map (\(ngramsId',t,n) -> (NgramsT ngramsType' (Ngrams t n),ngramsId'))
114 <$> selectNgramsTindexed corpusId ngramsType'
116 selectNgramsTindexed :: CorpusId
118 -> Cmd err [(NgramsId, Terms, Int)]
119 selectNgramsTindexed corpusId' ngramsType'' = runPGSQuery selectQuery (corpusId', ngramsTypeId ngramsType'')
121 selectQuery = [sql| SELECT n.id,n.terms,n.n from ngrams n
122 JOIN node_node_ngrams occ ON occ.ngrams_id = n.id
123 -- JOIN node_node_ngrams2 occ ON occ.ngrams_id = n.id
124 JOIN nodes_nodes nn ON nn.node2_id = occ.node2_id
126 WHERE nn.node1_id = ?
127 AND occ.ngrams_type = ?
128 AND occ.node2_id = nn.node2_id
132 {- | TODO more typed SQL queries
133 selectNgramsTindexed :: CorpusId -> NgramsType -> Query NgramsRead
134 selectNgramsTindexed corpusId ngramsType = proc () -> do
135 nodeNode <- queryNodeNodeTable -< ()
136 nodeNgrams <- queryNodesNgramsTable -< ()
137 ngrams <- queryNgramsTable -< ()
139 restrict -< node1_id nodeNode .== pgInt4 corpusId
140 restrict -< node2_id nodeNode .== node_id nodeNgrams
141 restrict -< ngrams_id ngrams .== node_ngrams nodeNgrams
143 result <- aggregate groupBy (ngrams_id ngrams)