2 Module : Gargantext.Database.Flow
3 Description : Database Flow
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
12 {-# LANGUAGE FlexibleContexts #-}
13 {-# LANGUAGE QuasiQuotes #-}
14 {-# LANGUAGE NoImplicitPrelude #-}
15 {-# LANGUAGE OverloadedStrings #-}
16 {-# LANGUAGE RankNTypes #-}
17 -- {-# LANGUAGE Arrows #-}
19 module Gargantext.Database.Flow.Pairing
23 --import Debug.Trace (trace)
24 import Control.Lens (_Just, (^.))
25 import Database.PostgreSQL.Simple.SqlQQ (sql)
27 -- import Opaleye.Aggregate
28 -- import Control.Arrow (returnA)
29 import Data.Maybe (catMaybes)
30 import Data.Map (Map, fromList)
32 import qualified Data.Map as DM
33 import Data.Text (Text, toLower)
34 import qualified Data.Text as DT
35 import Gargantext.Prelude hiding (sum)
36 import Gargantext.Core.Types (TableResult(..))
37 import Gargantext.Database.Schema.Ngrams -- (NgramsType(..))
38 import Gargantext.Database.Node.Contact -- (HyperdataContact(..))
39 import Gargantext.Database.Flow.Utils
40 import Gargantext.Database.Utils (Cmd, runPGSQuery)
41 import Gargantext.Database.Types.Node (AnnuaireId, CorpusId, ListId{-, DocId, ContactId-})
42 import Gargantext.Database.Node.Children (getAllContacts)
44 -- TODO mv this type in Types Main
48 pairing'' :: (CorpusId, CorpusId) -> (DocId -> DocId)
51 pairing' :: (CorpusId, AnnuaireId) -> (DocId -> ContactId)
55 -- | TODO : add paring policy as parameter
56 pairing :: CorpusId -- (CorpusId, ListId) -- Pair (Either CorpusId AnnuaireId) ListId
57 -> AnnuaireId -- (AnnuaireId, ListId) -- Pair (Either CorpusId AnnuaireId) ListId
60 pairing cId aId lId = do
61 contacts' <- getAllContacts aId
62 let contactsMap = pairingPolicyToMap toLower
63 $ toMaps extractNgramsT (tr_docs contacts')
65 ngramsMap' <- getNgramsTindexed cId Authors
66 let ngramsMap = pairingPolicyToMap lastName ngramsMap'
68 let indexedNgrams = pairMaps contactsMap ngramsMap
70 insertDocNgrams lId indexedNgrams
72 lastName :: Terms -> Terms
73 lastName texte = DT.toLower
74 $ maybe texte (\x -> if DT.length x > 3 then x else texte) (lastName' texte)
76 lastName' = lastMay . DT.splitOn " "
78 -- TODO: this method is dangerous (maybe equalities of the result are not taken into account
79 -- emergency demo plan...)
80 pairingPolicyToMap :: (Terms -> Terms)
81 -> Map (NgramsT Ngrams) a
82 -> Map (NgramsT Ngrams) a
83 pairingPolicyToMap f = DM.mapKeys (pairingPolicy f)
85 pairingPolicy :: (Terms -> Terms)
88 pairingPolicy f (NgramsT nt (Ngrams ng _)) = (NgramsT nt (Ngrams (f ng) 1))
90 -- | TODO : use Occurrences in place of Int
91 extractNgramsT :: HyperdataContact
92 -> Map (NgramsT Ngrams) Int
93 extractNgramsT contact = fromList [(NgramsT Authors a' , 1)| a' <- authors ]
95 authors = map text2ngrams
96 $ catMaybes [ contact^.(hc_who . _Just . cw_lastName) ]
99 pairMaps :: Map (NgramsT Ngrams) a
100 -> Map (NgramsT Ngrams) NgramsId
101 -> Map NgramsIndexed (Map NgramsType a)
104 [ (NgramsIndexed ng nId, DM.singleton nt n2i)
105 | (k@(NgramsT nt ng),n2i) <- DM.toList m1
106 , Just nId <- [DM.lookup k m2]
109 -----------------------------------------------------------------------
110 getNgramsTindexed :: CorpusId
112 -> Cmd err (Map (NgramsT Ngrams) NgramsId)
113 getNgramsTindexed corpusId ngramsType' = fromList
114 <$> map (\(ngramsId',t,n) -> (NgramsT ngramsType' (Ngrams t n),ngramsId'))
115 <$> selectNgramsTindexed corpusId ngramsType'
117 selectNgramsTindexed :: CorpusId
119 -> Cmd err [(NgramsId, Terms, Int)]
120 selectNgramsTindexed corpusId' ngramsType'' = runPGSQuery selectQuery (corpusId', ngramsTypeId ngramsType'')
122 selectQuery = [sql| SELECT n.id,n.terms,n.n from ngrams n
123 JOIN node_node_ngrams occ ON occ.ngrams_id = n.id
124 -- JOIN node_node_ngrams2 occ ON occ.ngrams_id = n.id
125 JOIN nodes_nodes nn ON nn.node2_id = occ.node2_id
127 WHERE nn.node1_id = ?
128 AND occ.ngrams_type = ?
129 AND occ.node2_id = nn.node2_id
133 {- | TODO more typed SQL queries
134 selectNgramsTindexed :: CorpusId -> NgramsType -> Query NgramsRead
135 selectNgramsTindexed corpusId ngramsType = proc () -> do
136 nodeNode <- queryNodeNodeTable -< ()
137 nodeNgrams <- queryNodesNgramsTable -< ()
138 ngrams <- queryNgramsTable -< ()
140 restrict -< node1_id nodeNode .== pgInt4 corpusId
141 restrict -< node2_id nodeNode .== node_id nodeNgrams
142 restrict -< ngrams_id ngrams .== node_ngrams nodeNgrams
144 result <- aggregate groupBy (ngrams_id ngrams)