2 Module : Gargantext.Database.Flow
3 Description : Database Flow
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
12 {-# LANGUAGE QuasiQuotes #-}
13 {-# LANGUAGE NoImplicitPrelude #-}
14 {-# LANGUAGE OverloadedStrings #-}
15 {-# LANGUAGE RankNTypes #-}
16 -- {-# LANGUAGE Arrows #-}
18 module Gargantext.Database.Flow.Pairing
22 --import Debug.Trace (trace)
23 import Control.Lens (_Just, (^.))
24 import Database.PostgreSQL.Simple.SqlQQ (sql)
26 -- import Opaleye.Aggregate
27 -- import Control.Arrow (returnA)
28 import Data.Maybe (catMaybes)
29 import Data.Map (Map, fromList)
31 import qualified Data.Map as DM
32 import Data.Text (Text, toLower)
33 import qualified Data.Text as DT
34 import Gargantext.Prelude hiding (sum)
35 import Gargantext.Core.Types (TableResult(..))
36 import Gargantext.Database.Schema.Ngrams -- (NgramsType(..))
37 import Gargantext.Database.Node.Contact -- (HyperdataContact(..))
38 import Gargantext.Database.Flow.Utils
39 import Gargantext.Database.Utils (Cmd, runPGSQuery)
40 import Gargantext.Database.Types.Node (AnnuaireId, CorpusId, ListId)
41 import Gargantext.Database.Node.Children (getAllContacts)
43 -- TODO mv this type in Types Main
46 -- | TODO : add paring policy as parameter
51 pairing aId cId lId = do
52 contacts' <- getAllContacts aId
53 let contactsMap = pairingPolicyToMap toLower
54 $ toMaps extractNgramsT (tr_docs contacts')
56 ngramsMap' <- getNgramsTindexed cId Authors
57 let ngramsMap = pairingPolicyToMap lastName ngramsMap'
59 let indexedNgrams = pairMaps contactsMap ngramsMap
61 insertDocNgrams lId indexedNgrams
63 lastName :: Terms -> Terms
64 lastName texte = DT.toLower
65 $ maybe texte (\x -> if DT.length x > 3 then x else texte) (lastName' texte)
67 lastName' = lastMay . DT.splitOn " "
69 -- TODO: this method is dangerous (maybe equalities of the result are not taken into account
70 -- emergency demo plan...)
71 pairingPolicyToMap :: (Terms -> Terms)
72 -> Map (NgramsT Ngrams) a
73 -> Map (NgramsT Ngrams) a
74 pairingPolicyToMap f = DM.mapKeys (pairingPolicy f)
76 pairingPolicy :: (Terms -> Terms)
79 pairingPolicy f (NgramsT nt (Ngrams ng _)) = (NgramsT nt (Ngrams (f ng) 1))
81 -- | TODO : use Occurrences in place of Int
82 extractNgramsT :: HyperdataContact
83 -> Map (NgramsT Ngrams) Int
84 extractNgramsT contact = fromList [(NgramsT Authors a' , 1)| a' <- authors ]
86 authors = map text2ngrams
87 $ catMaybes [ contact^.(hc_who . _Just . cw_lastName) ]
90 pairMaps :: Map (NgramsT Ngrams) a
91 -> Map (NgramsT Ngrams) NgramsId
92 -> Map NgramsIndexed (Map NgramsType a)
95 [ (NgramsIndexed ng nId, DM.singleton nt n2i)
96 | (k@(NgramsT nt ng),n2i) <- DM.toList m1
97 , Just nId <- [DM.lookup k m2]
100 -----------------------------------------------------------------------
101 getNgramsTindexed :: CorpusId
103 -> Cmd err (Map (NgramsT Ngrams) NgramsId)
104 getNgramsTindexed corpusId ngramsType' = fromList
105 <$> map (\(ngramsId',t,n) -> (NgramsT ngramsType' (Ngrams t n),ngramsId'))
106 <$> selectNgramsTindexed corpusId ngramsType'
108 selectNgramsTindexed :: CorpusId
110 -> Cmd err [(NgramsId, Terms, Int)]
111 selectNgramsTindexed corpusId' ngramsType'' = runPGSQuery selectQuery (corpusId', ngramsTypeId ngramsType'')
113 selectQuery = [sql| SELECT n.id,n.terms,n.n from ngrams n
114 JOIN node_node_ngrams occ ON occ.ngrams_id = n.id
115 JOIN nodes_nodes nn ON nn.node2_id = occ.node2_id
117 WHERE nn.node1_id = ?
118 AND occ.ngrams_type = ?
119 AND occ.node2_id = nn.node2_id
123 {- | TODO more typed SQL queries
124 selectNgramsTindexed :: CorpusId -> NgramsType -> Query NgramsRead
125 selectNgramsTindexed corpusId ngramsType = proc () -> do
126 nodeNode <- queryNodeNodeTable -< ()
127 nodeNgrams <- queryNodesNgramsTable -< ()
128 ngrams <- queryNgramsTable -< ()
130 restrict -< node1_id nodeNode .== pgInt4 corpusId
131 restrict -< node2_id nodeNode .== node_id nodeNgrams
132 restrict -< ngrams_id ngrams .== node_ngrams nodeNgrams
134 result <- aggregate groupBy (ngrams_id ngrams)