2 Module : Gargantext.Database.Flow
3 Description : Database Flow
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
12 {-# LANGUAGE QuasiQuotes #-}
13 {-# LANGUAGE NoImplicitPrelude #-}
14 {-# LANGUAGE OverloadedStrings #-}
15 {-# LANGUAGE RankNTypes #-}
16 -- {-# LANGUAGE Arrows #-}
18 module Gargantext.Database.Flow.Pairing
22 --import Debug.Trace (trace)
23 import Control.Lens (_Just, (^.))
24 import Database.PostgreSQL.Simple.SqlQQ (sql)
26 -- import Opaleye.Aggregate
27 -- import Control.Arrow (returnA)
28 import Data.Maybe (catMaybes)
29 import Data.Map (Map, fromList)
31 import qualified Data.Map as DM
32 import Data.Text (Text, toLower)
33 import qualified Data.Text as DT
34 import Gargantext.Prelude hiding (sum)
35 import Gargantext.Database.Schema.Ngrams -- (NgramsType(..))
36 import Gargantext.Database.Node.Contact -- (HyperdataContact(..))
37 import Gargantext.Database.Flow.Utils
38 import Gargantext.Database.Utils (Cmd, runPGSQuery)
39 import Gargantext.Database.Types.Node (AnnuaireId, CorpusId, ListId)
40 import Gargantext.Database.Node.Children (getAllContacts)
42 -- TODO mv this type in Types Main
45 -- | TODO : add paring policy as parameter
50 pairing aId cId lId = do
51 contacts' <- getAllContacts aId
52 let contactsMap = pairingPolicyToMap toLower
53 $ toMaps extractNgramsT contacts'
55 ngramsMap' <- getNgramsTindexed cId Authors
56 let ngramsMap = pairingPolicyToMap lastName ngramsMap'
58 let indexedNgrams = pairMaps contactsMap ngramsMap
60 insertDocNgrams lId indexedNgrams
62 lastName :: Terms -> Terms
63 lastName texte = DT.toLower
64 $ maybe texte (\x -> if DT.length x > 3 then x else texte) (lastName' texte)
66 lastName' = lastMay . DT.splitOn " "
68 -- TODO: this method is dangerous (maybe equalities of the result are not taken into account
69 -- emergency demo plan...)
70 pairingPolicyToMap :: (Terms -> Terms)
71 -> Map (NgramsT Ngrams) a
72 -> Map (NgramsT Ngrams) a
73 pairingPolicyToMap f = DM.mapKeys (pairingPolicy f)
75 pairingPolicy :: (Terms -> Terms)
78 pairingPolicy f (NgramsT nt (Ngrams ng _)) = (NgramsT nt (Ngrams (f ng) 1))
80 -- | TODO : use Occurrences in place of Int
81 extractNgramsT :: HyperdataContact
82 -> Map (NgramsT Ngrams) Int
83 extractNgramsT contact = fromList [(NgramsT Authors a' , 1)| a' <- authors ]
85 authors = map text2ngrams
86 $ catMaybes [ contact^.(hc_who . _Just . cw_lastName) ]
89 pairMaps :: Map (NgramsT Ngrams) a
90 -> Map (NgramsT Ngrams) NgramsId
91 -> Map NgramsIndexed (Map NgramsType a)
94 [ (NgramsIndexed ng nId, DM.singleton nt n2i)
95 | (k@(NgramsT nt ng),n2i) <- DM.toList m1
96 , Just nId <- [DM.lookup k m2]
99 -----------------------------------------------------------------------
100 getNgramsTindexed :: CorpusId
102 -> Cmd err (Map (NgramsT Ngrams) NgramsId)
103 getNgramsTindexed corpusId ngramsType' = fromList
104 <$> map (\(ngramsId',t,n) -> (NgramsT ngramsType' (Ngrams t n),ngramsId'))
105 <$> selectNgramsTindexed corpusId ngramsType'
107 selectNgramsTindexed :: CorpusId
109 -> Cmd err [(NgramsId, Terms, Int)]
110 selectNgramsTindexed corpusId' ngramsType'' = runPGSQuery selectQuery (corpusId', ngramsTypeId ngramsType'')
112 selectQuery = [sql| SELECT n.id,n.terms,n.n from ngrams n
113 JOIN node_node_ngrams occ ON occ.ngrams_id = n.id
114 JOIN nodes_nodes nn ON nn.node2_id = occ.node2_id
116 WHERE nn.node1_id = ?
117 AND occ.ngrams_type = ?
118 AND occ.node2_id = nn.node2_id
122 {- | TODO more typed SQL queries
123 selectNgramsTindexed :: CorpusId -> NgramsType -> Query NgramsRead
124 selectNgramsTindexed corpusId ngramsType = proc () -> do
125 nodeNode <- queryNodeNodeTable -< ()
126 nodeNgrams <- queryNodesNgramsTable -< ()
127 ngrams <- queryNgramsTable -< ()
129 restrict -< node1_id nodeNode .== pgInt4 corpusId
130 restrict -< node2_id nodeNode .== node_id nodeNgrams
131 restrict -< ngrams_id ngrams .== node_ngrams nodeNgrams
133 result <- aggregate groupBy (ngrams_id ngrams)