2 Module : Gargantext.Database.Flow
3 Description : Database Flow
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
12 {-# LANGUAGE QuasiQuotes #-}
13 {-# LANGUAGE NoImplicitPrelude #-}
14 {-# LANGUAGE OverloadedStrings #-}
15 {-# LANGUAGE RankNTypes #-}
16 -- {-# LANGUAGE Arrows #-}
18 module Gargantext.Database.Flow.Pairing
22 --import Debug.Trace (trace)
23 import Control.Lens (_Just,view)
24 import Database.PostgreSQL.Simple.SqlQQ (sql)
26 -- import Opaleye.Aggregate
27 -- import Control.Arrow (returnA)
28 import Data.Maybe (catMaybes)
29 import Data.Map (Map, fromList)
31 import qualified Data.Map as DM
32 import Data.Text (Text, toLower)
33 import qualified Data.Text as DT
34 import Gargantext.Prelude hiding (sum)
35 import Gargantext.Database.Schema.Ngrams -- (NgramsType(..))
36 --import Gargantext.Database.Node.Contact -- (HyperdataContact(..))
37 --import Gargantext.Database.Types.Node -- (Hyperdata(..))
38 import Gargantext.Database.Node.Contact
39 import Gargantext.Database.Flow.Utils
40 import Gargantext.Database.Utils (Cmd, runPGSQuery)
41 import Gargantext.Database.Types.Node (AnnuaireId, CorpusId)
42 import Gargantext.Database.Node.Children
43 import Gargantext.Core.Types (NodeType(..))
45 -- TODO mv this type in Types Main
48 -- | TODO : add paring policy as parameter
49 pairing :: AnnuaireId -> CorpusId -> Cmd err Int
51 contacts' <- getContacts aId (Just NodeContact)
52 let contactsMap = pairingPolicyToMap toLower $ toMaps extractNgramsT contacts'
54 ngramsMap' <- getNgramsTindexed cId Authors
55 let ngramsMap = pairingPolicyToMap lastName ngramsMap'
57 let indexedNgrams = pairMaps contactsMap ngramsMap
59 insertToNodeNgrams indexedNgrams
62 lastName :: Terms -> Terms
63 lastName texte = DT.toLower $ maybe texte (\x -> if DT.length x > 3 then x else texte) (lastName' texte)
65 lastName' = lastMay . DT.splitOn " "
67 -- TODO: this methods is dangerous (maybe equalities of the result are not taken into account
68 -- emergency demo plan...
69 pairingPolicyToMap :: (Terms -> Terms)
70 -> Map (NgramsT Ngrams) a -> Map (NgramsT Ngrams) a
71 pairingPolicyToMap f = DM.mapKeys (pairingPolicy f)
73 pairingPolicy :: (Terms -> Terms) -> NgramsT Ngrams -> NgramsT Ngrams
74 pairingPolicy f (NgramsT nt (Ngrams ng _)) = (NgramsT nt (Ngrams (f ng) 1))
76 -- | TODO : use Occurrences in place of Int
77 extractNgramsT :: HyperdataContact -> Map (NgramsT Ngrams) Int
78 extractNgramsT contact = fromList [(NgramsT Authors a' , 1)| a' <- authors ]
80 authors = map text2ngrams $ catMaybes [view (hc_who . _Just . cw_lastName) contact]
83 -- NP: notice how this function is no longer specific to the ContactId type
84 pairMaps :: Map (NgramsT Ngrams) a
85 -> Map (NgramsT Ngrams) NgramsId
86 -> Map NgramsIndexed (Map NgramsType a)
89 [ (NgramsIndexed ng nId, DM.singleton nt n2i)
90 | (k@(NgramsT nt ng),n2i) <- DM.toList m1
91 , Just nId <- [DM.lookup k m2]
94 -----------------------------------------------------------------------
95 getNgramsTindexed:: CorpusId -> NgramsType -> Cmd err (Map (NgramsT Ngrams) NgramsId)
96 getNgramsTindexed corpusId ngramsType' = fromList
97 <$> map (\(ngramsId',t,n) -> (NgramsT ngramsType' (Ngrams t n),ngramsId'))
98 <$> selectNgramsTindexed corpusId ngramsType'
100 selectNgramsTindexed :: CorpusId -> NgramsType -> Cmd err [(NgramsId, Terms, Int)]
101 selectNgramsTindexed corpusId ngramsType'' = runPGSQuery selectQuery (corpusId, ngramsTypeId ngramsType'')
103 selectQuery = [sql| SELECT n.id,n.terms,n.n from ngrams n
104 JOIN nodes_ngrams occ ON occ.ngram_id = n.id
105 JOIN nodes_nodes nn ON nn.node2_id = occ.node_id
107 WHERE nn.node1_id = ?
108 AND occ.ngrams_type = ?
109 AND occ.node_id = nn.node2_id
113 {- | TODO more typed SQL queries
114 selectNgramsTindexed :: CorpusId -> NgramsType -> Query NgramsRead
115 selectNgramsTindexed corpusId ngramsType = proc () -> do
116 nodeNode <- queryNodeNodeTable -< ()
117 nodeNgrams <- queryNodesNgramsTable -< ()
118 ngrams <- queryNgramsTable -< ()
120 restrict -< node1_id nodeNode .== pgInt4 corpusId
121 restrict -< node2_id nodeNode .== node_id nodeNgrams
122 restrict -< ngrams_id ngrams .== node_ngrams nodeNgrams
124 result <- aggregate groupBy (ngrams_id ngrams)