2 Module : Gargantext.Database.Flow
3 Description : Database Flow
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
12 {-# LANGUAGE QuasiQuotes #-}
13 {-# LANGUAGE NoImplicitPrelude #-}
14 {-# LANGUAGE OverloadedStrings #-}
15 {-# LANGUAGE RankNTypes #-}
16 -- {-# LANGUAGE Arrows #-}
18 module Gargantext.Database.Flow.Pairing
21 --import Debug.Trace (trace)
22 import Control.Lens (_Just,view)
23 import Database.PostgreSQL.Simple.SqlQQ (sql)
25 -- import Opaleye.Aggregate
26 -- import Control.Arrow (returnA)
27 import Data.Maybe (catMaybes)
28 import Data.Map (Map, fromList)
30 import qualified Data.Map as DM
31 import Data.Text (Text, toLower)
32 import qualified Data.Text as DT
33 import Gargantext.Prelude hiding (sum)
34 import Gargantext.Database.Schema.Ngrams -- (NgramsType(..))
35 --import Gargantext.Database.Node.Contact -- (HyperdataContact(..))
36 --import Gargantext.Database.Types.Node -- (Hyperdata(..))
37 import Gargantext.Database.Node.Contact
38 import Gargantext.Database.Flow.Utils
39 import Gargantext.Database.Utils (Cmd, runPGSQuery)
40 import Gargantext.Database.Types.Node (AnnuaireId, CorpusId)
41 import Gargantext.Database.Node.Children
42 import Gargantext.Core.Types (NodeType(..))
44 -- TODO mv this type in Types Main
47 -- | TODO : add paring policy as parameter
48 pairing :: AnnuaireId -> CorpusId -> Cmd err Int
50 contacts' <- getContacts aId (Just NodeContact)
51 let contactsMap = pairingPolicyToMap toLower $ toMaps extractNgramsT contacts'
53 ngramsMap' <- getNgramsTindexed cId Authors
54 let ngramsMap = pairingPolicyToMap lastName ngramsMap'
56 let indexedNgrams = pairMaps contactsMap ngramsMap
58 insertToNodeNgrams indexedNgrams
61 lastName :: Terms -> Terms
62 lastName texte = DT.toLower $ maybe texte (\x -> if DT.length x > 3 then x else texte) (lastName' texte)
64 lastName' = lastMay . DT.splitOn " "
66 -- TODO: this methods is dangerous (maybe equalities of the result are not taken into account
67 -- emergency demo plan...
68 pairingPolicyToMap :: (Terms -> Terms)
69 -> Map (NgramsT Ngrams) a -> Map (NgramsT Ngrams) a
70 pairingPolicyToMap f = DM.mapKeys (pairingPolicy f)
72 pairingPolicy :: (Terms -> Terms) -> NgramsT Ngrams -> NgramsT Ngrams
73 pairingPolicy f (NgramsT nt (Ngrams ng _)) = (NgramsT nt (Ngrams (f ng) 1))
75 -- | TODO : use Occurrences in place of Int
76 extractNgramsT :: HyperdataContact -> Map (NgramsT Ngrams) Int
77 extractNgramsT contact = fromList [(NgramsT Authors a' , 1)| a' <- authors ]
79 authors = map text2ngrams $ catMaybes [view (hc_who . _Just . cw_lastName) contact]
82 -- NP: notice how this function is no longer specific to the ContactId type
83 pairMaps :: Map (NgramsT Ngrams) a
84 -> Map (NgramsT Ngrams) NgramsId
85 -> Map NgramsIndexed (Map NgramsType a)
88 [ (NgramsIndexed ng nId, DM.singleton nt n2i)
89 | (k@(NgramsT nt ng),n2i) <- DM.toList m1
90 , Just nId <- [DM.lookup k m2]
93 -----------------------------------------------------------------------
94 getNgramsTindexed:: CorpusId -> NgramsType -> Cmd err (Map (NgramsT Ngrams) NgramsId)
95 getNgramsTindexed corpusId ngramsType' = fromList
96 <$> map (\(ngramsId',t,n) -> (NgramsT ngramsType' (Ngrams t n),ngramsId'))
97 <$> selectNgramsTindexed corpusId ngramsType'
99 selectNgramsTindexed :: CorpusId -> NgramsType -> Cmd err [(NgramsId, Terms, Int)]
100 selectNgramsTindexed corpusId ngramsType'' = runPGSQuery selectQuery (corpusId, ngramsTypeId ngramsType'')
102 selectQuery = [sql| SELECT n.id,n.terms,n.n from ngrams n
103 JOIN nodes_ngrams occ ON occ.ngram_id = n.id
104 JOIN nodes_nodes nn ON nn.node2_id = occ.node_id
106 WHERE nn.node1_id = ?
107 AND occ.ngrams_type = ?
108 AND occ.node_id = nn.node2_id
112 {- | TODO more typed SQL queries
113 selectNgramsTindexed :: CorpusId -> NgramsType -> Query NgramsRead
114 selectNgramsTindexed corpusId ngramsType = proc () -> do
115 nodeNode <- queryNodeNodeTable -< ()
116 nodeNgrams <- queryNodesNgramsTable -< ()
117 ngrams <- queryNgramsTable -< ()
119 restrict -< node1_id nodeNode .== pgInt4 corpusId
120 restrict -< node2_id nodeNode .== node_id nodeNgrams
121 restrict -< ngrams_id ngrams .== node_ngrams nodeNgrams
123 result <- aggregate groupBy (ngrams_id ngrams)