2 Module : Gargantext.Database.Flow
3 Description : Database Flow
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
12 {-# LANGUAGE QuasiQuotes #-}
13 {-# LANGUAGE NoImplicitPrelude #-}
14 {-# LANGUAGE OverloadedStrings #-}
15 -- {-# LANGUAGE Arrows #-}
17 module Gargantext.Database.Flow.Pairing
20 --import Debug.Trace (trace)
21 import Control.Lens (_Just,view)
22 import Database.PostgreSQL.Simple (Connection, query)
23 import Database.PostgreSQL.Simple.SqlQQ (sql)
25 -- import Opaleye.Aggregate
26 -- import Control.Arrow (returnA)
27 import Data.Maybe (catMaybes)
28 import Data.Map (Map, fromList)
30 import qualified Data.Map as DM
31 import Data.Text (Text, toLower)
32 import qualified Data.Text as DT
33 import Gargantext.Prelude hiding (sum)
34 import Gargantext.Database.Schema.Ngrams -- (NgramsType(..))
35 --import Gargantext.Database.Node.Contact -- (HyperdataContact(..))
36 --import Gargantext.Database.Types.Node -- (Hyperdata(..))
37 import Gargantext.Database.Node.Contact
38 import Gargantext.Database.Flow.Utils
39 import Gargantext.Database.Node (Cmd, mkCmd)
40 import Gargantext.Database.Node.Children
41 import Gargantext.Core.Types.Main
42 import Gargantext.Core.Types (NodeType(..))
43 import Gargantext.Database.Bashql (runCmd')
45 -- TODO mv this type in Types Main
48 -- | TODO : add paring policy as parameter
49 pairing :: AnnuaireId -> CorpusId -> IO Int
51 contacts' <- runCmd' $ getContacts aId (Just NodeContact)
52 let contactsMap = pairingPolicyToMap toLower $ toMaps extractNgramsT contacts'
54 ngramsMap' <- runCmd' $ getNgramsTindexed cId Authors
55 let ngramsMap = pairingPolicyToMap lastName ngramsMap'
57 let indexedNgrams = pairMaps contactsMap ngramsMap
59 runCmd' $ insertToNodeNgrams indexedNgrams
62 lastName :: Terms -> Terms
63 lastName texte = DT.toLower $ maybe texte (\x -> if DT.length x > 3 then x else texte) (lastName' texte)
65 lastName' = lastMay . DT.splitOn " "
67 -- TODO: this methods is dangerous (maybe equalities of the result are not taken into account
68 -- emergency demo plan...
69 pairingPolicyToMap :: (Terms -> Terms)
70 -> Map (NgramsT Ngrams) a -> Map (NgramsT Ngrams) a
71 pairingPolicyToMap f = DM.mapKeys (pairingPolicy f)
73 pairingPolicy :: (Terms -> Terms) -> NgramsT Ngrams -> NgramsT Ngrams
74 pairingPolicy f (NgramsT nt (Ngrams ng _)) = (NgramsT nt (Ngrams (f ng) 1))
76 -- | TODO : use Occurrences in place of Int
77 extractNgramsT :: HyperdataContact -> Map (NgramsT Ngrams) Int
78 extractNgramsT contact = fromList [(NgramsT Authors a' , 1)| a' <- authors ]
80 authors = map text2ngrams $ catMaybes [view (hc_who . _Just . cw_lastName) contact]
84 pairMaps :: Map (NgramsT Ngrams) (Map ContactId Int)
85 -> Map (NgramsT Ngrams) NgramsId
86 -> Map (NgramsT NgramsIndexed) (Map ContactId Int)
87 pairMaps m1 m2 = DM.fromList $ catMaybes $ map (\(k,n) -> (,) <$> lookup' k m2 <*> Just n) $ DM.toList m1
89 lookup' k@(NgramsT nt ng) m = case DM.lookup k m of
91 Just nId -> Just $ NgramsT nt (NgramsIndexed ng nId)
94 -----------------------------------------------------------------------
95 getNgramsTindexed:: CorpusId -> NgramsType -> Cmd (Map (NgramsT Ngrams) NgramsId)
96 getNgramsTindexed corpusId ngramsType' = mkCmd $ \c -> fromList
97 <$> map (\(ngramsId',t,n) -> (NgramsT ngramsType' (Ngrams t n),ngramsId'))
98 <$> selectNgramsTindexed c corpusId ngramsType'
100 selectNgramsTindexed :: Connection -> CorpusId -> NgramsType -> IO [(NgramsId, Terms, Int)]
101 selectNgramsTindexed c corpusId ngramsType'' = query c selectQuery (corpusId, ngramsTypeId ngramsType'')
103 selectQuery = [sql| SELECT n.id,n.terms,n.n from ngrams n
104 JOIN nodes_ngrams occ ON occ.ngram_id = n.id
105 JOIN nodes_nodes nn ON nn.node2_id = occ.node_id
107 WHERE nn.node1_id = ?
108 AND occ.ngrams_type = ?
109 AND occ.node_id = nn.node2_id
113 {- | TODO more typed SQL queries
114 selectNgramsTindexed :: CorpusId -> NgramsType -> Query NgramsRead
115 selectNgramsTindexed corpusId ngramsType = proc () -> do
116 nodeNode <- queryNodeNodeTable -< ()
117 nodeNgrams <- queryNodesNgramsTable -< ()
118 ngrams <- queryNgramsTable -< ()
120 restrict -< node1_id nodeNode .== pgInt4 corpusId
121 restrict -< node2_id nodeNode .== node_id nodeNgrams
122 restrict -< ngrams_id ngrams .== node_ngrams nodeNgrams
124 result <- aggregate groupBy (ngrams_id ngrams)