]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Database/Flow/Pairing.hs
[FLOW] Pairing.
[gargantext.git] / src / Gargantext / Database / Flow / Pairing.hs
1 {-|
2 Module : Gargantext.Database.Flow
3 Description : Database Flow
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 -}
11
12 {-# LANGUAGE QuasiQuotes #-}
13 {-# LANGUAGE NoImplicitPrelude #-}
14 {-# LANGUAGE OverloadedStrings #-}
15 -- {-# LANGUAGE Arrows #-}
16
17 module Gargantext.Database.Flow.Pairing
18 where
19
20 --import Debug.Trace (trace)
21 import Control.Lens (_Just,view)
22 import Database.PostgreSQL.Simple (Connection, query)
23 import Database.PostgreSQL.Simple.SqlQQ (sql)
24 -- import Opaleye
25 -- import Opaleye.Aggregate
26 -- import Control.Arrow (returnA)
27 import Data.Maybe (catMaybes)
28 import Data.Map (Map, fromList)
29 import Safe (lastMay)
30 import qualified Data.Map as DM
31 import Data.Text (Text, toLower)
32 import qualified Data.Text as DT
33 import Gargantext.Prelude hiding (sum)
34 import Gargantext.Database.Ngrams -- (NgramsType(..))
35 --import Gargantext.Database.Node.Contact -- (HyperdataContact(..))
36 --import Gargantext.Database.Types.Node -- (Hyperdata(..))
37 import Gargantext.Database.Node.Contact
38 import Gargantext.Database.Flow.Utils
39 import Gargantext.Database.Node (Cmd, mkCmd)
40 import Gargantext.Database.Node.Children
41 import Gargantext.Core.Types.Main
42 import Gargantext.Core.Types (NodeType(..))
43 import Gargantext.Database.Bashql (runCmd')
44
45 -- TODO mv this type in Types Main
46 type Terms = Text
47
48 -- | TODO : add paring policy as parameter
49 pairing :: AnnuaireId -> CorpusId -> IO Int
50 pairing aId cId = do
51 contacts' <- runCmd' $ getContacts aId (Just NodeContact)
52 let contactsMap = pairingPolicyToMap toLower $ toMaps extractNgramsT contacts'
53
54 ngramsMap' <- runCmd' $ getNgramsTindexed cId Authors
55 let ngramsMap = pairingPolicyToMap lastName ngramsMap'
56
57 let indexedNgrams = pairMaps contactsMap ngramsMap
58
59 runCmd' $ insertToNodeNgrams indexedNgrams
60 -- TODO add List
61
62 lastName :: Terms -> Terms
63 lastName texte = DT.toLower $ maybe texte (\x -> if DT.length x > 3 then x else texte) (lastName' texte)
64 where
65 lastName' = lastMay . DT.splitOn " "
66
67 -- TODO: this methods is dangerous (maybe equalities of the result are not taken into account
68 -- emergency demo plan...
69 pairingPolicyToMap :: (Terms -> Terms)
70 -> Map (NgramsT Ngrams) a -> Map (NgramsT Ngrams) a
71 pairingPolicyToMap f = DM.mapKeys (pairingPolicy f)
72
73 pairingPolicy :: (Terms -> Terms) -> NgramsT Ngrams -> NgramsT Ngrams
74 pairingPolicy f (NgramsT nt (Ngrams ng _)) = (NgramsT nt (Ngrams (f ng) 1))
75
76 -- | TODO : use Occurrences in place of Int
77 extractNgramsT :: HyperdataContact -> Map (NgramsT Ngrams) Int
78 extractNgramsT contact = fromList [(NgramsT Authors a' , 1)| a' <- authors ]
79 where
80 authors = map text2ngrams $ catMaybes [view (hc_who . _Just . cw_lastName) contact]
81 --}
82
83
84 pairMaps :: Map (NgramsT Ngrams) (Map ContactId Int)
85 -> Map (NgramsT Ngrams) NgramsId
86 -> Map (NgramsT NgramsIndexed) (Map ContactId Int)
87 pairMaps m1 m2 = DM.fromList $ catMaybes $ map (\(k,n) -> (,) <$> lookup' k m2 <*> Just n) $ DM.toList m1
88 where
89 lookup' k@(NgramsT nt ng) m = case DM.lookup k m of
90 Nothing -> Nothing
91 Just nId -> Just $ NgramsT nt (NgramsIndexed ng nId)
92
93
94 -----------------------------------------------------------------------
95 getNgramsTindexed:: CorpusId -> NgramsType -> Cmd (Map (NgramsT Ngrams) NgramsId)
96 getNgramsTindexed corpusId ngramsType' = mkCmd $ \c -> fromList
97 <$> map (\(ngramsId',t,n) -> (NgramsT ngramsType' (Ngrams t n),ngramsId'))
98 <$> selectNgramsTindexed c corpusId ngramsType'
99
100 selectNgramsTindexed :: Connection -> CorpusId -> NgramsType -> IO [(NgramsId, Terms, Int)]
101 selectNgramsTindexed c corpusId ngramsType'' = query c selectQuery (corpusId, ngramsTypeId ngramsType'')
102 where
103 selectQuery = [sql| SELECT n.id,n.terms,n.n from ngrams n
104 JOIN nodes_ngrams occ ON occ.ngram_id = n.id
105 JOIN nodes_nodes nn ON nn.node2_id = occ.node_id
106
107 WHERE nn.node1_id = ?
108 AND occ.ngrams_type = ?
109 AND occ.node_id = nn.node2_id
110 GROUP BY n.id;
111 |]
112
113 {- | TODO more typed SQL queries
114 selectNgramsTindexed :: CorpusId -> NgramsType -> Query NgramsRead
115 selectNgramsTindexed corpusId ngramsType = proc () -> do
116 nodeNode <- queryNodeNodeTable -< ()
117 nodeNgrams <- queryNodesNgramsTable -< ()
118 ngrams <- queryNgramsTable -< ()
119
120 restrict -< node1_id nodeNode .== pgInt4 corpusId
121 restrict -< node2_id nodeNode .== node_id nodeNgrams
122 restrict -< ngrams_id ngrams .== node_ngrams nodeNgrams
123
124 result <- aggregate groupBy (ngrams_id ngrams)
125 returnA -< result
126 --}
127
128