]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Database/Flow/Pairing.hs
[OPTIM] MVar for Graph Clustering.
[gargantext.git] / src / Gargantext / Database / Flow / Pairing.hs
1 {-|
2 Module : Gargantext.Database.Flow
3 Description : Database Flow
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 -}
11
12 {-# LANGUAGE QuasiQuotes #-}
13 {-# LANGUAGE NoImplicitPrelude #-}
14 {-# LANGUAGE OverloadedStrings #-}
15 {-# LANGUAGE RankNTypes #-}
16 -- {-# LANGUAGE Arrows #-}
17
18 module Gargantext.Database.Flow.Pairing
19 (pairing)
20 where
21
22 --import Debug.Trace (trace)
23 import Control.Lens (_Just, (^.))
24 import Database.PostgreSQL.Simple.SqlQQ (sql)
25 -- import Opaleye
26 -- import Opaleye.Aggregate
27 -- import Control.Arrow (returnA)
28 import Data.Maybe (catMaybes)
29 import Data.Map (Map, fromList)
30 import Safe (lastMay)
31 import qualified Data.Map as DM
32 import Data.Text (Text, toLower)
33 import qualified Data.Text as DT
34 import Gargantext.Prelude hiding (sum)
35 import Gargantext.Core.Types (TableResult(..))
36 import Gargantext.Database.Schema.Ngrams -- (NgramsType(..))
37 import Gargantext.Database.Node.Contact -- (HyperdataContact(..))
38 import Gargantext.Database.Flow.Utils
39 import Gargantext.Database.Utils (Cmd, runPGSQuery)
40 import Gargantext.Database.Types.Node (AnnuaireId, CorpusId, ListId{-, DocId, ContactId-})
41 import Gargantext.Database.Node.Children (getAllContacts)
42
43 -- TODO mv this type in Types Main
44 type Terms = Text
45
46 {-
47 pairing'' :: (CorpusId, CorpusId) -> (DocId -> DocId)
48 pairing'' = undefined
49
50 pairing' :: (CorpusId, AnnuaireId) -> (DocId -> ContactId)
51 pairing' = undefined
52 -}
53
54 -- | TODO : add paring policy as parameter
55 pairing :: CorpusId -- (CorpusId, ListId) -- Pair (Either CorpusId AnnuaireId) ListId
56 -> AnnuaireId -- (AnnuaireId, ListId) -- Pair (Either CorpusId AnnuaireId) ListId
57 -> ListId
58 -> Cmd err Int
59 pairing cId aId lId = do
60 contacts' <- getAllContacts aId
61 let contactsMap = pairingPolicyToMap toLower
62 $ toMaps extractNgramsT (tr_docs contacts')
63
64 ngramsMap' <- getNgramsTindexed cId Authors
65 let ngramsMap = pairingPolicyToMap lastName ngramsMap'
66
67 let indexedNgrams = pairMaps contactsMap ngramsMap
68
69 insertDocNgrams lId indexedNgrams
70
71 lastName :: Terms -> Terms
72 lastName texte = DT.toLower
73 $ maybe texte (\x -> if DT.length x > 3 then x else texte) (lastName' texte)
74 where
75 lastName' = lastMay . DT.splitOn " "
76
77 -- TODO: this method is dangerous (maybe equalities of the result are not taken into account
78 -- emergency demo plan...)
79 pairingPolicyToMap :: (Terms -> Terms)
80 -> Map (NgramsT Ngrams) a
81 -> Map (NgramsT Ngrams) a
82 pairingPolicyToMap f = DM.mapKeys (pairingPolicy f)
83
84 pairingPolicy :: (Terms -> Terms)
85 -> NgramsT Ngrams
86 -> NgramsT Ngrams
87 pairingPolicy f (NgramsT nt (Ngrams ng _)) = (NgramsT nt (Ngrams (f ng) 1))
88
89 -- | TODO : use Occurrences in place of Int
90 extractNgramsT :: HyperdataContact
91 -> Map (NgramsT Ngrams) Int
92 extractNgramsT contact = fromList [(NgramsT Authors a' , 1)| a' <- authors ]
93 where
94 authors = map text2ngrams
95 $ catMaybes [ contact^.(hc_who . _Just . cw_lastName) ]
96
97
98 pairMaps :: Map (NgramsT Ngrams) a
99 -> Map (NgramsT Ngrams) NgramsId
100 -> Map NgramsIndexed (Map NgramsType a)
101 pairMaps m1 m2 =
102 DM.fromList
103 [ (NgramsIndexed ng nId, DM.singleton nt n2i)
104 | (k@(NgramsT nt ng),n2i) <- DM.toList m1
105 , Just nId <- [DM.lookup k m2]
106 ]
107
108 -----------------------------------------------------------------------
109 getNgramsTindexed :: CorpusId
110 -> NgramsType
111 -> Cmd err (Map (NgramsT Ngrams) NgramsId)
112 getNgramsTindexed corpusId ngramsType' = fromList
113 <$> map (\(ngramsId',t,n) -> (NgramsT ngramsType' (Ngrams t n),ngramsId'))
114 <$> selectNgramsTindexed corpusId ngramsType'
115 where
116 selectNgramsTindexed :: CorpusId
117 -> NgramsType
118 -> Cmd err [(NgramsId, Terms, Int)]
119 selectNgramsTindexed corpusId' ngramsType'' = runPGSQuery selectQuery (corpusId', ngramsTypeId ngramsType'')
120 where
121 selectQuery = [sql| SELECT n.id,n.terms,n.n from ngrams n
122 JOIN node_node_ngrams occ ON occ.ngrams_id = n.id
123 -- JOIN node_node_ngrams2 occ ON occ.ngrams_id = n.id
124 JOIN nodes_nodes nn ON nn.node2_id = occ.node2_id
125
126 WHERE nn.node1_id = ?
127 AND occ.ngrams_type = ?
128 AND occ.node2_id = nn.node2_id
129 GROUP BY n.id;
130 |]
131
132 {- | TODO more typed SQL queries
133 selectNgramsTindexed :: CorpusId -> NgramsType -> Query NgramsRead
134 selectNgramsTindexed corpusId ngramsType = proc () -> do
135 nodeNode <- queryNodeNodeTable -< ()
136 nodeNgrams <- queryNodesNgramsTable -< ()
137 ngrams <- queryNgramsTable -< ()
138
139 restrict -< node1_id nodeNode .== pgInt4 corpusId
140 restrict -< node2_id nodeNode .== node_id nodeNgrams
141 restrict -< ngrams_id ngrams .== node_ngrams nodeNgrams
142
143 result <- aggregate groupBy (ngrams_id ngrams)
144 returnA -< result
145 --}