]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Database/Flow/Pairing.hs
Merge branch 'dev' into dev-graph-explorer-gexf
[gargantext.git] / src / Gargantext / Database / Flow / Pairing.hs
1 {-|
2 Module : Gargantext.Database.Flow
3 Description : Database Flow
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 -}
11
12 {-# LANGUAGE FlexibleContexts #-}
13 {-# LANGUAGE QuasiQuotes #-}
14 {-# LANGUAGE NoImplicitPrelude #-}
15 {-# LANGUAGE OverloadedStrings #-}
16 {-# LANGUAGE RankNTypes #-}
17 -- {-# LANGUAGE Arrows #-}
18
19 module Gargantext.Database.Flow.Pairing
20 (pairing)
21 where
22
23 --import Debug.Trace (trace)
24 import Control.Lens (_Just, (^.))
25 import Database.PostgreSQL.Simple.SqlQQ (sql)
26 -- import Opaleye
27 -- import Opaleye.Aggregate
28 -- import Control.Arrow (returnA)
29 import Data.Maybe (catMaybes)
30 import Data.Map (Map, fromList)
31 import Safe (lastMay)
32 import qualified Data.Map as DM
33 import Data.Text (Text, toLower)
34 import qualified Data.Text as DT
35 import Gargantext.Prelude hiding (sum)
36 import Gargantext.Core.Types (TableResult(..))
37 import Gargantext.Database.Schema.Ngrams -- (NgramsType(..))
38 import Gargantext.Database.Node.Contact -- (HyperdataContact(..))
39 import Gargantext.Database.Flow.Utils
40 import Gargantext.Database.Utils (Cmd, runPGSQuery)
41 import Gargantext.Database.Types.Node (AnnuaireId, CorpusId, ListId{-, DocId, ContactId-})
42 import Gargantext.Database.Node.Children (getAllContacts)
43
44 -- TODO mv this type in Types Main
45 type Terms = Text
46
47 {-
48 pairing'' :: (CorpusId, CorpusId) -> (DocId -> DocId)
49 pairing'' = undefined
50
51 pairing' :: (CorpusId, AnnuaireId) -> (DocId -> ContactId)
52 pairing' = undefined
53 -}
54
55 -- | TODO : add paring policy as parameter
56 pairing :: CorpusId -- (CorpusId, ListId) -- Pair (Either CorpusId AnnuaireId) ListId
57 -> AnnuaireId -- (AnnuaireId, ListId) -- Pair (Either CorpusId AnnuaireId) ListId
58 -> ListId
59 -> Cmd err Int
60 pairing cId aId lId = do
61 contacts' <- getAllContacts aId
62 let contactsMap = pairingPolicyToMap toLower
63 $ toMaps extractNgramsT (tr_docs contacts')
64
65 ngramsMap' <- getNgramsTindexed cId Authors
66 let ngramsMap = pairingPolicyToMap lastName ngramsMap'
67
68 let indexedNgrams = pairMaps contactsMap ngramsMap
69
70 insertDocNgrams lId indexedNgrams
71
72 lastName :: Terms -> Terms
73 lastName texte = DT.toLower
74 $ maybe texte (\x -> if DT.length x > 3 then x else texte) (lastName' texte)
75 where
76 lastName' = lastMay . DT.splitOn " "
77
78 -- TODO: this method is dangerous (maybe equalities of the result are not taken into account
79 -- emergency demo plan...)
80 pairingPolicyToMap :: (Terms -> Terms)
81 -> Map (NgramsT Ngrams) a
82 -> Map (NgramsT Ngrams) a
83 pairingPolicyToMap f = DM.mapKeys (pairingPolicy f)
84
85 pairingPolicy :: (Terms -> Terms)
86 -> NgramsT Ngrams
87 -> NgramsT Ngrams
88 pairingPolicy f (NgramsT nt (Ngrams ng _)) = (NgramsT nt (Ngrams (f ng) 1))
89
90 -- | TODO : use Occurrences in place of Int
91 extractNgramsT :: HyperdataContact
92 -> Map (NgramsT Ngrams) Int
93 extractNgramsT contact = fromList [(NgramsT Authors a' , 1)| a' <- authors ]
94 where
95 authors = map text2ngrams
96 $ catMaybes [ contact^.(hc_who . _Just . cw_lastName) ]
97
98
99 pairMaps :: Map (NgramsT Ngrams) a
100 -> Map (NgramsT Ngrams) NgramsId
101 -> Map NgramsIndexed (Map NgramsType a)
102 pairMaps m1 m2 =
103 DM.fromList
104 [ (NgramsIndexed ng nId, DM.singleton nt n2i)
105 | (k@(NgramsT nt ng),n2i) <- DM.toList m1
106 , Just nId <- [DM.lookup k m2]
107 ]
108
109 -----------------------------------------------------------------------
110 getNgramsTindexed :: CorpusId
111 -> NgramsType
112 -> Cmd err (Map (NgramsT Ngrams) NgramsId)
113 getNgramsTindexed corpusId ngramsType' = fromList
114 <$> map (\(ngramsId',t,n) -> (NgramsT ngramsType' (Ngrams t n),ngramsId'))
115 <$> selectNgramsTindexed corpusId ngramsType'
116 where
117 selectNgramsTindexed :: CorpusId
118 -> NgramsType
119 -> Cmd err [(NgramsId, Terms, Int)]
120 selectNgramsTindexed corpusId' ngramsType'' = runPGSQuery selectQuery (corpusId', ngramsTypeId ngramsType'')
121 where
122 selectQuery = [sql| SELECT n.id,n.terms,n.n from ngrams n
123 JOIN node_node_ngrams occ ON occ.ngrams_id = n.id
124 -- JOIN node_node_ngrams2 occ ON occ.ngrams_id = n.id
125 JOIN nodes_nodes nn ON nn.node2_id = occ.node2_id
126
127 WHERE nn.node1_id = ?
128 AND occ.ngrams_type = ?
129 AND occ.node2_id = nn.node2_id
130 GROUP BY n.id;
131 |]
132
133 {- | TODO more typed SQL queries
134 selectNgramsTindexed :: CorpusId -> NgramsType -> Query NgramsRead
135 selectNgramsTindexed corpusId ngramsType = proc () -> do
136 nodeNode <- queryNodeNodeTable -< ()
137 nodeNgrams <- queryNodesNgramsTable -< ()
138 ngrams <- queryNgramsTable -< ()
139
140 restrict -< node1_id nodeNode .== pgInt4 corpusId
141 restrict -< node2_id nodeNode .== node_id nodeNgrams
142 restrict -< ngrams_id ngrams .== node_ngrams nodeNgrams
143
144 result <- aggregate groupBy (ngrams_id ngrams)
145 returnA -< result
146 --}