]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Database/Flow/Pairing.hs
[DB][OPTIM] WIP
[gargantext.git] / src / Gargantext / Database / Flow / Pairing.hs
1 {-|
2 Module : Gargantext.Database.Flow
3 Description : Database Flow
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 -}
11
12 {-# LANGUAGE QuasiQuotes #-}
13 {-# LANGUAGE NoImplicitPrelude #-}
14 {-# LANGUAGE OverloadedStrings #-}
15 {-# LANGUAGE RankNTypes #-}
16 -- {-# LANGUAGE Arrows #-}
17
18 module Gargantext.Database.Flow.Pairing
19 (pairing)
20 where
21
22 --import Debug.Trace (trace)
23 import Control.Lens (_Just, (^.))
24 import Database.PostgreSQL.Simple.SqlQQ (sql)
25 -- import Opaleye
26 -- import Opaleye.Aggregate
27 -- import Control.Arrow (returnA)
28 import Data.Maybe (catMaybes)
29 import Data.Map (Map, fromList)
30 import Safe (lastMay)
31 import qualified Data.Map as DM
32 import Data.Text (Text, toLower)
33 import qualified Data.Text as DT
34 import Gargantext.Prelude hiding (sum)
35 import Gargantext.Core.Types (TableResult(..))
36 import Gargantext.Database.Schema.Ngrams -- (NgramsType(..))
37 import Gargantext.Database.Node.Contact -- (HyperdataContact(..))
38 import Gargantext.Database.Flow.Utils
39 import Gargantext.Database.Utils (Cmd, runPGSQuery)
40 import Gargantext.Database.Types.Node (AnnuaireId, CorpusId, ListId{-, DocId, ContactId-})
41 import Gargantext.Database.Node.Children (getAllContacts)
42
43 -- TODO mv this type in Types Main
44 type Terms = Text
45
46 {-
47 pairing'' :: (CorpusId, CorpusId) -> (DocId -> DocId)
48 pairing'' = undefined
49
50 pairing' :: (CorpusId, AnnuaireId) -> (DocId -> ContactId)
51 pairing' = undefined
52 -}
53
54 -- | TODO : add paring policy as parameter
55 pairing :: AnnuaireId
56 -> CorpusId
57 -> ListId
58 -> Cmd err Int
59 pairing aId cId lId = do
60 contacts' <- getAllContacts aId
61 let contactsMap = pairingPolicyToMap toLower
62 $ toMaps extractNgramsT (tr_docs contacts')
63
64 ngramsMap' <- getNgramsTindexed cId Authors
65 let ngramsMap = pairingPolicyToMap lastName ngramsMap'
66
67 let indexedNgrams = pairMaps contactsMap ngramsMap
68
69 insertDocNgrams lId indexedNgrams
70
71 lastName :: Terms -> Terms
72 lastName texte = DT.toLower
73 $ maybe texte (\x -> if DT.length x > 3 then x else texte) (lastName' texte)
74 where
75 lastName' = lastMay . DT.splitOn " "
76
77 -- TODO: this method is dangerous (maybe equalities of the result are not taken into account
78 -- emergency demo plan...)
79 pairingPolicyToMap :: (Terms -> Terms)
80 -> Map (NgramsT Ngrams) a
81 -> Map (NgramsT Ngrams) a
82 pairingPolicyToMap f = DM.mapKeys (pairingPolicy f)
83
84 pairingPolicy :: (Terms -> Terms)
85 -> NgramsT Ngrams
86 -> NgramsT Ngrams
87 pairingPolicy f (NgramsT nt (Ngrams ng _)) = (NgramsT nt (Ngrams (f ng) 1))
88
89 -- | TODO : use Occurrences in place of Int
90 extractNgramsT :: HyperdataContact
91 -> Map (NgramsT Ngrams) Int
92 extractNgramsT contact = fromList [(NgramsT Authors a' , 1)| a' <- authors ]
93 where
94 authors = map text2ngrams
95 $ catMaybes [ contact^.(hc_who . _Just . cw_lastName) ]
96
97
98 pairMaps :: Map (NgramsT Ngrams) a
99 -> Map (NgramsT Ngrams) NgramsId
100 -> Map NgramsIndexed (Map NgramsType a)
101 pairMaps m1 m2 =
102 DM.fromList
103 [ (NgramsIndexed ng nId, DM.singleton nt n2i)
104 | (k@(NgramsT nt ng),n2i) <- DM.toList m1
105 , Just nId <- [DM.lookup k m2]
106 ]
107
108 -----------------------------------------------------------------------
109 getNgramsTindexed :: CorpusId
110 -> NgramsType
111 -> Cmd err (Map (NgramsT Ngrams) NgramsId)
112 getNgramsTindexed corpusId ngramsType' = fromList
113 <$> map (\(ngramsId',t,n) -> (NgramsT ngramsType' (Ngrams t n),ngramsId'))
114 <$> selectNgramsTindexed corpusId ngramsType'
115 where
116 selectNgramsTindexed :: CorpusId
117 -> NgramsType
118 -> Cmd err [(NgramsId, Terms, Int)]
119 selectNgramsTindexed corpusId' ngramsType'' = runPGSQuery selectQuery (corpusId', ngramsTypeId ngramsType'')
120 where
121 selectQuery = [sql| SELECT n.id,n.terms,n.n from ngrams n
122 JOIN node_node_ngrams occ ON occ.ngrams_id = n.id
123 JOIN nodes_nodes nn ON nn.node2_id = occ.node2_id
124
125 WHERE nn.node1_id = ?
126 AND occ.ngrams_type = ?
127 AND occ.node2_id = nn.node2_id
128 GROUP BY n.id;
129 |]
130
131 {- | TODO more typed SQL queries
132 selectNgramsTindexed :: CorpusId -> NgramsType -> Query NgramsRead
133 selectNgramsTindexed corpusId ngramsType = proc () -> do
134 nodeNode <- queryNodeNodeTable -< ()
135 nodeNgrams <- queryNodesNgramsTable -< ()
136 ngrams <- queryNgramsTable -< ()
137
138 restrict -< node1_id nodeNode .== pgInt4 corpusId
139 restrict -< node2_id nodeNode .== node_id nodeNgrams
140 restrict -< ngrams_id ngrams .== node_ngrams nodeNgrams
141
142 result <- aggregate groupBy (ngrams_id ngrams)
143 returnA -< result
144 --}