]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Database/Action/Flow/Pairing.hs
[Community] pairing function over (WIP tests)
[gargantext.git] / src / Gargantext / Database / Action / Flow / Pairing.hs
1 {-|
2 Module : Gargantext.Database.Flow
3 Description : Database Flow
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 -}
11
12 {-# LANGUAGE QuasiQuotes #-}
13 -- {-# LANGUAGE Arrows #-}
14
15 module Gargantext.Database.Action.Flow.Pairing
16 -- (pairing)
17 where
18
19 import Data.Set (Set)
20 import Control.Lens (_Just, (^.))
21 import Data.Map (Map, fromList, fromListWith)
22 import Data.Maybe (catMaybes, fromMaybe)
23 import Data.Text (Text, toLower)
24 import Database.PostgreSQL.Simple.SqlQQ (sql)
25 import Gargantext.Core.Types (TableResult(..))
26 import Gargantext.Database
27 import Gargantext.Database.Admin.Types.Hyperdata -- (HyperdataContact(..))
28 import Gargantext.Database.Admin.Types.Node -- (AnnuaireId, CorpusId, ListId, DocId, ContactId, NodeId)
29 import Gargantext.Database.Prelude (Cmd, runPGSQuery)
30 import Gargantext.Database.Query.Table.Node.Children (getAllContacts)
31 import Gargantext.Database.Schema.Ngrams -- (NgramsType(..))
32 import Gargantext.Database.Schema.Node
33 import Gargantext.Prelude hiding (sum)
34 import Safe (lastMay)
35 import qualified Data.List as List
36 import qualified Data.Map as Map
37 import qualified Data.Text as DT
38 import qualified Data.Set as Set
39
40 -- TODO mv this type in Types Main
41 type Terms = Text
42
43
44
45 {-
46 pairingPolicy :: (Terms -> Terms)
47 -> NgramsT Ngrams
48 -> NgramsT Ngrams
49 pairingPolicy f (NgramsT nt (Ngrams ng _)) = (NgramsT nt (Ngrams (f ng) 1))
50
51
52 pairMaps :: Map (NgramsT Ngrams) a
53 -> Map (NgramsT Ngrams) NgramsId
54 -> Map NgramsIndexed (Map NgramsType a)
55 pairMaps m1 m2 =
56 DM.fromList
57 [ (NgramsIndexed ng nId, DM.singleton nt n2i)
58 | (k@(NgramsT nt ng),n2i) <- DM.toList m1
59 , Just nId <- [DM.lookup k m2]
60 ]
61 -}
62
63 -----------------------------------------------------------------------
64
65 pairing :: AnnuaireId -> CorpusId -> ListId -> Cmd err Int
66 pairing a c l = do
67 dataPaired <- dataPairing a (c,l,Authors) lastName toLower
68 insertDB $ prepareInsert dataPaired
69
70
71 dataPairing :: AnnuaireId
72 -> (CorpusId, ListId, NgramsType)
73 -> (ContactName -> Projected)
74 -> (DocAuthor -> Projected)
75 -> Cmd err (Map ContactId (Set DocId))
76 dataPairing aId (cId, lId, ngt) fc fa = do
77 mc <- getNgramsContactId aId
78 md <- getNgramsDocId cId lId ngt
79
80 let
81 from = projectionFrom (Set.fromList $ Map.keys mc) fc
82 to = projectionTo (Set.fromList $ Map.keys md) fa
83
84 pure $ fusion mc $ align from to md
85
86
87
88 prepareInsert :: Map ContactId (Set DocId) -> [NodeNode]
89 prepareInsert m = map (\(n1,n2) -> NodeNode n1 n2 Nothing Nothing)
90 $ List.concat
91 $ map (\(contactId, setDocIds)
92 -> map (\setDocId
93 -> (contactId, setDocId)
94 ) $ Set.toList setDocIds
95 )
96 $ Map.toList m
97
98
99
100 ------------------------------------------------------------------------
101 type ContactName = Text
102 type DocAuthor = Text
103 type Projected = Text
104
105 projectionFrom :: Set ContactName -> (ContactName -> Projected) -> Map ContactName Projected
106 projectionFrom ss f = fromList $ map (\s -> (s, f s)) (Set.toList ss)
107
108 projectionTo :: Set DocAuthor -> (DocAuthor -> Projected) -> Map Projected (Set DocAuthor)
109 projectionTo ss f = fromListWith (<>) $ map (\s -> (f s, Set.singleton s)) (Set.toList ss)
110
111 ------------------------------------------------------------------------
112 lastName :: Terms -> Terms
113 lastName texte = DT.toLower
114 $ maybe texte (\x -> if DT.length x > 3 then x else texte)
115 (lastName' texte)
116 where
117 lastName' = lastMay . DT.splitOn " "
118
119
120 ------------------------------------------------------------------------
121 align :: Map ContactName Projected
122 -> Map Projected (Set DocAuthor)
123 -> Map DocAuthor (Set DocId)
124 -> Map ContactName (Set DocId)
125 align mc ma md = fromListWith (<>)
126 $ map (\c -> (c, getProjection md $ testProjection c mc ma))
127 $ Map.keys mc
128 where
129 getProjection :: Map DocAuthor (Set DocId) -> Set DocAuthor -> Set DocId
130 getProjection ma' sa' =
131 if Set.null sa'
132 then Set.empty
133 else Set.unions $ sets ma' sa'
134 where
135 sets ma'' sa'' = Set.map (\s -> lookup s ma'') sa''
136 lookup s' ma''= fromMaybe Set.empty (Map.lookup s' ma'')
137
138 testProjection :: ContactName
139 -> Map ContactName Projected
140 -> Map Projected (Set DocAuthor)
141 -> Set DocAuthor
142 testProjection cn' mc' ma' = case Map.lookup cn' mc' of
143 Nothing -> Set.empty
144 Just c -> case Map.lookup c ma' of
145 Nothing -> Set.empty
146 Just a -> a
147
148 fusion :: Map ContactName (Set ContactId)
149 -> Map ContactName (Set DocId)
150 -> Map ContactId (Set DocId)
151 fusion mc md = Map.fromListWith (<>)
152 $ catMaybes
153 $ [ (,) <$> Just cId <*> Map.lookup cn md
154 | (cn, setContactId) <- Map.toList mc
155 , cId <- Set.toList setContactId
156 ]
157 ------------------------------------------------------------------------
158
159 getNgramsContactId :: AnnuaireId
160 -> Cmd err (Map ContactName (Set NodeId))
161 getNgramsContactId aId = do
162 contacts <- getAllContacts aId
163 pure $ fromListWith (<>)
164 $ catMaybes
165 $ map (\contact -> (,) <$> contact^.(node_hyperdata . hc_who . _Just . cw_lastName)
166 <*> Just ( Set.singleton (contact^.node_id))
167 ) (tr_docs contacts)
168
169 -- | TODO
170 -- filter Trash / map Authors
171 -- Indexing all ngramsType like Authors
172 getNgramsDocId :: CorpusId
173 -> ListId
174 -> NgramsType
175 -> Cmd err (Map DocAuthor (Set NodeId))
176 getNgramsDocId corpusId listId nt
177 = fromListWith (<>)
178 <$> map (\(t,nId) -> (t, Set.singleton (NodeId nId)))
179 <$> selectNgramsDocId corpusId listId nt
180
181
182 selectNgramsDocId :: CorpusId
183 -> ListId
184 -> NgramsType
185 -> Cmd err [(Text, Int)]
186 selectNgramsDocId corpusId' listId' ngramsType' =
187 runPGSQuery selectQuery (corpusId', listId', ngramsTypeId ngramsType')
188 where
189 selectQuery = [sql| SELECT ng.terms,nnng.node2_id from ngrams ng
190 JOIN node_node_ngrams nnng ON nnng.ngrams_id = ng.id
191 JOIN nodes_nodes nn ON nn.node2_id = nnng.node2_id
192
193 WHERE nn.node1_id = ?
194 AND nnng.node1_id = ?
195 AND nnng.ngrams_type = ?
196 ;
197 |]
198