]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Database/Action/Flow/Pairing.hs
[Clean]
[gargantext.git] / src / Gargantext / Database / Action / Flow / Pairing.hs
1 {-|
2 Module : Gargantext.Database.Flow
3 Description : Database Flow
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 -}
11
12 {-# LANGUAGE QuasiQuotes #-}
13 -- {-# LANGUAGE Arrows #-}
14
15 module Gargantext.Database.Action.Flow.Pairing
16 -- (pairing)
17 where
18
19 import Data.Set (Set)
20 import Control.Lens (_Just, (^.))
21 import Data.Map (Map, fromList, fromListWith)
22 import Data.Maybe (catMaybes, fromMaybe)
23 import Data.Text (Text, toLower)
24 import Database.PostgreSQL.Simple.SqlQQ (sql)
25 import Gargantext.Core.Types (TableResult(..), Term)
26 import Gargantext.Database
27 import Gargantext.Database.Admin.Types.Hyperdata -- (HyperdataContact(..))
28 import Gargantext.Database.Admin.Types.Node -- (AnnuaireId, CorpusId, ListId, DocId, ContactId, NodeId)
29 import Gargantext.Database.Prelude (Cmd, runPGSQuery)
30 import Gargantext.Database.Query.Table.Node.Children (getAllContacts)
31 import Gargantext.Database.Schema.Ngrams -- (NgramsType(..))
32 import Gargantext.Database.Schema.Node
33 import Gargantext.Prelude hiding (sum)
34 import Safe (lastMay)
35 import qualified Data.List as List
36 import qualified Data.Map as Map
37 import qualified Data.Text as DT
38 import qualified Data.Set as Set
39
40
41 -----------------------------------------------------------------------
42 pairing :: AnnuaireId -> CorpusId -> ListId -> Cmd err Int
43 pairing a c l = do
44 dataPaired <- dataPairing a (c,l,Authors) lastName toLower
45 insertDB $ prepareInsert dataPaired
46
47
48 dataPairing :: AnnuaireId
49 -> (CorpusId, ListId, NgramsType)
50 -> (ContactName -> Projected)
51 -> (DocAuthor -> Projected)
52 -> Cmd err (Map ContactId (Set DocId))
53 dataPairing aId (cId, lId, ngt) fc fa = do
54 mc <- getNgramsContactId aId
55 md <- getNgramsDocId cId lId ngt
56
57 let
58 from = projectionFrom (Set.fromList $ Map.keys mc) fc
59 to = projectionTo (Set.fromList $ Map.keys md) fa
60
61 pure $ fusion mc $ align from to md
62
63
64
65 prepareInsert :: Map ContactId (Set DocId) -> [NodeNode]
66 prepareInsert m = map (\(n1,n2) -> NodeNode n1 n2 Nothing Nothing)
67 $ List.concat
68 $ map (\(contactId, setDocIds)
69 -> map (\setDocId
70 -> (contactId, setDocId)
71 ) $ Set.toList setDocIds
72 )
73 $ Map.toList m
74
75 ------------------------------------------------------------------------
76 type ContactName = Text
77 type DocAuthor = Text
78 type Projected = Text
79
80 projectionFrom :: Set ContactName -> (ContactName -> Projected) -> Map ContactName Projected
81 projectionFrom ss f = fromList $ map (\s -> (s, f s)) (Set.toList ss)
82
83 projectionTo :: Set DocAuthor -> (DocAuthor -> Projected) -> Map Projected (Set DocAuthor)
84 projectionTo ss f = fromListWith (<>) $ map (\s -> (f s, Set.singleton s)) (Set.toList ss)
85
86 ------------------------------------------------------------------------
87 lastName :: Term -> Term
88 lastName texte = DT.toLower
89 $ maybe texte (\x -> if DT.length x > 3 then x else texte)
90 (lastName' texte)
91 where
92 lastName' = lastMay . DT.splitOn " "
93
94
95 ------------------------------------------------------------------------
96 align :: Map ContactName Projected
97 -> Map Projected (Set DocAuthor)
98 -> Map DocAuthor (Set DocId)
99 -> Map ContactName (Set DocId)
100 align mc ma md = fromListWith (<>)
101 $ map (\c -> (c, getProjection md $ testProjection c mc ma))
102 $ Map.keys mc
103 where
104 getProjection :: Map DocAuthor (Set DocId) -> Set DocAuthor -> Set DocId
105 getProjection ma' sa' =
106 if Set.null sa'
107 then Set.empty
108 else Set.unions $ sets ma' sa'
109 where
110 sets ma'' sa'' = Set.map (\s -> lookup s ma'') sa''
111 lookup s' ma''= fromMaybe Set.empty (Map.lookup s' ma'')
112
113 testProjection :: ContactName
114 -> Map ContactName Projected
115 -> Map Projected (Set DocAuthor)
116 -> Set DocAuthor
117 testProjection cn' mc' ma' = case Map.lookup cn' mc' of
118 Nothing -> Set.empty
119 Just c -> case Map.lookup c ma' of
120 Nothing -> Set.empty
121 Just a -> a
122
123 fusion :: Map ContactName (Set ContactId)
124 -> Map ContactName (Set DocId)
125 -> Map ContactId (Set DocId)
126 fusion mc md = Map.fromListWith (<>)
127 $ catMaybes
128 $ [ (,) <$> Just cId <*> Map.lookup cn md
129 | (cn, setContactId) <- Map.toList mc
130 , cId <- Set.toList setContactId
131 ]
132 ------------------------------------------------------------------------
133
134 getNgramsContactId :: AnnuaireId
135 -> Cmd err (Map ContactName (Set NodeId))
136 getNgramsContactId aId = do
137 contacts <- getAllContacts aId
138 pure $ fromListWith (<>)
139 $ catMaybes
140 $ map (\contact -> (,) <$> contact^.(node_hyperdata . hc_who . _Just . cw_lastName)
141 <*> Just ( Set.singleton (contact^.node_id))
142 ) (tr_docs contacts)
143
144 -- | TODO
145 -- filter Trash / map Authors
146 -- Indexing all ngramsType like Authors
147 getNgramsDocId :: CorpusId
148 -> ListId
149 -> NgramsType
150 -> Cmd err (Map DocAuthor (Set NodeId))
151 getNgramsDocId corpusId listId nt
152 = fromListWith (<>)
153 <$> map (\(t,nId) -> (t, Set.singleton (NodeId nId)))
154 <$> selectNgramsDocId corpusId listId nt
155
156
157 selectNgramsDocId :: CorpusId
158 -> ListId
159 -> NgramsType
160 -> Cmd err [(Text, Int)]
161 selectNgramsDocId corpusId' listId' ngramsType' =
162 runPGSQuery selectQuery (corpusId', listId', ngramsTypeId ngramsType')
163 where
164 selectQuery = [sql| SELECT ng.terms,nnng.node2_id from ngrams ng
165 JOIN node_node_ngrams nnng ON nnng.ngrams_id = ng.id
166 JOIN nodes_nodes nn ON nn.node2_id = nnng.node2_id
167
168 WHERE nn.node1_id = ?
169 AND nnng.node1_id = ?
170 AND nnng.ngrams_type = ?
171 ;
172 |]
173