2 Module : Gargantext.Database.Flow
3 Description : Database Flow
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
12 {-# LANGUAGE QuasiQuotes #-}
13 -- {-# LANGUAGE Arrows #-}
15 module Gargantext.Database.Action.Flow.Pairing
20 import Control.Lens (_Just, (^.))
21 import Data.Map (Map, fromList, fromListWith)
22 import Data.Maybe (catMaybes, fromMaybe)
23 import Data.Text (Text, toLower)
24 import Database.PostgreSQL.Simple.SqlQQ (sql)
25 import Gargantext.Core.Types (TableResult(..))
26 import Gargantext.Database
27 import Gargantext.Database.Admin.Types.Hyperdata -- (HyperdataContact(..))
28 import Gargantext.Database.Admin.Types.Node -- (AnnuaireId, CorpusId, ListId, DocId, ContactId, NodeId)
29 import Gargantext.Database.Prelude (Cmd, runPGSQuery)
30 import Gargantext.Database.Query.Table.Node.Children (getAllContacts)
31 import Gargantext.Database.Schema.Ngrams -- (NgramsType(..))
32 import Gargantext.Database.Schema.Node
33 import Gargantext.Prelude hiding (sum)
35 import qualified Data.List as List
36 import qualified Data.Map as Map
37 import qualified Data.Text as DT
38 import qualified Data.Set as Set
40 -- TODO mv this type in Types Main
46 pairingPolicy :: (Terms -> Terms)
49 pairingPolicy f (NgramsT nt (Ngrams ng _)) = (NgramsT nt (Ngrams (f ng) 1))
52 pairMaps :: Map (NgramsT Ngrams) a
53 -> Map (NgramsT Ngrams) NgramsId
54 -> Map NgramsIndexed (Map NgramsType a)
57 [ (NgramsIndexed ng nId, DM.singleton nt n2i)
58 | (k@(NgramsT nt ng),n2i) <- DM.toList m1
59 , Just nId <- [DM.lookup k m2]
63 -----------------------------------------------------------------------
65 pairing :: AnnuaireId -> CorpusId -> ListId -> Cmd err Int
67 dataPaired <- dataPairing a (c,l,Authors) lastName toLower
68 insertDB $ prepareInsert dataPaired
71 dataPairing :: AnnuaireId
72 -> (CorpusId, ListId, NgramsType)
73 -> (ContactName -> Projected)
74 -> (DocAuthor -> Projected)
75 -> Cmd err (Map ContactId (Set DocId))
76 dataPairing aId (cId, lId, ngt) fc fa = do
77 mc <- getNgramsContactId aId
78 md <- getNgramsDocId cId lId ngt
81 from = projectionFrom (Set.fromList $ Map.keys mc) fc
82 to = projectionTo (Set.fromList $ Map.keys md) fa
84 pure $ fusion mc $ align from to md
88 prepareInsert :: Map ContactId (Set DocId) -> [NodeNode]
89 prepareInsert m = map (\(n1,n2) -> NodeNode n1 n2 Nothing Nothing)
91 $ map (\(contactId, setDocIds)
93 -> (contactId, setDocId)
94 ) $ Set.toList setDocIds
100 ------------------------------------------------------------------------
101 type ContactName = Text
102 type DocAuthor = Text
103 type Projected = Text
105 projectionFrom :: Set ContactName -> (ContactName -> Projected) -> Map ContactName Projected
106 projectionFrom ss f = fromList $ map (\s -> (s, f s)) (Set.toList ss)
108 projectionTo :: Set DocAuthor -> (DocAuthor -> Projected) -> Map Projected (Set DocAuthor)
109 projectionTo ss f = fromListWith (<>) $ map (\s -> (f s, Set.singleton s)) (Set.toList ss)
111 ------------------------------------------------------------------------
112 lastName :: Terms -> Terms
113 lastName texte = DT.toLower
114 $ maybe texte (\x -> if DT.length x > 3 then x else texte)
117 lastName' = lastMay . DT.splitOn " "
120 ------------------------------------------------------------------------
121 align :: Map ContactName Projected
122 -> Map Projected (Set DocAuthor)
123 -> Map DocAuthor (Set DocId)
124 -> Map ContactName (Set DocId)
125 align mc ma md = fromListWith (<>)
126 $ map (\c -> (c, getProjection md $ testProjection c mc ma))
129 getProjection :: Map DocAuthor (Set DocId) -> Set DocAuthor -> Set DocId
130 getProjection ma' sa' =
133 else Set.unions $ sets ma' sa'
135 sets ma'' sa'' = Set.map (\s -> lookup s ma'') sa''
136 lookup s' ma''= fromMaybe Set.empty (Map.lookup s' ma'')
138 testProjection :: ContactName
139 -> Map ContactName Projected
140 -> Map Projected (Set DocAuthor)
142 testProjection cn' mc' ma' = case Map.lookup cn' mc' of
144 Just c -> case Map.lookup c ma' of
149 fusion :: Map ContactName (Set ContactId)
150 -> Map ContactName (Set DocId)
151 -> Map ContactId (Set DocId)
152 fusion _mc _md = undefined
155 $ map (\c -> case Map.lookup c mc of
161 ------------------------------------------------------------------------
163 getNgramsContactId :: AnnuaireId
164 -> Cmd err (Map ContactName (Set NodeId))
165 getNgramsContactId aId = do
166 contacts <- getAllContacts aId
167 pure $ fromListWith (<>)
169 $ map (\contact -> (,) <$> contact^.(node_hyperdata . hc_who . _Just . cw_lastName)
170 <*> Just ( Set.singleton (contact^.node_id))
174 -- filter Trash / map Authors
175 -- Indexing all ngramsType like Authors
176 getNgramsDocId :: CorpusId
179 -> Cmd err (Map DocAuthor (Set NodeId))
180 getNgramsDocId corpusId listId nt
182 <$> map (\(t,nId) -> (t, Set.singleton (NodeId nId)))
183 <$> selectNgramsDocId corpusId listId nt
186 selectNgramsDocId :: CorpusId
189 -> Cmd err [(Text, Int)]
190 selectNgramsDocId corpusId' listId' ngramsType' =
191 runPGSQuery selectQuery (corpusId', listId', ngramsTypeId ngramsType')
193 selectQuery = [sql| SELECT ng.terms,nnng.node2_id from ngrams ng
194 JOIN node_node_ngrams nnng ON nnng.ngrams_id = ng.id
195 JOIN nodes_nodes nn ON nn.node2_id = nnng.node2_id
197 WHERE nn.node1_id = ?
198 AND nnng.node1_id = ?
199 AND nnng.ngrams_type = ?