2 Module : Gargantext.Database.Flow
3 Description : Database Flow
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
12 {-# LANGUAGE QuasiQuotes #-}
13 {-# LANGUAGE Arrows #-}
15 module Gargantext.Database.Action.Flow.Pairing
19 import Control.Lens (_Just, (^.))
20 import Data.HashMap.Strict (HashMap)
21 import qualified Data.HashMap.Strict as HM
22 import Data.Maybe (catMaybes, fromMaybe)
24 import Gargantext.API.Ngrams.Tools
25 import Gargantext.API.Ngrams.Types (NgramsTerm(..))
26 import Gargantext.API.Prelude (GargNoServer)
27 import Gargantext.Core
28 import Gargantext.Core.Types (TableResult(..))
29 import Gargantext.Core.Types.Main
30 import Gargantext.Database
31 import Gargantext.Database.Action.Metrics.NgramsByNode (getNodesByNgramsOnlyUser)
32 import Gargantext.Database.Admin.Config
33 import Gargantext.Database.Admin.Types.Hyperdata -- (HyperdataContact(..))
34 import Gargantext.Database.Admin.Types.Node -- (AnnuaireId, CorpusId, ListId, DocId, ContactId, NodeId)
35 import Gargantext.Database.Query.Prelude (leftJoin2, returnA, queryNodeNodeTable)
36 import Gargantext.Database.Query.Table.Node.Children (getAllContacts)
37 import Gargantext.Database.Query.Table.Node.Select (selectNodesWithUsername)
38 import Gargantext.Database.Query.Table.Node (defaultList)
39 import Gargantext.Database.Query.Table.NodeNode (insertNodeNode)
40 import Gargantext.Database.Schema.Ngrams -- (NgramsType(..))
41 import Gargantext.Database.Schema.Node
42 import Gargantext.Prelude hiding (sum)
44 import qualified Data.HashMap.Strict as HashMap
45 import qualified Data.List as List
46 import qualified Data.Set as Set
47 import qualified Data.Text as DT
50 -- All NodeAnnuaire paired with a Corpus of NodeId nId:
51 -- isPairedWith NodeAnnuaire corpusId
52 isPairedWith :: NodeId -> NodeType -> Cmd err [NodeId]
53 isPairedWith nId nt = runOpaQuery (selectQuery nt nId)
55 selectQuery :: NodeType -> NodeId -> Select (Column SqlInt4)
56 selectQuery nt' nId' = proc () -> do
57 (node, node_node) <- queryJoin -< ()
58 restrict -< (node^.node_typename) .== (sqlInt4 $ toDBid nt')
59 restrict -< (node_node^.nn_node1_id) .== (toNullable $ pgNodeId nId')
60 returnA -< node^.node_id
62 queryJoin :: Select (NodeRead, NodeNodeReadNull)
63 queryJoin = leftJoin2 queryNodeTable queryNodeNodeTable cond
65 cond (node, node_node) = node^.node_id .== node_node^. nn_node2_id
67 -----------------------------------------------------------------------
68 pairing :: AnnuaireId -> CorpusId -> Maybe ListId -> GargNoServer Int
71 Nothing -> defaultList c
73 dataPaired <- dataPairing a (c,l,Authors) takeName takeName
74 r <- insertDB $ prepareInsert dataPaired
75 _ <- insertNodeNode [ NodeNode { _nn_node1_id = c
78 , _nn_category = Nothing }]
82 dataPairing :: AnnuaireId
83 -> (CorpusId, ListId, NgramsType)
84 -> (ContactName -> Projected)
85 -> (DocAuthor -> Projected)
86 -> GargNoServer (HashMap ContactId (Set DocId))
87 dataPairing aId (cId, lId, ngt) fc fa = do
88 mc <- getNgramsContactId aId
89 md <- getNgramsDocId cId lId ngt
91 printDebug "ngramsContactId" mc
92 printDebug "ngramsDocId" md
94 from = projectionFrom (Set.fromList $ HM.keys mc) fc
95 to = projectionTo (Set.fromList $ HM.keys md) fa
97 pure $ fusion mc $ align from to md
101 prepareInsert :: HashMap ContactId (Set DocId) -> [NodeNode]
102 prepareInsert m = map (\(n1,n2) -> NodeNode { _nn_node1_id = n1
104 , _nn_score = Nothing
105 , _nn_category = Nothing })
107 $ map (\(contactId, setDocIds)
109 -> (contactId, setDocId)
110 ) $ Set.toList setDocIds
114 ------------------------------------------------------------------------
115 type ContactName = NgramsTerm
116 type DocAuthor = NgramsTerm
117 type Projected = NgramsTerm
119 projectionFrom :: Set ContactName -> (ContactName -> Projected) -> HashMap ContactName Projected
120 projectionFrom ss f = HM.fromList $ map (\s -> (s, f s)) (Set.toList ss) -- use HS.toMap
122 projectionTo :: Set DocAuthor -> (DocAuthor -> Projected) -> HashMap Projected (Set DocAuthor)
123 projectionTo ss f = HM.fromListWith (<>) $ map (\s -> (f s, Set.singleton s)) (Set.toList ss) -- use HS.toMap
124 ------------------------------------------------------------------------
125 takeName :: NgramsTerm -> NgramsTerm
126 takeName (NgramsTerm texte) = NgramsTerm $ DT.toLower texte'
128 texte' = maybe texte (\x -> if DT.length x > 3 then x else texte)
130 lastName' = lastMay . DT.splitOn " "
133 ------------------------------------------------------------------------
134 align :: HashMap ContactName Projected
135 -> HashMap Projected (Set DocAuthor)
136 -> HashMap DocAuthor (Set DocId)
137 -> HashMap ContactName (Set DocId)
138 align mc ma md = HM.fromListWith (<>)
139 $ map (\c -> (c, getProjection md $ testProjection c mc ma))
142 getProjection :: HashMap DocAuthor (Set DocId) -> Set DocAuthor -> Set DocId
143 getProjection ma' sa' =
146 else Set.unions $ sets ma' sa'
148 sets ma'' sa'' = Set.map (\s -> lookup s ma'') sa''
149 lookup s' ma''= fromMaybe Set.empty (HM.lookup s' ma'')
151 testProjection :: ContactName
152 -> HashMap ContactName Projected
153 -> HashMap Projected (Set DocAuthor)
155 testProjection cn' mc' ma' = case HM.lookup cn' mc' of
157 Just c -> case HM.lookup c ma' of
161 fusion :: HashMap ContactName (Set ContactId)
162 -> HashMap ContactName (Set DocId)
163 -> HashMap ContactId (Set DocId)
164 fusion mc md = HM.fromListWith (<>)
166 $ [ (,) <$> Just cId <*> HM.lookup cn md
167 | (cn, setContactId) <- HM.toList mc
168 , cId <- Set.toList setContactId
170 ------------------------------------------------------------------------
172 getNgramsContactId :: AnnuaireId
173 -> Cmd err (HashMap ContactName (Set NodeId))
174 getNgramsContactId aId = do
175 contacts <- getAllContacts aId
176 pure $ HM.fromListWith (<>)
178 $ map (\contact -> (,) <$> (NgramsTerm <$> contact^.(node_hyperdata . hc_who . _Just . cw_lastName))
179 <*> Just ( Set.singleton (contact^.node_id))
183 getNgramsDocId :: CorpusId
186 -> GargNoServer (HashMap DocAuthor (Set NodeId))
187 getNgramsDocId cId lId nt = do
188 lIds <- selectNodesWithUsername NodeList userMaster
189 repo <- getRepo' lIds
190 let ngs = filterListWithRoot MapTerm $ mapTermListRoot [lId] nt repo
192 groupNodesByNgrams ngs
193 <$> getNodesByNgramsOnlyUser cId (lIds <> [lId]) nt (HashMap.keys ngs)