]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Database/Action/Flow/Pairing.hs
[ngrams] NgramsTerm as a newtype
[gargantext.git] / src / Gargantext / Database / Action / Flow / Pairing.hs
1 {-|
2 Module : Gargantext.Database.Flow
3 Description : Database Flow
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 -}
11
12 {-# LANGUAGE QuasiQuotes #-}
13 {-# LANGUAGE Arrows #-}
14
15 module Gargantext.Database.Action.Flow.Pairing
16 -- (pairing)
17 where
18
19 import Control.Lens (_Just, (^.))
20 import Data.Map (Map, fromList, fromListWith)
21 import Data.Maybe (catMaybes, fromMaybe)
22 import Data.Set (Set)
23 import Data.Text (Text)
24 import Gargantext.API.Ngrams.Tools
25 import Gargantext.API.Prelude (GargNoServer)
26 import Gargantext.Core.Types (TableResult(..), Term)
27 import Gargantext.Core.Types.Main
28 import Gargantext.Database
29 import Gargantext.Database.Action.Metrics.NgramsByNode (getNodesByNgramsOnlyUser)
30 import Gargantext.Database.Admin.Config
31 import Gargantext.Database.Admin.Config (nodeTypeId)
32 import Gargantext.Database.Admin.Types.Hyperdata -- (HyperdataContact(..))
33 import Gargantext.Database.Admin.Types.Node -- (AnnuaireId, CorpusId, ListId, DocId, ContactId, NodeId)
34 import Gargantext.Database.Prelude (Cmd, runOpaQuery)
35 import Gargantext.Database.Query.Prelude (leftJoin2, returnA, queryNodeNodeTable)
36 import Gargantext.Database.Query.Table.Node.Children (getAllContacts)
37 import Gargantext.Database.Query.Table.Node.Select (selectNodesWithUsername)
38 import Gargantext.Database.Query.Table.Node (defaultList)
39 import Gargantext.Database.Query.Table.NodeNode (insertNodeNode)
40 import Gargantext.Database.Schema.Ngrams -- (NgramsType(..))
41 import Gargantext.Database.Schema.Node
42 import Gargantext.Prelude hiding (sum)
43 import Opaleye
44 import Safe (lastMay)
45 import qualified Data.List as List
46 import qualified Data.Map as Map
47 import qualified Data.Set as Set
48 import qualified Data.Text as DT
49
50
51 -- | isPairedWith
52 -- All NodeAnnuaire paired with a Corpus of NodeId nId:
53 -- isPairedWith NodeAnnuaire corpusId
54 isPairedWith :: NodeId -> NodeType -> Cmd err [NodeId]
55 isPairedWith nId nt = runOpaQuery (selectQuery nt nId)
56 where
57 selectQuery :: NodeType -> NodeId -> Query (Column PGInt4)
58 selectQuery nt' nId' = proc () -> do
59 (node, node_node) <- queryJoin -< ()
60 restrict -< (node^.node_typename) .== (pgInt4 $ nodeTypeId nt')
61 restrict -< (node_node^.nn_node1_id) .== (toNullable $ pgNodeId nId')
62 returnA -< node^.node_id
63
64 queryJoin :: Query (NodeRead, NodeNodeReadNull)
65 queryJoin = leftJoin2 queryNodeTable queryNodeNodeTable cond
66 where
67 cond (node, node_node) = node^.node_id .== node_node^. nn_node2_id
68
69 -----------------------------------------------------------------------
70 pairing :: AnnuaireId -> CorpusId -> Maybe ListId -> GargNoServer Int
71 pairing a c l' = do
72 l <- case l' of
73 Nothing -> defaultList c
74 Just l'' -> pure l''
75 dataPaired <- dataPairing a (c,l,Authors) takeName takeName
76 r <- insertDB $ prepareInsert dataPaired
77 _ <- insertNodeNode [ NodeNode c a Nothing Nothing]
78 pure r
79
80
81 dataPairing :: AnnuaireId
82 -> (CorpusId, ListId, NgramsType)
83 -> (ContactName -> Projected)
84 -> (DocAuthor -> Projected)
85 -> GargNoServer (Map ContactId (Set DocId))
86 dataPairing aId (cId, lId, ngt) fc fa = do
87 mc <- getNgramsContactId aId
88 md <- getNgramsDocId cId lId ngt
89
90 printDebug "ngramsContactId" mc
91 printDebug "ngramsDocId" md
92 let
93 from = projectionFrom (Set.fromList $ Map.keys mc) fc
94 to = projectionTo (Set.fromList $ Map.keys md) fa
95
96 pure $ fusion mc $ align from to md
97
98
99
100 prepareInsert :: Map ContactId (Set DocId) -> [NodeNode]
101 prepareInsert m = map (\(n1,n2) -> NodeNode n1 n2 Nothing Nothing)
102 $ List.concat
103 $ map (\(contactId, setDocIds)
104 -> map (\setDocId
105 -> (contactId, setDocId)
106 ) $ Set.toList setDocIds
107 )
108 $ Map.toList m
109
110 ------------------------------------------------------------------------
111 type ContactName = Text
112 type DocAuthor = Text
113 type Projected = Text
114
115 projectionFrom :: Set ContactName -> (ContactName -> Projected) -> Map ContactName Projected
116 projectionFrom ss f = fromList $ map (\s -> (s, f s)) (Set.toList ss)
117
118 projectionTo :: Set DocAuthor -> (DocAuthor -> Projected) -> Map Projected (Set DocAuthor)
119 projectionTo ss f = fromListWith (<>) $ map (\s -> (f s, Set.singleton s)) (Set.toList ss)
120 ------------------------------------------------------------------------
121 takeName :: Term -> Term
122 takeName texte = DT.toLower texte'
123 where
124 texte' = maybe texte (\x -> if DT.length x > 3 then x else texte)
125 (lastName' texte)
126 lastName' = lastMay . DT.splitOn " "
127
128
129 ------------------------------------------------------------------------
130 align :: Map ContactName Projected
131 -> Map Projected (Set DocAuthor)
132 -> Map DocAuthor (Set DocId)
133 -> Map ContactName (Set DocId)
134 align mc ma md = fromListWith (<>)
135 $ map (\c -> (c, getProjection md $ testProjection c mc ma))
136 $ Map.keys mc
137 where
138 getProjection :: Map DocAuthor (Set DocId) -> Set DocAuthor -> Set DocId
139 getProjection ma' sa' =
140 if Set.null sa'
141 then Set.empty
142 else Set.unions $ sets ma' sa'
143 where
144 sets ma'' sa'' = Set.map (\s -> lookup s ma'') sa''
145 lookup s' ma''= fromMaybe Set.empty (Map.lookup s' ma'')
146
147 testProjection :: ContactName
148 -> Map ContactName Projected
149 -> Map Projected (Set DocAuthor)
150 -> Set DocAuthor
151 testProjection cn' mc' ma' = case Map.lookup cn' mc' of
152 Nothing -> Set.empty
153 Just c -> case Map.lookup c ma' of
154 Nothing -> Set.empty
155 Just a -> a
156
157 fusion :: Map ContactName (Set ContactId)
158 -> Map ContactName (Set DocId)
159 -> Map ContactId (Set DocId)
160 fusion mc md = Map.fromListWith (<>)
161 $ catMaybes
162 $ [ (,) <$> Just cId <*> Map.lookup cn md
163 | (cn, setContactId) <- Map.toList mc
164 , cId <- Set.toList setContactId
165 ]
166 ------------------------------------------------------------------------
167
168 getNgramsContactId :: AnnuaireId
169 -> Cmd err (Map ContactName (Set NodeId))
170 getNgramsContactId aId = do
171 contacts <- getAllContacts aId
172 pure $ fromListWith (<>)
173 $ catMaybes
174 $ map (\contact -> (,) <$> contact^.(node_hyperdata . hc_who . _Just . cw_lastName)
175 <*> Just ( Set.singleton (contact^.node_id))
176 ) (tr_docs contacts)
177
178
179 getNgramsDocId :: CorpusId
180 -> ListId
181 -> NgramsType
182 -> GargNoServer (Map DocAuthor (Set NodeId))
183 getNgramsDocId cId lId nt = do
184 repo <- getRepo
185 lIds <- selectNodesWithUsername NodeList userMaster
186 let ngs = filterListWithRoot MapTerm $ mapTermListRoot [lId] nt repo
187
188 groupNodesByNgrams ngs
189 <$> getNodesByNgramsOnlyUser cId (lIds <> [lId]) nt (Map.keys ngs)