]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Database/Action/Flow/Pairing.hs
[FACTORING] G.Text.Terms.
[gargantext.git] / src / Gargantext / Database / Action / Flow / Pairing.hs
1 {-|
2 Module : Gargantext.Database.Flow
3 Description : Database Flow
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 -}
11
12 {-# LANGUAGE FlexibleContexts #-}
13 {-# LANGUAGE QuasiQuotes #-}
14 {-# LANGUAGE NoImplicitPrelude #-}
15 {-# LANGUAGE OverloadedStrings #-}
16 {-# LANGUAGE RankNTypes #-}
17 -- {-# LANGUAGE Arrows #-}
18
19 module Gargantext.Database.Action.Flow.Pairing
20 (pairing)
21 where
22
23 import Control.Lens (_Just, (^.))
24 import Data.Map (Map, fromList)
25 import Data.Maybe (catMaybes)
26 import Data.Text (Text, toLower)
27 import Database.PostgreSQL.Simple.SqlQQ (sql)
28 import Gargantext.Core.Types (TableResult(..))
29 import Gargantext.Database.Action.Flow.Utils
30 import Gargantext.Database.Admin.Types.Node (AnnuaireId, CorpusId, ListId{-, DocId, ContactId-})
31 import Gargantext.Database.Admin.Utils (Cmd, runPGSQuery)
32 import Gargantext.Database.Action.Query.Node.Children (getAllContacts)
33 import Gargantext.Database.Action.Query.Node.Contact -- (HyperdataContact(..))
34 import Gargantext.Database.Schema.Ngrams -- (NgramsType(..))
35 import Gargantext.Prelude hiding (sum)
36 import Safe (lastMay)
37 import qualified Data.Map as DM
38 import qualified Data.Text as DT
39
40 -- TODO mv this type in Types Main
41 type Terms = Text
42
43 {-
44 pairing'' :: (CorpusId, CorpusId) -> (DocId -> DocId)
45 pairing'' = undefined
46
47 pairing' :: (CorpusId, AnnuaireId) -> (DocId -> ContactId)
48 pairing' = undefined
49 -}
50
51 -- | TODO : add paring policy as parameter
52 pairing :: CorpusId -- (CorpusId, ListId) -- Pair (Either CorpusId AnnuaireId) ListId
53 -> AnnuaireId -- (AnnuaireId, ListId) -- Pair (Either CorpusId AnnuaireId) ListId
54 -> ListId
55 -> Cmd err Int
56 pairing cId aId lId = do
57 contacts' <- getAllContacts aId
58 let contactsMap = pairingPolicyToMap toLower
59 $ toMaps extractNgramsT (tr_docs contacts')
60
61 ngramsMap' <- getNgramsTindexed cId Authors
62 let ngramsMap = pairingPolicyToMap lastName ngramsMap'
63
64 let indexedNgrams = pairMaps contactsMap ngramsMap
65
66 insertDocNgrams lId indexedNgrams
67
68 lastName :: Terms -> Terms
69 lastName texte = DT.toLower
70 $ maybe texte (\x -> if DT.length x > 3 then x else texte) (lastName' texte)
71 where
72 lastName' = lastMay . DT.splitOn " "
73
74 -- TODO: this method is dangerous (maybe equalities of the result are not taken into account
75 -- emergency demo plan...)
76 pairingPolicyToMap :: (Terms -> Terms)
77 -> Map (NgramsT Ngrams) a
78 -> Map (NgramsT Ngrams) a
79 pairingPolicyToMap f = DM.mapKeys (pairingPolicy f)
80
81 pairingPolicy :: (Terms -> Terms)
82 -> NgramsT Ngrams
83 -> NgramsT Ngrams
84 pairingPolicy f (NgramsT nt (Ngrams ng _)) = (NgramsT nt (Ngrams (f ng) 1))
85
86 -- | TODO : use Occurrences in place of Int
87 extractNgramsT :: HyperdataContact
88 -> Map (NgramsT Ngrams) Int
89 extractNgramsT contact = fromList [(NgramsT Authors a' , 1)| a' <- authors ]
90 where
91 authors = map text2ngrams
92 $ catMaybes [ contact^.(hc_who . _Just . cw_lastName) ]
93
94
95 pairMaps :: Map (NgramsT Ngrams) a
96 -> Map (NgramsT Ngrams) NgramsId
97 -> Map NgramsIndexed (Map NgramsType a)
98 pairMaps m1 m2 =
99 DM.fromList
100 [ (NgramsIndexed ng nId, DM.singleton nt n2i)
101 | (k@(NgramsT nt ng),n2i) <- DM.toList m1
102 , Just nId <- [DM.lookup k m2]
103 ]
104
105 -----------------------------------------------------------------------
106 getNgramsTindexed :: CorpusId
107 -> NgramsType
108 -> Cmd err (Map (NgramsT Ngrams) NgramsId)
109 getNgramsTindexed corpusId ngramsType' = fromList
110 <$> map (\(ngramsId',t,n) -> (NgramsT ngramsType' (Ngrams t n),ngramsId'))
111 <$> selectNgramsTindexed corpusId ngramsType'
112 where
113 selectNgramsTindexed :: CorpusId
114 -> NgramsType
115 -> Cmd err [(NgramsId, Terms, Int)]
116 selectNgramsTindexed corpusId' ngramsType'' = runPGSQuery selectQuery (corpusId', ngramsTypeId ngramsType'')
117 where
118 selectQuery = [sql| SELECT n.id,n.terms,n.n from ngrams n
119 JOIN node_node_ngrams occ ON occ.ngrams_id = n.id
120 -- JOIN node_node_ngrams2 occ ON occ.ngrams_id = n.id
121 JOIN nodes_nodes nn ON nn.node2_id = occ.node2_id
122
123 WHERE nn.node1_id = ?
124 AND occ.ngrams_type = ?
125 AND occ.node2_id = nn.node2_id
126 GROUP BY n.id;
127 |]
128
129 {- | TODO more typed SQL queries
130 selectNgramsTindexed :: CorpusId -> NgramsType -> Query NgramsRead
131 selectNgramsTindexed corpusId ngramsType = proc () -> do
132 nodeNode <- queryNodeNodeTable -< ()
133 nodeNgrams <- queryNodesNgramsTable -< ()
134 ngrams <- queryNgramsTable -< ()
135
136 restrict -< node1_id nodeNode .== pgInt4 corpusId
137 restrict -< node2_id nodeNode .== node_id nodeNgrams
138 restrict -< ngrams_id ngrams .== node_ngrams nodeNgrams
139
140 result <- aggregate groupBy (ngrams_id ngrams)
141 returnA -< result
142 --}