]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Database/Flow/Pairing.hs
[PAIR][WIP] NodeNgrams -> NodeNodeNgrams, needs tests.
[gargantext.git] / src / Gargantext / Database / Flow / Pairing.hs
1 {-|
2 Module : Gargantext.Database.Flow
3 Description : Database Flow
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 -}
11
12 {-# LANGUAGE QuasiQuotes #-}
13 {-# LANGUAGE NoImplicitPrelude #-}
14 {-# LANGUAGE OverloadedStrings #-}
15 {-# LANGUAGE RankNTypes #-}
16 -- {-# LANGUAGE Arrows #-}
17
18 module Gargantext.Database.Flow.Pairing
19 (pairing)
20 where
21
22 --import Debug.Trace (trace)
23 import Control.Lens (_Just,view)
24 import Database.PostgreSQL.Simple.SqlQQ (sql)
25 -- import Opaleye
26 -- import Opaleye.Aggregate
27 -- import Control.Arrow (returnA)
28 import Data.Maybe (catMaybes)
29 import Data.Map (Map, fromList)
30 import Safe (lastMay)
31 import qualified Data.Map as DM
32 import Data.Text (Text, toLower)
33 import qualified Data.Text as DT
34 import Gargantext.Prelude hiding (sum)
35 import Gargantext.Database.Schema.Ngrams -- (NgramsType(..))
36 import Gargantext.Database.Node.Contact -- (HyperdataContact(..))
37 import Gargantext.Database.Flow.Utils
38 import Gargantext.Database.Utils (Cmd, runPGSQuery)
39 import Gargantext.Database.Types.Node (AnnuaireId, CorpusId, ListId)
40 import Gargantext.Database.Node.Children (getContacts)
41 import Gargantext.Core.Types (NodeType(..))
42
43 -- TODO mv this type in Types Main
44 type Terms = Text
45
46 -- | TODO : add paring policy as parameter
47 pairing :: AnnuaireId
48 -> CorpusId
49 -> ListId
50 -> Cmd err Int
51 pairing aId cId lId = do
52 contacts' <- getContacts aId (Just NodeContact)
53 let contactsMap = pairingPolicyToMap toLower $ toMaps extractNgramsT contacts'
54
55 ngramsMap' <- getNgramsTindexed cId Authors
56 let ngramsMap = pairingPolicyToMap lastName ngramsMap'
57
58 let indexedNgrams = pairMaps contactsMap ngramsMap
59
60 insertDocNgrams lId indexedNgrams
61
62 lastName :: Terms -> Terms
63 lastName texte = DT.toLower
64 $ maybe texte (\x -> if DT.length x > 3 then x else texte) (lastName' texte)
65 where
66 lastName' = lastMay . DT.splitOn " "
67
68 -- TODO: this method is dangerous (maybe equalities of the result are not taken into account
69 -- emergency demo plan...)
70 pairingPolicyToMap :: (Terms -> Terms)
71 -> Map (NgramsT Ngrams) a
72 -> Map (NgramsT Ngrams) a
73 pairingPolicyToMap f = DM.mapKeys (pairingPolicy f)
74
75 pairingPolicy :: (Terms -> Terms)
76 -> NgramsT Ngrams
77 -> NgramsT Ngrams
78 pairingPolicy f (NgramsT nt (Ngrams ng _)) = (NgramsT nt (Ngrams (f ng) 1))
79
80 -- | TODO : use Occurrences in place of Int
81 extractNgramsT :: HyperdataContact
82 -> Map (NgramsT Ngrams) Int
83 extractNgramsT contact = fromList [(NgramsT Authors a' , 1)| a' <- authors ]
84 where
85 authors = map text2ngrams $ catMaybes [view (hc_who . _Just . cw_lastName) contact]
86
87
88 pairMaps :: Map (NgramsT Ngrams) a
89 -> Map (NgramsT Ngrams) NgramsId
90 -> Map NgramsIndexed (Map NgramsType a)
91 pairMaps m1 m2 =
92 DM.fromList
93 [ (NgramsIndexed ng nId, DM.singleton nt n2i)
94 | (k@(NgramsT nt ng),n2i) <- DM.toList m1
95 , Just nId <- [DM.lookup k m2]
96 ]
97
98 -----------------------------------------------------------------------
99 getNgramsTindexed :: CorpusId
100 -> NgramsType
101 -> Cmd err (Map (NgramsT Ngrams) NgramsId)
102 getNgramsTindexed corpusId ngramsType' = fromList
103 <$> map (\(ngramsId',t,n) -> (NgramsT ngramsType' (Ngrams t n),ngramsId'))
104 <$> selectNgramsTindexed corpusId ngramsType'
105 where
106 selectNgramsTindexed :: CorpusId
107 -> NgramsType
108 -> Cmd err [(NgramsId, Terms, Int)]
109 selectNgramsTindexed corpusId' ngramsType'' = runPGSQuery selectQuery (corpusId', ngramsTypeId ngramsType'')
110 where
111 selectQuery = [sql| SELECT n.id,n.terms,n.n from ngrams n
112 JOIN node_node_ngrams occ ON occ.ngrams_id = n.id
113 JOIN nodes_nodes nn ON nn.node2_id = occ.node2_id
114
115 WHERE nn.node1_id = ?
116 AND occ.ngrams_type = ?
117 AND occ.node2_id = nn.node2_id
118 GROUP BY n.id;
119 |]
120
121 {- | TODO more typed SQL queries
122 selectNgramsTindexed :: CorpusId -> NgramsType -> Query NgramsRead
123 selectNgramsTindexed corpusId ngramsType = proc () -> do
124 nodeNode <- queryNodeNodeTable -< ()
125 nodeNgrams <- queryNodesNgramsTable -< ()
126 ngrams <- queryNgramsTable -< ()
127
128 restrict -< node1_id nodeNode .== pgInt4 corpusId
129 restrict -< node2_id nodeNode .== node_id nodeNgrams
130 restrict -< ngrams_id ngrams .== node_ngrams nodeNgrams
131
132 result <- aggregate groupBy (ngrams_id ngrams)
133 returnA -< result
134 --}