]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Database/Flow/Pairing.hs
[GRAPH] removing trace + unconnected nodes (some left still).
[gargantext.git] / src / Gargantext / Database / Flow / Pairing.hs
1 {-|
2 Module : Gargantext.Database.Flow
3 Description : Database Flow
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 -}
11
12 {-# LANGUAGE QuasiQuotes #-}
13 {-# LANGUAGE NoImplicitPrelude #-}
14 {-# LANGUAGE OverloadedStrings #-}
15 {-# LANGUAGE RankNTypes #-}
16 -- {-# LANGUAGE Arrows #-}
17
18 module Gargantext.Database.Flow.Pairing
19 (pairing)
20 where
21
22 --import Debug.Trace (trace)
23 import Control.Lens (_Just, (^.))
24 import Database.PostgreSQL.Simple.SqlQQ (sql)
25 -- import Opaleye
26 -- import Opaleye.Aggregate
27 -- import Control.Arrow (returnA)
28 import Data.Maybe (catMaybes)
29 import Data.Map (Map, fromList)
30 import Safe (lastMay)
31 import qualified Data.Map as DM
32 import Data.Text (Text, toLower)
33 import qualified Data.Text as DT
34 import Gargantext.Prelude hiding (sum)
35 import Gargantext.Database.Schema.Ngrams -- (NgramsType(..))
36 import Gargantext.Database.Node.Contact -- (HyperdataContact(..))
37 import Gargantext.Database.Flow.Utils
38 import Gargantext.Database.Utils (Cmd, runPGSQuery)
39 import Gargantext.Database.Types.Node (AnnuaireId, CorpusId, ListId)
40 import Gargantext.Database.Node.Children (getAllContacts)
41
42 -- TODO mv this type in Types Main
43 type Terms = Text
44
45 -- | TODO : add paring policy as parameter
46 pairing :: AnnuaireId
47 -> CorpusId
48 -> ListId
49 -> Cmd err Int
50 pairing aId cId lId = do
51 contacts' <- getAllContacts aId
52 let contactsMap = pairingPolicyToMap toLower
53 $ toMaps extractNgramsT contacts'
54
55 ngramsMap' <- getNgramsTindexed cId Authors
56 let ngramsMap = pairingPolicyToMap lastName ngramsMap'
57
58 let indexedNgrams = pairMaps contactsMap ngramsMap
59
60 insertDocNgrams lId indexedNgrams
61
62 lastName :: Terms -> Terms
63 lastName texte = DT.toLower
64 $ maybe texte (\x -> if DT.length x > 3 then x else texte) (lastName' texte)
65 where
66 lastName' = lastMay . DT.splitOn " "
67
68 -- TODO: this method is dangerous (maybe equalities of the result are not taken into account
69 -- emergency demo plan...)
70 pairingPolicyToMap :: (Terms -> Terms)
71 -> Map (NgramsT Ngrams) a
72 -> Map (NgramsT Ngrams) a
73 pairingPolicyToMap f = DM.mapKeys (pairingPolicy f)
74
75 pairingPolicy :: (Terms -> Terms)
76 -> NgramsT Ngrams
77 -> NgramsT Ngrams
78 pairingPolicy f (NgramsT nt (Ngrams ng _)) = (NgramsT nt (Ngrams (f ng) 1))
79
80 -- | TODO : use Occurrences in place of Int
81 extractNgramsT :: HyperdataContact
82 -> Map (NgramsT Ngrams) Int
83 extractNgramsT contact = fromList [(NgramsT Authors a' , 1)| a' <- authors ]
84 where
85 authors = map text2ngrams
86 $ catMaybes [ contact^.(hc_who . _Just . cw_lastName) ]
87
88
89 pairMaps :: Map (NgramsT Ngrams) a
90 -> Map (NgramsT Ngrams) NgramsId
91 -> Map NgramsIndexed (Map NgramsType a)
92 pairMaps m1 m2 =
93 DM.fromList
94 [ (NgramsIndexed ng nId, DM.singleton nt n2i)
95 | (k@(NgramsT nt ng),n2i) <- DM.toList m1
96 , Just nId <- [DM.lookup k m2]
97 ]
98
99 -----------------------------------------------------------------------
100 getNgramsTindexed :: CorpusId
101 -> NgramsType
102 -> Cmd err (Map (NgramsT Ngrams) NgramsId)
103 getNgramsTindexed corpusId ngramsType' = fromList
104 <$> map (\(ngramsId',t,n) -> (NgramsT ngramsType' (Ngrams t n),ngramsId'))
105 <$> selectNgramsTindexed corpusId ngramsType'
106 where
107 selectNgramsTindexed :: CorpusId
108 -> NgramsType
109 -> Cmd err [(NgramsId, Terms, Int)]
110 selectNgramsTindexed corpusId' ngramsType'' = runPGSQuery selectQuery (corpusId', ngramsTypeId ngramsType'')
111 where
112 selectQuery = [sql| SELECT n.id,n.terms,n.n from ngrams n
113 JOIN node_node_ngrams occ ON occ.ngrams_id = n.id
114 JOIN nodes_nodes nn ON nn.node2_id = occ.node2_id
115
116 WHERE nn.node1_id = ?
117 AND occ.ngrams_type = ?
118 AND occ.node2_id = nn.node2_id
119 GROUP BY n.id;
120 |]
121
122 {- | TODO more typed SQL queries
123 selectNgramsTindexed :: CorpusId -> NgramsType -> Query NgramsRead
124 selectNgramsTindexed corpusId ngramsType = proc () -> do
125 nodeNode <- queryNodeNodeTable -< ()
126 nodeNgrams <- queryNodesNgramsTable -< ()
127 ngrams <- queryNgramsTable -< ()
128
129 restrict -< node1_id nodeNode .== pgInt4 corpusId
130 restrict -< node2_id nodeNode .== node_id nodeNgrams
131 restrict -< ngrams_id ngrams .== node_ngrams nodeNgrams
132
133 result <- aggregate groupBy (ngrams_id ngrams)
134 returnA -< result
135 --}