]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Database/Flow/Pairing.hs
[TERMS] clean/refacto before integration to workflow.
[gargantext.git] / src / Gargantext / Database / Flow / Pairing.hs
1 {-|
2 Module : Gargantext.Database.Flow
3 Description : Database Flow
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 -}
11
12 {-# LANGUAGE QuasiQuotes #-}
13 {-# LANGUAGE NoImplicitPrelude #-}
14 {-# LANGUAGE OverloadedStrings #-}
15 {-# LANGUAGE RankNTypes #-}
16 -- {-# LANGUAGE Arrows #-}
17
18 module Gargantext.Database.Flow.Pairing
19 where
20
21 --import Debug.Trace (trace)
22 import Control.Lens (_Just,view)
23 import Database.PostgreSQL.Simple.SqlQQ (sql)
24 -- import Opaleye
25 -- import Opaleye.Aggregate
26 -- import Control.Arrow (returnA)
27 import Data.Maybe (catMaybes)
28 import Data.Map (Map, fromList)
29 import Safe (lastMay)
30 import qualified Data.Map as DM
31 import Data.Text (Text, toLower)
32 import qualified Data.Text as DT
33 import Gargantext.Prelude hiding (sum)
34 import Gargantext.Database.Schema.Ngrams -- (NgramsType(..))
35 --import Gargantext.Database.Node.Contact -- (HyperdataContact(..))
36 --import Gargantext.Database.Types.Node -- (Hyperdata(..))
37 import Gargantext.Database.Node.Contact
38 import Gargantext.Database.Flow.Utils
39 import Gargantext.Database.Utils (Cmd, runPGSQuery)
40 import Gargantext.Database.Types.Node (AnnuaireId, CorpusId)
41 import Gargantext.Database.Node.Children
42 import Gargantext.Core.Types (NodeType(..))
43
44 -- TODO mv this type in Types Main
45 type Terms = Text
46
47 -- | TODO : add paring policy as parameter
48 pairing :: AnnuaireId -> CorpusId -> Cmd err Int
49 pairing aId cId = do
50 contacts' <- getContacts aId (Just NodeContact)
51 let contactsMap = pairingPolicyToMap toLower $ toMaps extractNgramsT contacts'
52
53 ngramsMap' <- getNgramsTindexed cId Authors
54 let ngramsMap = pairingPolicyToMap lastName ngramsMap'
55
56 let indexedNgrams = pairMaps contactsMap ngramsMap
57
58 insertToNodeNgrams indexedNgrams
59 -- TODO add List
60
61 lastName :: Terms -> Terms
62 lastName texte = DT.toLower $ maybe texte (\x -> if DT.length x > 3 then x else texte) (lastName' texte)
63 where
64 lastName' = lastMay . DT.splitOn " "
65
66 -- TODO: this methods is dangerous (maybe equalities of the result are not taken into account
67 -- emergency demo plan...
68 pairingPolicyToMap :: (Terms -> Terms)
69 -> Map (NgramsT Ngrams) a -> Map (NgramsT Ngrams) a
70 pairingPolicyToMap f = DM.mapKeys (pairingPolicy f)
71
72 pairingPolicy :: (Terms -> Terms) -> NgramsT Ngrams -> NgramsT Ngrams
73 pairingPolicy f (NgramsT nt (Ngrams ng _)) = (NgramsT nt (Ngrams (f ng) 1))
74
75 -- | TODO : use Occurrences in place of Int
76 extractNgramsT :: HyperdataContact -> Map (NgramsT Ngrams) Int
77 extractNgramsT contact = fromList [(NgramsT Authors a' , 1)| a' <- authors ]
78 where
79 authors = map text2ngrams $ catMaybes [view (hc_who . _Just . cw_lastName) contact]
80 --}
81
82 -- NP: notice how this function is no longer specific to the ContactId type
83 pairMaps :: Map (NgramsT Ngrams) a
84 -> Map (NgramsT Ngrams) NgramsId
85 -> Map NgramsIndexed (Map NgramsType a)
86 pairMaps m1 m2 =
87 DM.fromList
88 [ (NgramsIndexed ng nId, DM.singleton nt n2i)
89 | (k@(NgramsT nt ng),n2i) <- DM.toList m1
90 , Just nId <- [DM.lookup k m2]
91 ]
92
93 -----------------------------------------------------------------------
94 getNgramsTindexed:: CorpusId -> NgramsType -> Cmd err (Map (NgramsT Ngrams) NgramsId)
95 getNgramsTindexed corpusId ngramsType' = fromList
96 <$> map (\(ngramsId',t,n) -> (NgramsT ngramsType' (Ngrams t n),ngramsId'))
97 <$> selectNgramsTindexed corpusId ngramsType'
98
99 selectNgramsTindexed :: CorpusId -> NgramsType -> Cmd err [(NgramsId, Terms, Int)]
100 selectNgramsTindexed corpusId ngramsType'' = runPGSQuery selectQuery (corpusId, ngramsTypeId ngramsType'')
101 where
102 selectQuery = [sql| SELECT n.id,n.terms,n.n from ngrams n
103 JOIN nodes_ngrams occ ON occ.ngram_id = n.id
104 JOIN nodes_nodes nn ON nn.node2_id = occ.node_id
105
106 WHERE nn.node1_id = ?
107 AND occ.ngrams_type = ?
108 AND occ.node_id = nn.node2_id
109 GROUP BY n.id;
110 |]
111
112 {- | TODO more typed SQL queries
113 selectNgramsTindexed :: CorpusId -> NgramsType -> Query NgramsRead
114 selectNgramsTindexed corpusId ngramsType = proc () -> do
115 nodeNode <- queryNodeNodeTable -< ()
116 nodeNgrams <- queryNodesNgramsTable -< ()
117 ngrams <- queryNgramsTable -< ()
118
119 restrict -< node1_id nodeNode .== pgInt4 corpusId
120 restrict -< node2_id nodeNode .== node_id nodeNgrams
121 restrict -< ngrams_id ngrams .== node_ngrams nodeNgrams
122
123 result <- aggregate groupBy (ngrams_id ngrams)
124 returnA -< result
125 --}
126
127