]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Database/Flow/Utils.hs
fix the freq
[gargantext.git] / src / Gargantext / Database / Flow / Utils.hs
1 {-|
2 Module : Gargantext.Database.Flow.Utils
3 Description : Database Flow
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 -}
11
12 {-# LANGUAGE NoImplicitPrelude #-}
13 {-# LANGUAGE OverloadedStrings #-}
14 {-# LANGUAGE RankNTypes #-}
15
16 module Gargantext.Database.Flow.Utils
17 where
18
19 import Data.Map (Map)
20 import qualified Data.Map as DM
21 import Gargantext.Prelude
22 import Gargantext.Database.Schema.Ngrams
23 import Gargantext.Database.Types.Node (NodeId, Node, NodePoly(..), Hyperdata)
24 import Gargantext.Database.Utils (Cmd)
25 import Gargantext.Database.Schema.NodeNgram
26 import Gargantext.Database.Schema.NodeNodeNgrams
27 import Gargantext.Database.Types.Node
28 import Gargantext.Core.Types.Main (ListType(..), listTypeId)
29
30 toMaps :: Hyperdata a => (a -> Map (NgramsT Ngrams) Int) -> [Node a] -> Map (NgramsT Ngrams) (Map NodeId Int)
31 toMaps fun ns = mapNodeIdNgrams $ documentIdWithNgrams fun ns'
32 where
33 ns' = map (\(Node nId _ _ _ _ _ json) -> DocumentWithId nId json) ns
34
35 mapNodeIdNgrams :: Hyperdata a => [DocumentIdWithNgrams a] -> Map (NgramsT Ngrams) (Map NodeId Int)
36 mapNodeIdNgrams ds = DM.map (DM.fromListWith (+)) $ DM.fromListWith (<>) xs
37 where
38 xs = [(ng, [(nId, i)]) | (nId, n2i') <- n2i ds, (ng, i) <- DM.toList n2i']
39 n2i = map (\d -> ((documentId . documentWithId) d, document_ngrams d))
40
41
42 documentIdWithNgrams :: Hyperdata a => (a -> Map (NgramsT Ngrams) Int)
43 -> [DocumentWithId a] -> [DocumentIdWithNgrams a]
44 documentIdWithNgrams f = map (\d -> DocumentIdWithNgrams d ((f . documentData) d))
45
46
47 data DocumentWithId a =
48 DocumentWithId { documentId :: NodeId
49 , documentData :: a
50 } deriving (Show)
51
52
53 data DocumentIdWithNgrams a =
54 DocumentIdWithNgrams
55 { documentWithId :: DocumentWithId a
56 , document_ngrams :: Map (NgramsT Ngrams) Int
57 } deriving (Show)
58
59 -- | TODO for now, list Type is CandidateTerm because Graph Terms
60 -- have to be detected in next step in the flow
61 -- TODO remvoe this
62 insertToNodeNgrams :: Map NgramsIndexed (Map NgramsType (Map NodeId Int)) -> Cmd err Int
63 insertToNodeNgrams m = insertNodeNgrams [ NodeNgram n (_ngramsId ng) Nothing (ngramsTypeId t) (listTypeId CandidateTerm) (fromIntegral i)
64 | (ng, t2n2i) <- DM.toList m
65 , (t, n2i) <- DM.toList t2n2i
66 , (n, i) <- DM.toList n2i
67 ]
68
69
70 docNgrams2nodeNodeNgrams :: CorpusId -> DocNgrams -> NodeNodeNgrams
71 docNgrams2nodeNodeNgrams cId (DocNgrams d n nt w) = NodeNodeNgrams Nothing cId d n nt w
72
73 data DocNgrams = DocNgrams { dn_doc_id :: DocId
74 , dn_ngrams_id :: Int
75 , dn_ngrams_type :: NgramsTypeId
76 , dn_weight :: Double
77 }
78
79 insertDocNgramsOn :: CorpusId -> [DocNgrams] -> Cmd err Int
80 insertDocNgramsOn cId dn = insertNodeNodeNgrams $ (map (docNgrams2nodeNodeNgrams cId) dn)
81
82 insertDocNgrams :: CorpusId -> Map NgramsIndexed (Map NgramsType (Map NodeId Int)) -> Cmd err Int
83 insertDocNgrams cId m = insertDocNgramsOn cId [ DocNgrams n (_ngramsId ng) (ngramsTypeId t) (fromIntegral i)
84 | (ng, t2n2i) <- DM.toList m
85 , (t, n2i) <- DM.toList t2n2i
86 , (n, i) <- DM.toList n2i
87 ]
88