]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Database/Flow/Utils.hs
[PAIR][WIP] NodeNgrams -> NodeNodeNgrams, needs tests.
[gargantext.git] / src / Gargantext / Database / Flow / Utils.hs
1 {-|
2 Module : Gargantext.Database.Flow.Utils
3 Description : Database Flow
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 -}
11
12 {-# LANGUAGE NoImplicitPrelude #-}
13 {-# LANGUAGE OverloadedStrings #-}
14 {-# LANGUAGE RankNTypes #-}
15
16 module Gargantext.Database.Flow.Utils
17 where
18
19 import Data.Map (Map)
20 import qualified Data.Map as DM
21 import Gargantext.Prelude
22 import Gargantext.Database.Schema.Ngrams
23 import Gargantext.Database.Types.Node (NodeId, Node, NodePoly(..), Hyperdata)
24 import Gargantext.Database.Utils (Cmd)
25 import Gargantext.Database.Schema.NodeNodeNgrams
26 import Gargantext.Database.Types.Node
27
28 toMaps :: Hyperdata a => (a -> Map (NgramsT Ngrams) Int) -> [Node a] -> Map (NgramsT Ngrams) (Map NodeId Int)
29 toMaps fun ns = mapNodeIdNgrams $ documentIdWithNgrams fun ns'
30 where
31 ns' = map (\(Node nId _ _ _ _ _ json) -> DocumentWithId nId json) ns
32
33 mapNodeIdNgrams :: Hyperdata a => [DocumentIdWithNgrams a] -> Map (NgramsT Ngrams) (Map NodeId Int)
34 mapNodeIdNgrams ds = DM.map (DM.fromListWith (+)) $ DM.fromListWith (<>) xs
35 where
36 xs = [(ng, [(nId, i)]) | (nId, n2i') <- n2i ds, (ng, i) <- DM.toList n2i']
37 n2i = map (\d -> ((documentId . documentWithId) d, document_ngrams d))
38
39
40 documentIdWithNgrams :: Hyperdata a
41 => (a -> Map (NgramsT Ngrams) Int)
42 -> [DocumentWithId a]
43 -> [DocumentIdWithNgrams a]
44 documentIdWithNgrams f = map (\d -> DocumentIdWithNgrams d ((f . documentData) d))
45
46
47 data DocumentWithId a =
48 DocumentWithId { documentId :: NodeId
49 , documentData :: a
50 } deriving (Show)
51
52
53 data DocumentIdWithNgrams a =
54 DocumentIdWithNgrams
55 { documentWithId :: DocumentWithId a
56 , document_ngrams :: Map (NgramsT Ngrams) Int
57 } deriving (Show)
58
59
60 docNgrams2nodeNodeNgrams :: CorpusId
61 -> DocNgrams
62 -> NodeNodeNgrams
63 docNgrams2nodeNodeNgrams cId (DocNgrams d n nt w) =
64 NodeNodeNgrams Nothing cId d n nt w
65
66 data DocNgrams = DocNgrams { dn_doc_id :: DocId
67 , dn_ngrams_id :: Int
68 , dn_ngrams_type :: NgramsTypeId
69 , dn_weight :: Double
70 }
71
72 insertDocNgramsOn :: CorpusId
73 -> [DocNgrams]
74 -> Cmd err Int
75 insertDocNgramsOn cId dn = insertNodeNodeNgrams $ (map (docNgrams2nodeNodeNgrams cId) dn)
76
77 insertDocNgrams :: CorpusId
78 -> Map NgramsIndexed (Map NgramsType (Map NodeId Int))
79 -> Cmd err Int
80 insertDocNgrams cId m = insertDocNgramsOn cId [ DocNgrams n (_ngramsId ng) (ngramsTypeId t) (fromIntegral i)
81 | (ng, t2n2i) <- DM.toList m
82 , (t, n2i) <- DM.toList t2n2i
83 , (n, i) <- DM.toList n2i
84 ]
85