]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/API/Ngrams/Tools.hs
Merge branch 'dev' into dev-phylo
[gargantext.git] / src / Gargantext / API / Ngrams / Tools.hs
1 {-|
2 Module : Gargantext.API.Ngrams.Tools
3 Description : Tools to manage Ngrams Elements (from the API)
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 -}
11
12 {-# LANGUAGE NoImplicitPrelude #-}
13 {-# LANGUAGE OverloadedStrings #-}
14 {-# LANGUAGE RankNTypes #-}
15
16 module Gargantext.API.Ngrams.Tools
17 where
18
19 import Control.Concurrent
20 import Control.Lens (_Just, (^.), at, view)
21 import Control.Monad.Reader
22 import Data.Map.Strict (Map)
23 import Data.Set (Set)
24 import Data.Text (Text)
25 import Data.Validity
26 import Gargantext.API.Ngrams
27 import Gargantext.Core.Types (ListType(..), NodeId, ListId)
28 import Gargantext.Database.Schema.Ngrams (NgramsType)
29 import Gargantext.Prelude
30 import qualified Data.Map.Strict as Map
31 import qualified Data.Set as Set
32
33
34 type RootTerm = Text
35
36 getListNgrams :: RepoCmdM env err m
37 => [ListId] -> NgramsType
38 -> m (Map Text NgramsRepoElement)
39 getListNgrams nodeIds ngramsType = do
40 v <- view repoVar
41 repo <- liftIO $ readMVar v
42
43 let
44 ngramsMap = repo ^. r_state . at ngramsType . _Just
45
46 ngrams = Map.unionsWith mergeNgramsElement
47 [ ngramsMap ^. at nodeId . _Just | nodeId <- nodeIds ]
48
49 pure ngrams
50
51 getTermsWith :: (RepoCmdM env err m, Ord a)
52 => (Text -> a ) -> [ListId]
53 -> NgramsType -> ListType
54 -> m (Map a [a])
55 getTermsWith f ls ngt lt = Map.fromListWith (<>)
56 <$> map (toTreeWith f)
57 <$> Map.toList
58 <$> Map.filter (\f' -> (fst f') == lt)
59 <$> mapTermListRoot ls ngt
60 where
61 toTreeWith f'' (t, (_lt, maybeRoot)) = case maybeRoot of
62 Nothing -> (f'' t, [])
63 Just r -> (f'' r, map f'' [t])
64
65 mapTermListRoot :: RepoCmdM env err m
66 => [ListId] -> NgramsType
67 -> m (Map Text (ListType, (Maybe Text)))
68 mapTermListRoot nodeIds ngramsType = do
69 ngrams <- getListNgrams nodeIds ngramsType
70 pure $ Map.fromList [ (t, (_nre_list nre, _nre_root nre))
71 | (t, nre) <- Map.toList ngrams
72 ]
73
74 filterListWithRoot :: ListType -> Map Text (ListType, Maybe Text)
75 -> Map Text (Maybe RootTerm)
76 filterListWithRoot lt m = Map.fromList
77 $ map (\(t,(_,r)) -> (t,r))
78 $ filter isGraphTerm (Map.toList m)
79 where
80 isGraphTerm (_t,(l, maybeRoot)) = case maybeRoot of
81 Nothing -> l == lt
82 Just r -> case Map.lookup r m of
83 Nothing -> panic $ "Garg.API.Ngrams.Tools: filterWithRoot, unknown key: " <> r
84 Just (l',_) -> l' == lt
85
86 groupNodesByNgrams :: Map Text (Maybe RootTerm)
87 -> Map Text (Set NodeId)
88 -> Map Text (Set NodeId)
89 groupNodesByNgrams syn occs = Map.fromListWith (<>) occs'
90 where
91 occs' = map toSyn (Map.toList occs)
92 toSyn (t,ns) = case Map.lookup t syn of
93 Nothing -> panic $ "[Garg.API.Ngrams.Tools.groupNodesByNgrams] unknown key: " <> t
94 Just r -> case r of
95 Nothing -> (t, ns)
96 Just r' -> (r',ns)
97
98 data Diagonal = Diagonal Bool
99
100 getCoocByNgrams :: Diagonal -> Map Text (Set NodeId) -> Map (Text, Text) Int
101 getCoocByNgrams = getCoocByNgrams' identity
102
103
104 getCoocByNgrams' :: (Ord a, Ord c) => (b -> Set c) -> Diagonal -> Map a b -> Map (a, a) Int
105 getCoocByNgrams' f (Diagonal diag) m =
106 Map.fromList [( (t1,t2)
107 , maybe 0 Set.size $ Set.intersection
108 <$> (fmap f $ Map.lookup t1 m)
109 <*> (fmap f $ Map.lookup t2 m)
110 ) | (t1,t2) <- case diag of
111 True -> [ (x,y) | x <- Map.keys m, y <- Map.keys m, x <= y]
112 False -> listToCombi identity (Map.keys m)
113 ]
114