Git — Sourcephile - gargantext.git/blob - src/Gargantext/API/Ngrams/Tools.hs
[FEAT] API new corpus files_id as parameters.
[gargantext.git] / src / Gargantext / API / Ngrams / Tools.hs
1 {-|
2 Module : Gargantext.API.Ngrams.Tools
3 Description : Tools to manage Ngrams Elements (from the API)
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 -}
11
12 {-# LANGUAGE NoImplicitPrelude #-}
13 {-# LANGUAGE OverloadedStrings #-}
14 {-# LANGUAGE RankNTypes #-}
15
16 module Gargantext.API.Ngrams.Tools
17 where
18
19 import Control.Concurrent
20 import Control.Lens (_Just, (^.), at, view)
21 import Control.Monad.Reader
22 import Data.Map.Strict (Map)
23 import Data.Set (Set)
24 import Data.Text (Text)
25 import Data.Validity
26 import Gargantext.API.Ngrams
27 import Gargantext.Core.Types (ListType(..), NodeId, ListId)
28 import Gargantext.Database.Schema.Ngrams (NgramsType)
29 import Gargantext.Prelude
30 import qualified Data.Map.Strict as Map
31 import qualified Data.Set as Set
32
33
-- | Alias used where a 'Text' denotes the root of a term, i.e. the
-- @Maybe RootTerm@ values of the term-to-root maps handled in this module.
34 type RootTerm = Text
35
36
-- | Collect the ngrams of the given type stored under several lists.
--
-- The repository is read from the 'MVar' found at 'repoVar' in the
-- environment.  For every id in @nodeIds@ the map stored under that id (for
-- @ngramsType@) is looked up, and all those maps are merged; when the same
-- term occurs in more than one list the elements are combined with
-- 'mergeNgramsElement'.
--
-- A missing ngrams type or a missing list id contributes nothing: the
-- @at k . _Just@ view falls back to 'mempty' when the key is absent.
getListNgrams :: RepoCmdM env err m
              => [ListId] -> NgramsType
              -> m (Map Text NgramsRepoElement)
getListNgrams nodeIds ngramsType = do
  repo <- view repoVar >>= liftIO . readMVar
  let
    -- Everything the repository holds for this ngrams type, keyed by list id.
    byList       = repo ^. r_state . at ngramsType . _Just
    -- Ngrams of a single list (empty when the list is unknown).
    ngramsOf lid = byList ^. at lid . _Just
  pure $ Map.unionsWith mergeNgramsElement (map ngramsOf nodeIds)
51
-- | For every term of the given lists, return its list type together with
-- its optional root.
--
-- The terms come from 'getListNgrams'; each element is reduced to the pair
-- @('_nre_list', '_nre_root')@.
mapTermListRoot :: RepoCmdM env err m
                => [ListId] -> NgramsType
                -> m (Map Text (ListType, (Maybe Text)))
mapTermListRoot nodeIds ngramsType =
  Map.map listAndRoot <$> getListNgrams nodeIds ngramsType
  where
    -- Keep only the two fields the callers are interested in.
    listAndRoot nre = (_nre_list nre, _nre_root nre)
60
-- | Keep the terms of @m@ that belong to the list type @lt@, each paired
-- with its optional root.
--
-- * A term without a root is kept when its own list type equals @lt@.
-- * A term with a root is kept when the /root's/ list type equals @lt@;
--   the term's own list type is not consulted in that case.
--
-- Calls 'panic' when a term refers to a root that is not a key of @m@.
filterListWithRoot :: ListType -> Map Text (ListType, Maybe Text)
                   -> Map Text (Maybe RootTerm)
filterListWithRoot lt m =
  Map.fromList
    [ (term, root)
    | (term, (ownType, root)) <- Map.toList m
    , inList ownType root
    ]
  where
    -- Rootless terms are judged by their own list type.
    inList ownType Nothing  = ownType == lt
    -- Terms with a root are judged by the list type recorded for that root.
    inList _       (Just r) = case Map.lookup r m of
      Just (rootType, _) -> rootType == lt
      Nothing            -> panic $ "Garg.API.Ngrams.Tools: filterWithRoot, unknown key: " <> r
72
-- | Re-key the occurrences in @occs@ by root term.
--
-- Each term of @occs@ is looked up in @syn@: a term whose entry is 'Nothing'
-- keeps its own key, a term whose entry is @'Just' root@ is filed under
-- @root@.  Node sets that end up under the same key are combined with '<>'.
--
-- Calls 'panic' when a term of @occs@ is not a key of @syn@.
groupNodesByNgrams :: Map Text (Maybe RootTerm)
                   -> Map Text (Set NodeId)
                   -> Map Text (Set NodeId)
groupNodesByNgrams syn occs = Map.mapKeysWith (<>) canonical occs
  where
    -- The key under which a term's occurrences are stored in the result.
    canonical term = case Map.lookup term syn of
      Just Nothing     -> term
      Just (Just root) -> root
      Nothing          -> panic $ "[Garg.API.Ngrams.Tools.groupNodesByNgrams] unknown key: " <> term
84
-- | Flag for 'getCoocByNgrams' selecting how the term pairs are generated.
data Diagonal = Diagonal Bool

-- | Count, for pairs of terms, how many nodes the two terms have in common.
--
-- The value stored for @(t1, t2)@ is the size of the intersection of the
-- node sets of @t1@ and @t2@ in @m@ (0 if either lookup fails).
--
-- With @Diagonal True@ the pairs are all @(x, y)@ of keys of @m@ with
-- @x <= y@, so every term is also paired with itself.  With
-- @Diagonal False@ the pairs are whatever @listToCombi identity@ yields for
-- the keys of @m@ (presumably the combinations without the diagonal; confirm
-- against the definition of 'listToCombi').
getCoocByNgrams :: Diagonal -> Map Text (Set NodeId) -> Map (Text, Text) Int
getCoocByNgrams (Diagonal diag) m =
  Map.fromList [ (pair, shared pair) | pair <- pairs ]
  where
    terms = Map.keys m

    -- Term pairs to score, depending on the 'Diagonal' flag.
    pairs =
      if diag
        then [ (x, y) | x <- terms, y <- terms, x <= y ]
        else listToCombi identity terms

    -- Number of nodes in which both terms of the pair occur.
    shared (t1, t2) =
      maybe 0 Set.size $ Set.intersection
                           <$> Map.lookup t1 m
                           <*> Map.lookup t2 m
97