]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Core/Text/List/Group.hs
[FEAT] Social lists, connection (WIP)
[gargantext.git] / src / Gargantext / Core / Text / List / Group.hs
1 {-|
2 Module : Gargantext.Core.Text.List.Group
3 Description :
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 -}
11
12 {-# LANGUAGE TemplateHaskell #-}
13 {-# LANGUAGE ConstraintKinds #-}
14 {-# LANGUAGE TypeFamilies #-}
15 {-# LANGUAGE FunctionalDependencies #-}
16
17
18 module Gargantext.Core.Text.List.Group
19 where
20
21 import Data.Maybe (fromMaybe)
22 import Control.Lens (makeLenses, set, (^.))
23 import Data.Set (Set)
24 import Data.Map (Map)
25 import Data.Text (Text)
26 import Data.Semigroup (Semigroup, (<>))
27 import Gargantext.Core (Lang(..))
28 import Gargantext.Core.Text (size)
29 import Gargantext.Core.Types (ListType(..)) -- (MasterCorpusId, UserCorpusId)
30 import Gargantext.Database.Admin.Types.Node (NodeId)
31 -- import Gargantext.Core.Text.List.Learn (Model(..))
32 import Gargantext.Core.Text.List.Social.Scores (FlowListScores(..), flc_lists, flc_parents, keyWithMaxValue)
33 import Gargantext.Core.Text.Terms.Mono.Stem (stem)
34 import Gargantext.Database.Schema.Ngrams (NgramsType(..))
35 import Gargantext.Prelude
36 import qualified Data.Set as Set
37 import qualified Data.Map as Map
38 import qualified Data.List as List
39 import qualified Data.Text as Text
40
41 {-
42 data NgramsListBuilder = BuilderStepO { stemSize :: !Int
43 , stemX :: !Int
44 , stopSize :: !StopSize
45 }
46 | BuilderStep1 { withModel :: !Model }
47 | BuilderStepN { withModel :: !Model }
48 | Tficf { nlb_lang :: !Lang
49 , nlb_group1 :: !Int
50 , nlb_group2 :: !Int
51 , nlb_stopSize :: !StopSize
52 , nlb_userCorpusId :: !UserCorpusId
53 , nlb_masterCorpusId :: !MasterCorpusId
54 }
55 -}
56
57 data StopSize = StopSize {unStopSize :: !Int}
58
59 -- | TODO: group with 2 terms only can be
60 -- discussed. Main purpose of this is offering
61 -- a first grouping option to user and get some
62 -- enriched data to better learn and improve that algo
63 data GroupParams = GroupParams { unGroupParams_lang :: !Lang
64 , unGroupParams_len :: !Int
65 , unGroupParams_limit :: !Int
66 , unGroupParams_stopSize :: !StopSize
67 }
68 | GroupIdentity
69
70 ngramsGroup :: GroupParams
71 -> Text
72 -> Text
73 ngramsGroup GroupIdentity = identity
74 ngramsGroup (GroupParams l _m _n _) =
75 Text.intercalate " "
76 . map (stem l)
77 -- . take n
78 . List.sort
79 -- . (List.filter (\t -> Text.length t > m))
80 . Text.splitOn " "
81 . Text.replace "-" " "
82
83 ------------------------------------------------------------------------
84 data GroupedTextParams a b =
85 GroupedTextParams { _gt_fun_stem :: Text -> Text
86 , _gt_fun_score :: a -> b
87 , _gt_fun_texts :: a -> Set Text
88 , _gt_fun_nodeIds :: a -> Set NodeId
89 -- , _gt_fun_size :: a -> Int
90 }
91
92 makeLenses 'GroupedTextParams
93
94 groupedTextWithStem :: Ord b
95 => GroupedTextParams a b
96 -> Map Text a
97 -> Map Stem (GroupedText b)
98 groupedTextWithStem gparams from =
99 Map.fromListWith union $ map (group gparams) $ Map.toList from
100 where
101 group gparams' (t,d) = let t' = (gparams' ^. gt_fun_stem) t
102 in (t', GroupedText
103 Nothing
104 t
105 ((gparams' ^. gt_fun_score) d)
106 ((gparams' ^. gt_fun_texts) d)
107 (size t)
108 t'
109 ((gparams' ^. gt_fun_nodeIds) d)
110 )
111
112 ------------------------------------------------------------------------
113 toGroupedText :: ( FlowList a b
114 , Ord b
115 )
116 => GroupedTextParams a b
117 -> Map Text FlowListScores
118 -> Map Text c
119 -> Map Stem (GroupedText b)
120 toGroupedText groupParams scores =
121 (groupWithStem groupParams) . (groupWithScores scores)
122
123
124 groupWithStem :: ( FlowList a b
125 , Ord b
126 )
127 => GroupedTextParams a b
128 -> ([a], Map Text (GroupedText b))
129 -> Map Stem (GroupedText b)
130 groupWithStem _ = snd -- TODO (just for tests on Others Ngrams which do not need stem)
131
132
133 groupWithScores :: (FlowList a b, Ord b)
134 => Map Text FlowListScores
135 -> Map Text c
136 -> ([a], Map Text (GroupedText b))
137 groupWithScores scores ms' = foldl' fun_group start ms
138 where
139 start = ([], Map.empty)
140 ms = map selfParent $ Map.toList ms'
141
142 fun_group (left, grouped) current =
143 case Map.lookup (hasNgrams current) scores of
144 Just scores' ->
145 case keyWithMaxValue $ scores' ^. flc_parents of
146 Nothing -> (left, Map.alter (updateWith scores' current) (hasNgrams current) grouped)
147 Just parent -> fun_group (left, grouped) (withParent ms' parent current)
148 Nothing -> (current : left, grouped)
149
150 updateWith scores current Nothing = Just $ createGroupWithScores scores current
151 updateWith scores current (Just x) = Just $ updateGroupWithScores scores current x
152
153 ------------------------------------------------------------------------
154 type FlowList a b = (HasNgrams a, HasGroupWithScores a b, WithParent a)
155
156 class HasNgrams a where
157 hasNgrams :: a -> Text
158
159 class HasGroup a b | a -> b where
160 hasGroup :: a -> GroupedText b
161
162 class HasGroupWithStem a b where
163 hasGroupWithStem :: GroupedTextParams a b -> Map Text a -> Map Stem (GroupedText b)
164
165 class HasGroupWithScores a b | a -> b where
166 createGroupWithScores :: FlowListScores -> a -> GroupedText b
167 updateGroupWithScores :: FlowListScores -> a -> GroupedText b -> GroupedText b
168
169 class WithParent a where
170 selfParent :: (Text, c) -> a
171 withParent :: Map Text c -> Text -> a -> a
172 union :: a -> a -> a
173
174 ------------------------------------------------------------------------
175 instance Ord a => WithParent (GroupedText a) where
176 union (GroupedText lt1 label1 score1 group1 s1 stem1 nodes1)
177 (GroupedText lt2 label2 score2 group2 s2 stem2 nodes2)
178 | score1 >= score2 = GroupedText lt label1 score1 (Set.insert label2 gr) s1 stem1 nodes
179 | otherwise = GroupedText lt label2 score2 (Set.insert label1 gr) s2 stem2 nodes
180 where
181 lt = lt1 <> lt2
182 gr = Set.union group1 group2
183 nodes = Set.union nodes1 nodes2
184
185
186 ------------------------------------------------------------------------
187 data GroupedTextOrigin a =
188 GroupedTextOrigin { _gto_lable :: !Text
189 , _gto_ngramsType :: !NgramsType
190 , _gto_score :: !a
191 , _gto_listType :: !(Maybe ListType)
192 , _gto_children :: !(Set Text)
193 , _gto_nodes :: !(Set NodeId)
194 }
195
196 data GroupedTextStem a =
197 GroupedTextStem { _gts_origin :: !(GroupedTextOrigin a)
198 , _gts_stem :: !Stem
199 }
200
201 ------------------------------------------------------------------------
202 type Stem = Text
203 data GroupedText score =
204 GroupedText { _gt_listType :: !(Maybe ListType)
205 , _gt_label :: !Text
206 , _gt_score :: !score
207 , _gt_children :: !(Set Text)
208 , _gt_size :: !Int
209 , _gt_stem :: !Stem -- needed ?
210 , _gt_nodes :: !(Set NodeId)
211 } {-deriving Show--}
212 --{-
213 instance Show score => Show (GroupedText score) where
214 show (GroupedText lt l s _ _ _ _) = show l <> " : " <> show lt <> " : " <> show s
215 --}
216
217 instance (Eq a) => Eq (GroupedText a) where
218 (==) (GroupedText _ _ score1 _ _ _ _)
219 (GroupedText _ _ score2 _ _ _ _) = (==) score1 score2
220
221 instance (Eq a, Ord a) => Ord (GroupedText a) where
222 compare (GroupedText _ _ score1 _ _ _ _)
223 (GroupedText _ _ score2 _ _ _ _) = compare score1 score2
224
225 -- | Lenses Instances
226 makeLenses 'GroupedText
227
228 ------------------------------------------------------------------------
229 -- to remove
230 -- | These instances seeems useless, just for debug purpose
231 instance HasNgrams (Set Text, Set NodeId) where
232 hasNgrams = fromMaybe "Nothing" . head . Set.elems . fst
233
234 instance HasGroupWithScores (Set Text, Set NodeId) Int where
235 createGroupWithScores = undefined
236 updateGroupWithScores = undefined
237
238 instance WithParent (Set Text, Set NodeId) where
239 union = undefined
240
241 ------------------------------------------------------------------------
242 instance HasNgrams (Text, Set NodeId) where
243 hasNgrams (t, _) = t
244
245 instance HasGroupWithScores (Text, Set NodeId) Int where
246 createGroupWithScores fs (t, ns) = GroupedText (keyWithMaxValue $ fs ^. flc_lists)
247 label
248 (Set.size ns)
249 children
250 (size t)
251 t
252 ns
253 where
254 (label, children) = case keyWithMaxValue $ fs ^. flc_parents of
255 Nothing -> (t, Set.empty)
256 Just t' -> (t', Set.singleton t)
257
258 updateGroupWithScores fs (t, ns) g = set gt_listType (keyWithMaxValue $ fs ^. flc_lists)
259 $ set gt_nodes (Set.union ns $ g ^. gt_nodes) g
260
261 ------------------------------------------------------------------------
262 -- | To be removed
263 addListType :: Map Text ListType -> GroupedText a -> GroupedText a
264 addListType m g = set gt_listType (hasListType m g) g
265 where
266 hasListType :: Map Text ListType -> GroupedText a -> Maybe ListType
267 hasListType m' (GroupedText _ label _ g' _ _ _) =
268 List.foldl' (<>) Nothing
269 $ map (\t -> Map.lookup t m')
270 $ Set.toList
271 $ Set.insert label g'