]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Core/Text/List/Group.hs
[FEAT] SocialLists clean before connection
[gargantext.git] / src / Gargantext / Core / Text / List / Group.hs
1 {-|
2 Module : Gargantext.Core.Text.List.Group
3 Description :
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 -}
11
12 {-# LANGUAGE TemplateHaskell #-}
13 {-# LANGUAGE ConstraintKinds #-}
14 {-# LANGUAGE TypeFamilies #-}
15 {-# LANGUAGE FunctionalDependencies #-}
16
17
18 module Gargantext.Core.Text.List.Group
19 where
20
21 import Control.Lens (makeLenses, set, (^.))
22 import Data.Set (Set)
23 import Data.Map (Map)
24 import Data.Text (Text)
25 import Data.Semigroup (Semigroup, (<>))
26 import Gargantext.Core (Lang(..))
27 import Gargantext.Core.Text (size)
28 import Gargantext.Core.Types (ListType(..)) -- (MasterCorpusId, UserCorpusId)
29 import Gargantext.Database.Admin.Types.Node (NodeId)
30 -- import Gargantext.Core.Text.List.Learn (Model(..))
31 import Gargantext.Core.Text.List.Social.Scores (FlowListScores(..), flc_lists, flc_parents, keyWithMaxValue)
32 import Gargantext.Core.Text.Terms.Mono.Stem (stem)
33 import Gargantext.Prelude
34 import qualified Data.Set as Set
35 import qualified Data.Map as Map
36 import qualified Data.List as List
37 import qualified Data.Text as Text
38
39 {-
40 data NgramsListBuilder = BuilderStepO { stemSize :: !Int
41 , stemX :: !Int
42 , stopSize :: !StopSize
43 }
44 | BuilderStep1 { withModel :: !Model }
45 | BuilderStepN { withModel :: !Model }
46 | Tficf { nlb_lang :: !Lang
47 , nlb_group1 :: !Int
48 , nlb_group2 :: !Int
49 , nlb_stopSize :: !StopSize
50 , nlb_userCorpusId :: !UserCorpusId
51 , nlb_masterCorpusId :: !MasterCorpusId
52 }
53 -}
54
55 data StopSize = StopSize {unStopSize :: !Int}
56
57 -- | TODO: group with 2 terms only can be
58 -- discussed. Main purpose of this is offering
59 -- a first grouping option to user and get some
60 -- enriched data to better learn and improve that algo
61 data GroupParams = GroupParams { unGroupParams_lang :: !Lang
62 , unGroupParams_len :: !Int
63 , unGroupParams_limit :: !Int
64 , unGroupParams_stopSize :: !StopSize
65 }
66 | GroupIdentity
67
68 ngramsGroup :: GroupParams
69 -> Text
70 -> Text
71 ngramsGroup GroupIdentity = identity
72 ngramsGroup (GroupParams l _m _n _) =
73 Text.intercalate " "
74 . map (stem l)
75 -- . take n
76 . List.sort
77 -- . (List.filter (\t -> Text.length t > m))
78 . Text.splitOn " "
79 . Text.replace "-" " "
80
81 ------------------------------------------------------------------------
82 data GroupedTextParams a b =
83 GroupedTextParams { _gt_fun_stem :: Text -> Text
84 , _gt_fun_score :: a -> b
85 , _gt_fun_texts :: a -> Set Text
86 , _gt_fun_nodeIds :: a -> Set NodeId
87 -- , _gt_fun_size :: a -> Int
88 }
89
90 makeLenses 'GroupedTextParams
91
92 toGroupedText :: Ord b
93 => GroupedTextParams a b
94 -> [(Text,a)]
95 -> Map Stem (GroupedText b)
96 toGroupedText gparams from =
97 Map.fromListWith union $ map group from
98 where
99 group (t,d) = let t' = (gparams ^. gt_fun_stem) t
100 in (t', GroupedText
101 Nothing
102 t
103 ((gparams ^. gt_fun_score) d)
104 ((gparams ^. gt_fun_texts) d)
105 (size t)
106 t'
107 ((gparams ^. gt_fun_nodeIds) d)
108 )
109
110 ------------------------------------------------------------------------
111 ------------------------------------------------------------------------
112
113 toGroupedText' :: ( FlowList a b
114 , Ord b
115 )
116 => GroupedTextParams a b
117 -> Map Text FlowListScores
118 -> Map Text a
119 -> Map Stem (GroupedText b)
120 toGroupedText' groupParams scores =
121 (groupWithStem groupParams) . (groupWithScores scores)
122
123
124 groupWithStem :: ( FlowList a b
125 , Ord b
126 )
127 => GroupedTextParams a b
128 -> ([a], Map Text (GroupedText b))
129 -> Map Stem (GroupedText b)
130 groupWithStem _ = snd -- TODO
131
132
133 groupWithScores :: (FlowList a b, Ord b)
134 => Map Text FlowListScores
135 -> Map Text c
136 -> ([a], Map Text (GroupedText b))
137 groupWithScores scores ms' = foldl' fun_group start ms
138 where
139 start = ([], Map.empty)
140 ms = map selfParent (Map.toList ms')
141
142 fun_group (left, grouped) current =
143 case Map.lookup (hasNgrams current) scores of
144 Just scores' ->
145 case keyWithMaxValue $ scores' ^. flc_parents of
146 Nothing -> (left, Map.alter (updateWith scores' current) (hasNgrams current) grouped)
147 Just parent -> fun_group (left, grouped) (withParent ms' parent current)
148 Nothing -> (current : left, grouped)
149
150 updateWith scores current Nothing = Just $ createGroupWith scores current
151 updateWith scores current (Just x) = Just $ updateGroupWith scores current x
152
153 ------------------------------------------------------------------------
154 type FlowList a b = (HasNgrams a, HasGroup a b, WithParent a)
155
156 class HasNgrams a where
157 hasNgrams :: a -> Text
158
159 class HasGroup a b | a -> b where
160 createGroupWith :: FlowListScores -> a -> GroupedText b
161 updateGroupWith :: FlowListScores -> a -> GroupedText b -> GroupedText b
162
163 class WithParent a where
164 selfParent :: (Text, c) -> a
165 withParent :: Map Text c -> Text -> a -> a
166 union :: a -> a -> a
167
168 ------------------------------------------------------------------------
169 instance Ord a => WithParent (GroupedText a) where
170 union (GroupedText lt1 label1 score1 group1 s1 stem1 nodes1)
171 (GroupedText lt2 label2 score2 group2 s2 stem2 nodes2)
172 | score1 >= score2 = GroupedText lt label1 score1 (Set.insert label2 gr) s1 stem1 nodes
173 | otherwise = GroupedText lt label2 score2 (Set.insert label1 gr) s2 stem2 nodes
174 where
175 lt = lt1 <> lt2
176 gr = Set.union group1 group2
177 nodes = Set.union nodes1 nodes2
178
179 {-
180 selfParent (t,d) = let t' = (gparams ^. gt_fun_stem) t
181 in (t', GroupedText
182 Nothing
183 t
184 ((gparams ^. gt_fun_score) d)
185 ((gparams ^. gt_fun_texts) d)
186 (size t)
187 t'
188 ((gparams ^. gt_fun_nodeIds) d)
189 )
190 -}
191
192
193 ------------------------------------------------------------------------
194 type Stem = Text
195 type Label = Text
196 data GroupedText score =
197 GroupedText { _gt_listType :: !(Maybe ListType)
198 , _gt_label :: !Label
199 , _gt_score :: !score
200 , _gt_children :: !(Set Text)
201 , _gt_size :: !Int
202 , _gt_stem :: !Stem -- needed ?
203 , _gt_nodes :: !(Set NodeId)
204 } {-deriving Show--}
205 --{-
206 instance Show score => Show (GroupedText score) where
207 show (GroupedText lt l s _ _ _ _) = show l <> " : " <> show lt <> " : " <> show s
208 --}
209
210 instance (Eq a) => Eq (GroupedText a) where
211 (==) (GroupedText _ _ score1 _ _ _ _)
212 (GroupedText _ _ score2 _ _ _ _) = (==) score1 score2
213
214 instance (Eq a, Ord a) => Ord (GroupedText a) where
215 compare (GroupedText _ _ score1 _ _ _ _)
216 (GroupedText _ _ score2 _ _ _ _) = compare score1 score2
217
218 -- | Lenses Instances
219 makeLenses 'GroupedText
220
221 ------------------------------------------------------------------------
222 instance HasNgrams (Text, Set NodeId) where
223 hasNgrams (t, _) = t
224
225 instance HasGroup (Text, Set NodeId) Int where
226 createGroupWith fs (t, ns) = GroupedText (keyWithMaxValue $ fs ^. flc_lists)
227 label
228 (Set.size ns)
229 children
230 (size t)
231 t
232 ns
233 where
234 (label, children) = case keyWithMaxValue $ fs ^. flc_parents of
235 Nothing -> (t, Set.empty)
236 Just t' -> (t', Set.singleton t)
237
238 updateGroupWith fs (t, ns) g = set gt_listType (keyWithMaxValue $ fs ^. flc_lists)
239 $ set gt_nodes (Set.union ns $ g ^. gt_nodes) g
240
241 ------------------------------------------------------------------------
242 -- | To be removed
243 addListType :: Map Text ListType -> GroupedText a -> GroupedText a
244 addListType m g = set gt_listType (hasListType m g) g
245 where
246 hasListType :: Map Text ListType -> GroupedText a -> Maybe ListType
247 hasListType m' (GroupedText _ label _ g' _ _ _) =
248 List.foldl' (<>) Nothing
249 $ map (\t -> Map.lookup t m')
250 $ Set.toList
251 $ Set.insert label g'