2 Module : Gargantext.Core.Text.List.Group
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
12 {-# LANGUAGE TemplateHaskell #-}
13 {-# LANGUAGE ConstraintKinds #-}
14 {-# LANGUAGE TypeFamilies #-}
15 {-# LANGUAGE FunctionalDependencies #-}
18 module Gargantext.Core.Text.List.Group
21 import Control.Lens (makeLenses, set, (^.))
24 import Data.Text (Text)
25 import Data.Semigroup (Semigroup, (<>))
26 import Gargantext.Core (Lang(..))
27 import Gargantext.Core.Text (size)
28 import Gargantext.Core.Types (ListType(..)) -- (MasterCorpusId, UserCorpusId)
29 import Gargantext.Database.Admin.Types.Node (NodeId)
30 -- import Gargantext.Core.Text.List.Learn (Model(..))
31 import Gargantext.Core.Text.List.Social.Scores (FlowListScores(..), flc_lists, flc_parents, keyWithMaxValue)
32 import Gargantext.Core.Text.Terms.Mono.Stem (stem)
33 import Gargantext.Prelude
34 import qualified Data.Set as Set
35 import qualified Data.Map as Map
36 import qualified Data.List as List
37 import qualified Data.Text as Text
40 data NgramsListBuilder = BuilderStepO { stemSize :: !Int
42 , stopSize :: !StopSize
44 | BuilderStep1 { withModel :: !Model }
45 | BuilderStepN { withModel :: !Model }
46 | Tficf { nlb_lang :: !Lang
49 , nlb_stopSize :: !StopSize
50 , nlb_userCorpusId :: !UserCorpusId
51 , nlb_masterCorpusId :: !MasterCorpusId
55 data StopSize = StopSize {unStopSize :: !Int}
57 -- | TODO: group with 2 terms only can be
58 -- discussed. Main purpose of this is offering
59 -- a first grouping option to user and get some
60 -- enriched data to better learn and improve that algo
61 data GroupParams = GroupParams { unGroupParams_lang :: !Lang
62 , unGroupParams_len :: !Int
63 , unGroupParams_limit :: !Int
64 , unGroupParams_stopSize :: !StopSize
68 ngramsGroup :: GroupParams
71 ngramsGroup GroupIdentity = identity
72 ngramsGroup (GroupParams l _m _n _) =
77 -- . (List.filter (\t -> Text.length t > m))
79 . Text.replace "-" " "
81 ------------------------------------------------------------------------
82 data GroupedTextParams a b =
83 GroupedTextParams { _gt_fun_stem :: Text -> Text
84 , _gt_fun_score :: a -> b
85 , _gt_fun_texts :: a -> Set Text
86 , _gt_fun_nodeIds :: a -> Set NodeId
87 -- , _gt_fun_size :: a -> Int
90 makeLenses 'GroupedTextParams
92 toGroupedText :: Ord b
93 => GroupedTextParams a b
95 -> Map Stem (GroupedText b)
96 toGroupedText gparams from =
97 Map.fromListWith union $ map group from
99 group (t,d) = let t' = (gparams ^. gt_fun_stem) t
103 ((gparams ^. gt_fun_score) d)
104 ((gparams ^. gt_fun_texts) d)
107 ((gparams ^. gt_fun_nodeIds) d)
110 ------------------------------------------------------------------------
111 ------------------------------------------------------------------------
113 toGroupedText' :: ( FlowList a b
116 => GroupedTextParams a b
117 -> Map Text FlowListScores
119 -> Map Stem (GroupedText b)
120 toGroupedText' groupParams scores =
121 (groupWithStem groupParams) . (groupWithScores scores)
124 groupWithStem :: ( FlowList a b
127 => GroupedTextParams a b
128 -> ([a], Map Text (GroupedText b))
129 -> Map Stem (GroupedText b)
130 groupWithStem _ = snd -- TODO
133 groupWithScores :: (FlowList a b, Ord b)
134 => Map Text FlowListScores
136 -> ([a], Map Text (GroupedText b))
137 groupWithScores scores ms' = foldl' fun_group start ms
139 start = ([], Map.empty)
140 ms = map selfParent (Map.toList ms')
142 fun_group (left, grouped) current =
143 case Map.lookup (hasNgrams current) scores of
145 case keyWithMaxValue $ scores' ^. flc_parents of
146 Nothing -> (left, Map.alter (updateWith scores' current) (hasNgrams current) grouped)
147 Just parent -> fun_group (left, grouped) (withParent ms' parent current)
148 Nothing -> (current : left, grouped)
150 updateWith scores current Nothing = Just $ createGroupWith scores current
151 updateWith scores current (Just x) = Just $ updateGroupWith scores current x
153 ------------------------------------------------------------------------
154 type FlowList a b = (HasNgrams a, HasGroup a b, WithParent a)
156 class HasNgrams a where
157 hasNgrams :: a -> Text
159 class HasGroup a b | a -> b where
160 createGroupWith :: FlowListScores -> a -> GroupedText b
161 updateGroupWith :: FlowListScores -> a -> GroupedText b -> GroupedText b
163 class WithParent a where
164 selfParent :: (Text, c) -> a
165 withParent :: Map Text c -> Text -> a -> a
168 ------------------------------------------------------------------------
169 instance Ord a => WithParent (GroupedText a) where
170 union (GroupedText lt1 label1 score1 group1 s1 stem1 nodes1)
171 (GroupedText lt2 label2 score2 group2 s2 stem2 nodes2)
172 | score1 >= score2 = GroupedText lt label1 score1 (Set.insert label2 gr) s1 stem1 nodes
173 | otherwise = GroupedText lt label2 score2 (Set.insert label1 gr) s2 stem2 nodes
176 gr = Set.union group1 group2
177 nodes = Set.union nodes1 nodes2
180 selfParent (t,d) = let t' = (gparams ^. gt_fun_stem) t
184 ((gparams ^. gt_fun_score) d)
185 ((gparams ^. gt_fun_texts) d)
188 ((gparams ^. gt_fun_nodeIds) d)
193 ------------------------------------------------------------------------
196 data GroupedText score =
197 GroupedText { _gt_listType :: !(Maybe ListType)
198 , _gt_label :: !Label
199 , _gt_score :: !score
200 , _gt_children :: !(Set Text)
202 , _gt_stem :: !Stem -- needed ?
203 , _gt_nodes :: !(Set NodeId)
206 instance Show score => Show (GroupedText score) where
207 show (GroupedText lt l s _ _ _ _) = show l <> " : " <> show lt <> " : " <> show s
210 instance (Eq a) => Eq (GroupedText a) where
211 (==) (GroupedText _ _ score1 _ _ _ _)
212 (GroupedText _ _ score2 _ _ _ _) = (==) score1 score2
214 instance (Eq a, Ord a) => Ord (GroupedText a) where
215 compare (GroupedText _ _ score1 _ _ _ _)
216 (GroupedText _ _ score2 _ _ _ _) = compare score1 score2
218 -- | Lenses Instances
219 makeLenses 'GroupedText
221 ------------------------------------------------------------------------
222 instance HasNgrams (Text, Set NodeId) where
225 instance HasGroup (Text, Set NodeId) Int where
226 createGroupWith fs (t, ns) = GroupedText (keyWithMaxValue $ fs ^. flc_lists)
234 (label, children) = case keyWithMaxValue $ fs ^. flc_parents of
235 Nothing -> (t, Set.empty)
236 Just t' -> (t', Set.singleton t)
238 updateGroupWith fs (t, ns) g = set gt_listType (keyWithMaxValue $ fs ^. flc_lists)
239 $ set gt_nodes (Set.union ns $ g ^. gt_nodes) g
241 ------------------------------------------------------------------------
243 addListType :: Map Text ListType -> GroupedText a -> GroupedText a
244 addListType m g = set gt_listType (hasListType m g) g
246 hasListType :: Map Text ListType -> GroupedText a -> Maybe ListType
247 hasListType m' (GroupedText _ label _ g' _ _ _) =
248 List.foldl' (<>) Nothing
249 $ map (\t -> Map.lookup t m')
251 $ Set.insert label g'