2 Module : Gargantext.Core.Text.List.Group
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
12 {-# LANGUAGE TemplateHaskell #-}
13 {-# LANGUAGE ConstraintKinds #-}
14 {-# LANGUAGE TypeFamilies #-}
15 {-# LANGUAGE FunctionalDependencies #-}
18 module Gargantext.Core.Text.List.Group
21 import Data.Maybe (fromMaybe)
22 import Control.Lens (makeLenses, set, (^.))
25 import Data.Text (Text)
26 import Data.Semigroup (Semigroup, (<>))
27 import Gargantext.Core (Lang(..))
28 import Gargantext.Core.Text (size)
29 import Gargantext.Core.Types (ListType(..)) -- (MasterCorpusId, UserCorpusId)
30 import Gargantext.Database.Admin.Types.Node (NodeId)
31 -- import Gargantext.Core.Text.List.Learn (Model(..))
32 import Gargantext.Core.Text.List.Social.Scores (FlowListScores(..), flc_lists, flc_parents, keyWithMaxValue)
33 import Gargantext.Core.Text.Terms.Mono.Stem (stem)
34 import Gargantext.Database.Schema.Ngrams (NgramsType(..))
35 import Gargantext.Prelude
36 import qualified Data.Set as Set
37 import qualified Data.Map as Map
38 import qualified Data.List as List
39 import qualified Data.Text as Text
42 data NgramsListBuilder = BuilderStepO { stemSize :: !Int
44 , stopSize :: !StopSize
46 | BuilderStep1 { withModel :: !Model }
47 | BuilderStepN { withModel :: !Model }
48 | Tficf { nlb_lang :: !Lang
51 , nlb_stopSize :: !StopSize
52 , nlb_userCorpusId :: !UserCorpusId
53 , nlb_masterCorpusId :: !MasterCorpusId
57 data StopSize = StopSize {unStopSize :: !Int}
59 -- | TODO: group with 2 terms only can be
60 -- discussed. Main purpose of this is offering
61 -- a first grouping option to user and get some
62 -- enriched data to better learn and improve that algo
63 data GroupParams = GroupParams { unGroupParams_lang :: !Lang
64 , unGroupParams_len :: !Int
65 , unGroupParams_limit :: !Int
66 , unGroupParams_stopSize :: !StopSize
70 ngramsGroup :: GroupParams
73 ngramsGroup GroupIdentity = identity
74 ngramsGroup (GroupParams l _m _n _) =
79 -- . (List.filter (\t -> Text.length t > m))
81 . Text.replace "-" " "
83 ------------------------------------------------------------------------
84 data GroupedTextParams a b =
85 GroupedTextParams { _gt_fun_stem :: Text -> Text
86 , _gt_fun_score :: a -> b
87 , _gt_fun_texts :: a -> Set Text
88 , _gt_fun_nodeIds :: a -> Set NodeId
89 -- , _gt_fun_size :: a -> Int
92 makeLenses 'GroupedTextParams
94 groupedTextWithStem :: Ord b
95 => GroupedTextParams a b
97 -> Map Stem (GroupedText b)
98 groupedTextWithStem gparams from =
99 Map.fromListWith union $ map (group gparams) $ Map.toList from
101 group gparams' (t,d) = let t' = (gparams' ^. gt_fun_stem) t
105 ((gparams' ^. gt_fun_score) d)
106 ((gparams' ^. gt_fun_texts) d)
109 ((gparams' ^. gt_fun_nodeIds) d)
112 ------------------------------------------------------------------------
113 toGroupedText :: ( FlowList a b
116 => GroupedTextParams a b
117 -> Map Text FlowListScores
119 -> Map Stem (GroupedText b)
120 toGroupedText groupParams scores =
121 (groupWithStem groupParams) . (groupWithScores scores)
124 groupWithStem :: ( FlowList a b
127 => GroupedTextParams a b
128 -> ([a], Map Text (GroupedText b))
129 -> Map Stem (GroupedText b)
130 groupWithStem _ = snd -- TODO (just for tests on Others Ngrams which do not need stem)
133 groupWithScores :: (FlowList a b, Ord b)
134 => Map Text FlowListScores
136 -> ([a], Map Text (GroupedText b))
137 groupWithScores scores ms' = foldl' fun_group start ms
139 start = ([], Map.empty)
140 ms = map selfParent $ Map.toList ms'
142 fun_group (left, grouped) current =
143 case Map.lookup (hasNgrams current) scores of
145 case keyWithMaxValue $ scores' ^. flc_parents of
146 Nothing -> (left, Map.alter (updateWith scores' current) (hasNgrams current) grouped)
147 Just parent -> fun_group (left, grouped) (withParent ms' parent current)
148 Nothing -> (current : left, grouped)
150 updateWith scores current Nothing = Just $ createGroupWithScores scores current
151 updateWith scores current (Just x) = Just $ updateGroupWithScores scores current x
153 ------------------------------------------------------------------------
154 type FlowList a b = (HasNgrams a, HasGroupWithScores a b, WithParent a)
156 class HasNgrams a where
157 hasNgrams :: a -> Text
159 class HasGroup a b | a -> b where
160 hasGroup :: a -> GroupedText b
162 class HasGroupWithStem a b where
163 hasGroupWithStem :: GroupedTextParams a b -> Map Text a -> Map Stem (GroupedText b)
165 class HasGroupWithScores a b | a -> b where
166 createGroupWithScores :: FlowListScores -> a -> GroupedText b
167 updateGroupWithScores :: FlowListScores -> a -> GroupedText b -> GroupedText b
169 class WithParent a where
170 selfParent :: (Text, c) -> a
171 withParent :: Map Text c -> Text -> a -> a
174 ------------------------------------------------------------------------
175 instance Ord a => WithParent (GroupedText a) where
176 union (GroupedText lt1 label1 score1 group1 s1 stem1 nodes1)
177 (GroupedText lt2 label2 score2 group2 s2 stem2 nodes2)
178 | score1 >= score2 = GroupedText lt label1 score1 (Set.insert label2 gr) s1 stem1 nodes
179 | otherwise = GroupedText lt label2 score2 (Set.insert label1 gr) s2 stem2 nodes
182 gr = Set.union group1 group2
183 nodes = Set.union nodes1 nodes2
186 ------------------------------------------------------------------------
187 data GroupedTextOrigin a =
188 GroupedTextOrigin { _gto_lable :: !Text
189 , _gto_ngramsType :: !NgramsType
191 , _gto_listType :: !(Maybe ListType)
192 , _gto_children :: !(Set Text)
193 , _gto_nodes :: !(Set NodeId)
196 data GroupedTextStem a =
197 GroupedTextStem { _gts_origin :: !(GroupedTextOrigin a)
201 ------------------------------------------------------------------------
203 data GroupedText score =
204 GroupedText { _gt_listType :: !(Maybe ListType)
206 , _gt_score :: !score
207 , _gt_children :: !(Set Text)
209 , _gt_stem :: !Stem -- needed ?
210 , _gt_nodes :: !(Set NodeId)
213 instance Show score => Show (GroupedText score) where
214 show (GroupedText lt l s _ _ _ _) = show l <> " : " <> show lt <> " : " <> show s
217 instance (Eq a) => Eq (GroupedText a) where
218 (==) (GroupedText _ _ score1 _ _ _ _)
219 (GroupedText _ _ score2 _ _ _ _) = (==) score1 score2
221 instance (Eq a, Ord a) => Ord (GroupedText a) where
222 compare (GroupedText _ _ score1 _ _ _ _)
223 (GroupedText _ _ score2 _ _ _ _) = compare score1 score2
225 -- | Lenses Instances
226 makeLenses 'GroupedText
228 ------------------------------------------------------------------------
230 -- | These instances seeems useless, just for debug purpose
231 instance HasNgrams (Set Text, Set NodeId) where
232 hasNgrams = fromMaybe "Nothing" . head . Set.elems . fst
234 instance HasGroupWithScores (Set Text, Set NodeId) Int where
235 createGroupWithScores = undefined
236 updateGroupWithScores = undefined
238 instance WithParent (Set Text, Set NodeId) where
241 ------------------------------------------------------------------------
242 instance HasNgrams (Text, Set NodeId) where
245 instance HasGroupWithScores (Text, Set NodeId) Int where
246 createGroupWithScores fs (t, ns) = GroupedText (keyWithMaxValue $ fs ^. flc_lists)
254 (label, children) = case keyWithMaxValue $ fs ^. flc_parents of
255 Nothing -> (t, Set.empty)
256 Just t' -> (t', Set.singleton t)
258 updateGroupWithScores fs (t, ns) g = set gt_listType (keyWithMaxValue $ fs ^. flc_lists)
259 $ set gt_nodes (Set.union ns $ g ^. gt_nodes) g
261 ------------------------------------------------------------------------
263 addListType :: Map Text ListType -> GroupedText a -> GroupedText a
264 addListType m g = set gt_listType (hasListType m g) g
266 hasListType :: Map Text ListType -> GroupedText a -> Maybe ListType
267 hasListType m' (GroupedText _ label _ g' _ _ _) =
268 List.foldl' (<>) Nothing
269 $ map (\t -> Map.lookup t m')
271 $ Set.insert label g'