2 Module : Gargantext.Core.Text.Group
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
12 {-# LANGUAGE TemplateHaskell #-}
13 {-# LANGUAGE ConstraintKinds #-}
14 {-# LANGUAGE TypeFamilies #-}
15 {-# LANGUAGE FunctionalDependencies #-}
18 module Gargantext.Core.Text.Group
21 import Control.Lens (makeLenses, set, (^.))
24 import Data.Text (Text)
25 import Gargantext.Core (Lang(..))
26 import Gargantext.Core.Text (size)
27 import Gargantext.Core.Types (ListType(..)) -- (MasterCorpusId, UserCorpusId)
28 import Gargantext.Database.Admin.Types.Node (NodeId)
29 -- import Gargantext.Core.Text.List.Learn (Model(..))
30 import Gargantext.Core.Text.List.Social.Group (FlowListScores(..), flc_lists)
31 import Gargantext.Core.Text.Terms.Mono.Stem (stem)
32 import Gargantext.Prelude
33 import qualified Data.Set as Set
34 import qualified Data.Map as Map
35 import qualified Data.List as List
36 import qualified Data.Text as Text
39 data NgramsListBuilder = BuilderStepO { stemSize :: !Int
41 , stopSize :: !StopSize
43 | BuilderStep1 { withModel :: !Model }
44 | BuilderStepN { withModel :: !Model }
45 | Tficf { nlb_lang :: !Lang
48 , nlb_stopSize :: !StopSize
49 , nlb_userCorpusId :: !UserCorpusId
50 , nlb_masterCorpusId :: !MasterCorpusId
54 data StopSize = StopSize {unStopSize :: !Int}
56 -- | TODO: group with 2 terms only can be
57 -- discussed. Main purpose of this is offering
58 -- a first grouping option to user and get some
59 -- enriched data to better learn and improve that algo
60 data GroupParams = GroupParams { unGroupParams_lang :: !Lang
61 , unGroupParams_len :: !Int
62 , unGroupParams_limit :: !Int
63 , unGroupParams_stopSize :: !StopSize
67 ngramsGroup :: GroupParams
70 ngramsGroup GroupIdentity = identity
71 ngramsGroup (GroupParams l _m _n _) =
76 -- . (List.filter (\t -> Text.length t > m))
78 . Text.replace "-" " "
80 ------------------------------------------------------------------------
81 mergeMapParent :: Map Text (GroupedText b)
82 -> Map Text (Map Text Int)
83 -> Map Text (GroupedText b)
84 mergeMapParent = undefined
86 ------------------------------------------------------------------------
87 toGroupedText :: Ord b
93 -> Map Stem (GroupedText b)
94 toGroupedText fun_stem fun_score fun_texts fun_nodeIds from =
95 Map.fromListWith grouping $ map group from
97 group (t,d) = let t' = fun_stem t
112 grouping (GroupedText lt1 label1 score1 group1 s1 stem1 nodes1)
113 (GroupedText lt2 label2 score2 group2 s2 stem2 nodes2)
114 | score1 >= score2 = GroupedText lt label1 score1 (Set.insert label2 gr) s1 stem1 nodes
115 | otherwise = GroupedText lt label2 score2 (Set.insert label1 gr) s2 stem2 nodes
118 gr = Set.union group1 group2
119 nodes = Set.union nodes1 nodes2
121 ------------------------------------------------------------------------
122 toGroupedText_FlowListScores :: ( FlowList a b
126 -> Map Text FlowListScores
127 -> Map Text (GroupedText b)
128 toGroupedText_FlowListScores = undefined
131 toGroupedText_FlowListScores' :: ( FlowList a b
134 -> Map Text FlowListScores
136 , Map Text (GroupedText b)
138 toGroupedText_FlowListScores' ms mf = foldl' fun_group start ms
140 start = ([], Map.empty)
141 fun_group (left, grouped) current =
142 case Map.lookup (hasNgrams current) mf of
143 Just scores -> (left, Map.alter (updateWith scores current) (hasNgrams current) grouped)
144 Nothing -> (current : left, grouped)
145 updateWith scores current Nothing = Just $ createGroupWith scores current
146 updateWith scores current (Just x) = Just $ updateGroupWith scores current x
148 type FlowList a b = (HasNgrams a, HasGroup a b)
150 class HasNgrams a where
151 hasNgrams :: a -> Text
153 class HasGroup a b | a -> b where
154 createGroupWith :: FlowListScores -> a -> GroupedText b
155 updateGroupWith :: FlowListScores -> a
159 ------------------------------------------
160 instance HasGroup (Text, Set NodeId) Int where
161 createGroupWith fs (t, ns) = GroupedText (mapMax $ fs ^. flc_lists)
168 updateGroupWith fs (t, ns) g = undefined
170 mapMax :: Map a b -> Maybe a
171 mapMax m = (fst . fst) <$> Map.maxViewWithKey m
172 ------------------------------------------------------------------------
175 data GroupedText score =
176 GroupedText { _gt_listType :: !(Maybe ListType)
177 , _gt_label :: !Label
178 , _gt_score :: !score
179 , _gt_children :: !(Set Text)
182 , _gt_nodes :: !(Set NodeId)
185 instance Show score => Show (GroupedText score) where
186 show (GroupedText lt l s _ _ _ _) = show l <> " : " <> show lt <> " : " <> show s
189 instance (Eq a) => Eq (GroupedText a) where
190 (==) (GroupedText _ _ score1 _ _ _ _)
191 (GroupedText _ _ score2 _ _ _ _) = (==) score1 score2
193 instance (Eq a, Ord a) => Ord (GroupedText a) where
194 compare (GroupedText _ _ score1 _ _ _ _)
195 (GroupedText _ _ score2 _ _ _ _) = compare score1 score2
198 makeLenses 'GroupedText
200 ------------------------------------------------------------------------
201 addListType :: Map Text ListType -> GroupedText a -> GroupedText a
202 addListType m g = set gt_listType (hasListType m g) g
204 hasListType :: Map Text ListType -> GroupedText a -> Maybe ListType
205 hasListType m' (GroupedText _ label _ g' _ _ _) =
206 List.foldl' (<>) Nothing
207 $ map (\t -> Map.lookup t m')
209 $ Set.insert label g'