2 Module : Gargantext.Core.Text.Group
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
12 {-# LANGUAGE TemplateHaskell #-}
13 {-# LANGUAGE ConstraintKinds #-}
14 {-# LANGUAGE TypeFamilies #-}
16 module Gargantext.Core.Text.Group
19 import Control.Lens (makeLenses, set, (^.))
22 import Data.Text (Text)
23 import Gargantext.Core (Lang(..))
24 import Gargantext.Core.Text (size)
25 import Gargantext.Core.Types (ListType(..)) -- (MasterCorpusId, UserCorpusId)
26 import Gargantext.Database.Admin.Types.Node (NodeId)
27 -- import Gargantext.Core.Text.List.Learn (Model(..))
28 import Gargantext.Core.Text.List.Social.Group (FlowListScores(..), flc_lists)
29 import Gargantext.Core.Text.Terms.Mono.Stem (stem)
30 import Gargantext.Prelude
31 import qualified Data.Set as Set
32 import qualified Data.Map as Map
33 import qualified Data.List as List
34 import qualified Data.Text as Text
37 data NgramsListBuilder = BuilderStepO { stemSize :: !Int
39 , stopSize :: !StopSize
41 | BuilderStep1 { withModel :: !Model }
42 | BuilderStepN { withModel :: !Model }
43 | Tficf { nlb_lang :: !Lang
46 , nlb_stopSize :: !StopSize
47 , nlb_userCorpusId :: !UserCorpusId
48 , nlb_masterCorpusId :: !MasterCorpusId
52 data StopSize = StopSize {unStopSize :: !Int}
54 -- | TODO: group with 2 terms only can be
55 -- discussed. Main purpose of this is offering
56 -- a first grouping option to user and get some
57 -- enriched data to better learn and improve that algo
58 data GroupParams = GroupParams { unGroupParams_lang :: !Lang
59 , unGroupParams_len :: !Int
60 , unGroupParams_limit :: !Int
61 , unGroupParams_stopSize :: !StopSize
65 ngramsGroup :: GroupParams
68 ngramsGroup GroupIdentity = identity
69 ngramsGroup (GroupParams l _m _n _) =
74 -- . (List.filter (\t -> Text.length t > m))
76 . Text.replace "-" " "
78 ------------------------------------------------------------------------
79 mergeMapParent :: Map Text (GroupedText b)
80 -> Map Text (Map Text Int)
81 -> Map Text (GroupedText b)
82 mergeMapParent = undefined
84 ------------------------------------------------------------------------
85 toGroupedText :: Ord b
91 -> Map Stem (GroupedText b)
92 toGroupedText fun_stem fun_score fun_texts fun_nodeIds from =
93 Map.fromListWith grouping $ map group from
95 group (t,d) = let t' = fun_stem t
110 grouping (GroupedText lt1 label1 score1 group1 s1 stem1 nodes1)
111 (GroupedText lt2 label2 score2 group2 s2 stem2 nodes2)
112 | score1 >= score2 = GroupedText lt label1 score1 (Set.insert label2 gr) s1 stem1 nodes
113 | otherwise = GroupedText lt label2 score2 (Set.insert label1 gr) s2 stem2 nodes
116 gr = Set.union group1 group2
117 nodes = Set.union nodes1 nodes2
119 ------------------------------------------------------------------------
120 toGroupedText_FlowListScores :: ( FlowList a
124 -> Map Text FlowListScores
125 -> Map Text (GroupedText b)
126 toGroupedText_FlowListScores = undefined
129 toGroupedText_FlowListScores' :: ( FlowList a
133 -> Map Text FlowListScores
135 , Map Text (GroupedText b)
137 toGroupedText_FlowListScores' ms mf = foldl' fun_group start ms
139 start = ([], Map.empty)
140 fun_group (left, grouped) current =
141 case Map.lookup (hasNgrams current) mf of
142 Just scores -> (left, Map.alter (updateWith scores current) (hasNgrams current) grouped)
143 Nothing -> (current : left, grouped)
144 updateWith scores current Nothing = Just $ createGroupWith scores current
145 updateWith scores current (Just x) = Just $ updateGroupWith scores current x
147 type FlowList a = (HasNgrams a, HasGroup a)
149 class HasNgrams a where
150 hasNgrams :: a -> Text
152 class HasGroup a where
153 createGroupWith :: (b ~ GroupFamily a) => FlowListScores -> a -> GroupedText b
154 updateGroupWith :: (b ~ GroupFamily a)
155 => FlowListScores -> a
159 -- | Check if functional dependency is better
160 type family GroupFamily a
161 type instance GroupFamily (Text, Set NodeId) = Int
163 ------------------------------------------
164 instance HasGroup (Text, Set NodeId) where
165 createGroupWith fs (t, ns) = GroupedText (mapMax $ fs ^. flc_lists)
172 updateGroupWith fs (t, ns) g = undefined
174 mapMax :: Map a b -> Maybe a
175 mapMax m = (fst . fst) <$> Map.maxViewWithKey m
176 ------------------------------------------------------------------------
179 data GroupedText score =
180 GroupedText { _gt_listType :: !(Maybe ListType)
181 , _gt_label :: !Label
182 , _gt_score :: !score
183 , _gt_children :: !(Set Text)
186 , _gt_nodes :: !(Set NodeId)
189 instance Show score => Show (GroupedText score) where
190 show (GroupedText lt l s _ _ _ _) = show l <> " : " <> show lt <> " : " <> show s
193 instance (Eq a) => Eq (GroupedText a) where
194 (==) (GroupedText _ _ score1 _ _ _ _)
195 (GroupedText _ _ score2 _ _ _ _) = (==) score1 score2
197 instance (Eq a, Ord a) => Ord (GroupedText a) where
198 compare (GroupedText _ _ score1 _ _ _ _)
199 (GroupedText _ _ score2 _ _ _ _) = compare score1 score2
202 makeLenses 'GroupedText
204 ------------------------------------------------------------------------
205 addListType :: Map Text ListType -> GroupedText a -> GroupedText a
206 addListType m g = set gt_listType (hasListType m g) g
208 hasListType :: Map Text ListType -> GroupedText a -> Maybe ListType
209 hasListType m' (GroupedText _ label _ g' _ _ _) =
210 List.foldl' (<>) Nothing
211 $ map (\t -> Map.lookup t m')
213 $ Set.insert label g'