2 Module : Gargantext.Core.Text.Group
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
12 {-# LANGUAGE TemplateHaskell #-}
14 module Gargantext.Core.Text.Group
17 import Control.Lens (makeLenses, set)
20 import Data.Text (Text)
21 import Gargantext.Core (Lang(..))
22 import Gargantext.Core.Text (size)
23 import Gargantext.Core.Types (ListType(..)) -- (MasterCorpusId, UserCorpusId)
24 import Gargantext.Database.Admin.Types.Node (NodeId)
25 -- import Gargantext.Core.Text.List.Learn (Model(..))
26 import Gargantext.Core.Text.List.Social.Group (FlowListScores)
27 import Gargantext.Core.Text.Terms.Mono.Stem (stem)
28 import Gargantext.Prelude
29 import qualified Data.Set as Set
30 import qualified Data.Map as Map
31 import qualified Data.List as List
32 import qualified Data.Text as Text
35 data NgramsListBuilder = BuilderStepO { stemSize :: !Int
37 , stopSize :: !StopSize
39 | BuilderStep1 { withModel :: !Model }
40 | BuilderStepN { withModel :: !Model }
41 | Tficf { nlb_lang :: !Lang
44 , nlb_stopSize :: !StopSize
45 , nlb_userCorpusId :: !UserCorpusId
46 , nlb_masterCorpusId :: !MasterCorpusId
50 data StopSize = StopSize {unStopSize :: !Int}
52 -- | TODO: group with 2 terms only can be
53 -- discussed. Main purpose of this is offering
54 -- a first grouping option to user and get some
55 -- enriched data to better learn and improve that algo
56 data GroupParams = GroupParams { unGroupParams_lang :: !Lang
57 , unGroupParams_len :: !Int
58 , unGroupParams_limit :: !Int
59 , unGroupParams_stopSize :: !StopSize
63 ngramsGroup :: GroupParams
66 ngramsGroup GroupIdentity = identity
67 ngramsGroup (GroupParams l _m _n _) =
72 -- . (List.filter (\t -> Text.length t > m))
74 . Text.replace "-" " "
76 ------------------------------------------------------------------------
77 mergeMapParent :: Map Text (GroupedText b)
78 -> Map Text (Map Text Int)
79 -> Map Text (GroupedText b)
80 mergeMapParent = undefined
82 ------------------------------------------------------------------------
83 toGroupedText :: Ord b
89 -> Map Stem (GroupedText b)
90 toGroupedText fun_stem fun_score fun_texts fun_nodeIds from =
91 Map.fromListWith grouping $ map group from
93 group (t,d) = let t' = fun_stem t
108 grouping (GroupedText lt1 label1 score1 group1 s1 stem1 nodes1)
109 (GroupedText lt2 label2 score2 group2 s2 stem2 nodes2)
110 | score1 >= score2 = GroupedText lt label1 score1 (Set.insert label2 gr) s1 stem1 nodes
111 | otherwise = GroupedText lt label2 score2 (Set.insert label1 gr) s2 stem2 nodes
114 gr = Set.union group1 group2
115 nodes = Set.union nodes1 nodes2
117 ------------------------------------------------------------------------
119 toGroupedText_FlowListScores :: Ord a
120 => Map Text (Set NodeId)
121 -> Map Text FlowListScores
122 -> Map Text (GroupedText a)
123 toGroupedText_FlowListScores = undefined
127 toGroupedText_FlowListScores' :: Ord a
128 => Map Text (Set NodeId)
129 -> Map Text FlowListScores
130 -> ( [(Text, Set NodeId)]
131 , Map Text (GroupedText a)
133 toGroupedText_FlowListScores' = undefined
136 ------------------------------------------------------------------------
139 data GroupedText score =
140 GroupedText { _gt_listType :: !(Maybe ListType)
141 , _gt_label :: !Label
142 , _gt_score :: !score
143 , _gt_children :: !(Set Text)
146 , _gt_nodes :: !(Set NodeId)
149 instance Show score => Show (GroupedText score) where
150 show (GroupedText lt l s _ _ _ _) = show l <> " : " <> show lt <> " : " <> show s
153 instance (Eq a) => Eq (GroupedText a) where
154 (==) (GroupedText _ _ score1 _ _ _ _)
155 (GroupedText _ _ score2 _ _ _ _) = (==) score1 score2
157 instance (Eq a, Ord a) => Ord (GroupedText a) where
158 compare (GroupedText _ _ score1 _ _ _ _)
159 (GroupedText _ _ score2 _ _ _ _) = compare score1 score2
162 makeLenses 'GroupedText
164 ------------------------------------------------------------------------
165 addListType :: Map Text ListType -> GroupedText a -> GroupedText a
166 addListType m g = set gt_listType (hasListType m g) g
168 hasListType :: Map Text ListType -> GroupedText a -> Maybe ListType
169 hasListType m' (GroupedText _ label _ g' _ _ _) =
170 List.foldl' (<>) Nothing
171 $ map (\t -> Map.lookup t m')
173 $ Set.insert label g'