]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Core/Text/Group.hs
[FEAT] SocialList preparing merge Scores and Grouped (WIP)
[gargantext.git] / src / Gargantext / Core / Text / Group.hs
1 {-|
2 Module : Gargantext.Core.Text.Group
3 Description :
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 -}
11
12 {-# LANGUAGE TemplateHaskell #-}
13
14 module Gargantext.Core.Text.Group
15 where
16
17 import Control.Lens (makeLenses, set)
18 import Data.Set (Set)
19 import Data.Map (Map)
20 import Data.Text (Text)
21 import Gargantext.Core (Lang(..))
22 import Gargantext.Core.Text (size)
23 import Gargantext.Core.Types (ListType(..)) -- (MasterCorpusId, UserCorpusId)
24 import Gargantext.Database.Admin.Types.Node (NodeId)
25 -- import Gargantext.Core.Text.List.Learn (Model(..))
26 import Gargantext.Core.Text.List.Social.Group (FlowListScores)
27 import Gargantext.Core.Text.Terms.Mono.Stem (stem)
28 import Gargantext.Prelude
29 import qualified Data.Set as Set
30 import qualified Data.Map as Map
31 import qualified Data.List as List
32 import qualified Data.Text as Text
33
34 {-
35 data NgramsListBuilder = BuilderStepO { stemSize :: !Int
36 , stemX :: !Int
37 , stopSize :: !StopSize
38 }
39 | BuilderStep1 { withModel :: !Model }
40 | BuilderStepN { withModel :: !Model }
41 | Tficf { nlb_lang :: !Lang
42 , nlb_group1 :: !Int
43 , nlb_group2 :: !Int
44 , nlb_stopSize :: !StopSize
45 , nlb_userCorpusId :: !UserCorpusId
46 , nlb_masterCorpusId :: !MasterCorpusId
47 }
48 -}
49
50 data StopSize = StopSize {unStopSize :: !Int}
51
52 -- | TODO: group with 2 terms only can be
53 -- discussed. Main purpose of this is offering
54 -- a first grouping option to user and get some
55 -- enriched data to better learn and improve that algo
56 data GroupParams = GroupParams { unGroupParams_lang :: !Lang
57 , unGroupParams_len :: !Int
58 , unGroupParams_limit :: !Int
59 , unGroupParams_stopSize :: !StopSize
60 }
61 | GroupIdentity
62
63 ngramsGroup :: GroupParams
64 -> Text
65 -> Text
66 ngramsGroup GroupIdentity = identity
67 ngramsGroup (GroupParams l _m _n _) =
68 Text.intercalate " "
69 . map (stem l)
70 -- . take n
71 . List.sort
72 -- . (List.filter (\t -> Text.length t > m))
73 . Text.splitOn " "
74 . Text.replace "-" " "
75
76 ------------------------------------------------------------------------
77 mergeMapParent :: Map Text (GroupedText b)
78 -> Map Text (Map Text Int)
79 -> Map Text (GroupedText b)
80 mergeMapParent = undefined
81
82 ------------------------------------------------------------------------
83 toGroupedText :: Ord b
84 => (Text -> Text )
85 -> (a -> b )
86 -> (a -> Set Text )
87 -> (a -> Set NodeId)
88 -> [(Text,a)]
89 -> Map Stem (GroupedText b)
90 toGroupedText fun_stem fun_score fun_texts fun_nodeIds from =
91 Map.fromListWith grouping $ map group from
92 where
93 group (t,d) = let t' = fun_stem t
94 in (t', GroupedText
95 Nothing
96 t
97 (fun_score d)
98 (fun_texts d)
99 (size t)
100 t'
101 (fun_nodeIds d)
102 )
103
104 grouping :: Ord a
105 => GroupedText a
106 -> GroupedText a
107 -> GroupedText a
108 grouping (GroupedText lt1 label1 score1 group1 s1 stem1 nodes1)
109 (GroupedText lt2 label2 score2 group2 s2 stem2 nodes2)
110 | score1 >= score2 = GroupedText lt label1 score1 (Set.insert label2 gr) s1 stem1 nodes
111 | otherwise = GroupedText lt label2 score2 (Set.insert label1 gr) s2 stem2 nodes
112 where
113 lt = lt1 <> lt2
114 gr = Set.union group1 group2
115 nodes = Set.union nodes1 nodes2
116
117 ------------------------------------------------------------------------
118
119 toGroupedText_FlowListScores :: Ord a
120 => Map Text (Set NodeId)
121 -> Map Text FlowListScores
122 -> Map Text (GroupedText a)
123 toGroupedText_FlowListScores = undefined
124
125
126
127 toGroupedText_FlowListScores' :: Ord a
128 => Map Text (Set NodeId)
129 -> Map Text FlowListScores
130 -> ( [(Text, Set NodeId)]
131 , Map Text (GroupedText a)
132 )
133 toGroupedText_FlowListScores' = undefined
134
135
136 ------------------------------------------------------------------------
137 type Stem = Text
138 type Label = Text
139 data GroupedText score =
140 GroupedText { _gt_listType :: !(Maybe ListType)
141 , _gt_label :: !Label
142 , _gt_score :: !score
143 , _gt_children :: !(Set Text)
144 , _gt_size :: !Int
145 , _gt_stem :: !Stem
146 , _gt_nodes :: !(Set NodeId)
147 } {-deriving Show--}
148 --{-
149 instance Show score => Show (GroupedText score) where
150 show (GroupedText lt l s _ _ _ _) = show l <> " : " <> show lt <> " : " <> show s
151 --}
152
153 instance (Eq a) => Eq (GroupedText a) where
154 (==) (GroupedText _ _ score1 _ _ _ _)
155 (GroupedText _ _ score2 _ _ _ _) = (==) score1 score2
156
157 instance (Eq a, Ord a) => Ord (GroupedText a) where
158 compare (GroupedText _ _ score1 _ _ _ _)
159 (GroupedText _ _ score2 _ _ _ _) = compare score1 score2
160
161 -- Lenses Instances
162 makeLenses 'GroupedText
163
164 ------------------------------------------------------------------------
165 addListType :: Map Text ListType -> GroupedText a -> GroupedText a
166 addListType m g = set gt_listType (hasListType m g) g
167 where
168 hasListType :: Map Text ListType -> GroupedText a -> Maybe ListType
169 hasListType m' (GroupedText _ label _ g' _ _ _) =
170 List.foldl' (<>) Nothing
171 $ map (\t -> Map.lookup t m')
172 $ Set.toList
173 $ Set.insert label g'