]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Core/Text/Group.hs
[FEAT] SocialList with Functional Dependency
[gargantext.git] / src / Gargantext / Core / Text / Group.hs
1 {-|
2 Module : Gargantext.Core.Text.Group
3 Description :
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 -}
11
12 {-# LANGUAGE TemplateHaskell #-}
13 {-# LANGUAGE ConstraintKinds #-}
14 {-# LANGUAGE TypeFamilies #-}
15 {-# LANGUAGE FunctionalDependencies #-}
16
17
18 module Gargantext.Core.Text.Group
19 where
20
21 import Control.Lens (makeLenses, set, (^.))
22 import Data.Set (Set)
23 import Data.Map (Map)
24 import Data.Text (Text)
25 import Gargantext.Core (Lang(..))
26 import Gargantext.Core.Text (size)
27 import Gargantext.Core.Types (ListType(..)) -- (MasterCorpusId, UserCorpusId)
28 import Gargantext.Database.Admin.Types.Node (NodeId)
29 -- import Gargantext.Core.Text.List.Learn (Model(..))
30 import Gargantext.Core.Text.List.Social.Group (FlowListScores(..), flc_lists)
31 import Gargantext.Core.Text.Terms.Mono.Stem (stem)
32 import Gargantext.Prelude
33 import qualified Data.Set as Set
34 import qualified Data.Map as Map
35 import qualified Data.List as List
36 import qualified Data.Text as Text
37
38 {-
39 data NgramsListBuilder = BuilderStepO { stemSize :: !Int
40 , stemX :: !Int
41 , stopSize :: !StopSize
42 }
43 | BuilderStep1 { withModel :: !Model }
44 | BuilderStepN { withModel :: !Model }
45 | Tficf { nlb_lang :: !Lang
46 , nlb_group1 :: !Int
47 , nlb_group2 :: !Int
48 , nlb_stopSize :: !StopSize
49 , nlb_userCorpusId :: !UserCorpusId
50 , nlb_masterCorpusId :: !MasterCorpusId
51 }
52 -}
53
54 data StopSize = StopSize {unStopSize :: !Int}
55
56 -- | TODO: group with 2 terms only can be
57 -- discussed. Main purpose of this is offering
58 -- a first grouping option to user and get some
59 -- enriched data to better learn and improve that algo
60 data GroupParams = GroupParams { unGroupParams_lang :: !Lang
61 , unGroupParams_len :: !Int
62 , unGroupParams_limit :: !Int
63 , unGroupParams_stopSize :: !StopSize
64 }
65 | GroupIdentity
66
67 ngramsGroup :: GroupParams
68 -> Text
69 -> Text
70 ngramsGroup GroupIdentity = identity
71 ngramsGroup (GroupParams l _m _n _) =
72 Text.intercalate " "
73 . map (stem l)
74 -- . take n
75 . List.sort
76 -- . (List.filter (\t -> Text.length t > m))
77 . Text.splitOn " "
78 . Text.replace "-" " "
79
80 ------------------------------------------------------------------------
81 mergeMapParent :: Map Text (GroupedText b)
82 -> Map Text (Map Text Int)
83 -> Map Text (GroupedText b)
84 mergeMapParent = undefined
85
86 ------------------------------------------------------------------------
87 toGroupedText :: Ord b
88 => (Text -> Text )
89 -> (a -> b )
90 -> (a -> Set Text )
91 -> (a -> Set NodeId)
92 -> [(Text,a)]
93 -> Map Stem (GroupedText b)
94 toGroupedText fun_stem fun_score fun_texts fun_nodeIds from =
95 Map.fromListWith grouping $ map group from
96 where
97 group (t,d) = let t' = fun_stem t
98 in (t', GroupedText
99 Nothing
100 t
101 (fun_score d)
102 (fun_texts d)
103 (size t)
104 t'
105 (fun_nodeIds d)
106 )
107
108 grouping :: Ord a
109 => GroupedText a
110 -> GroupedText a
111 -> GroupedText a
112 grouping (GroupedText lt1 label1 score1 group1 s1 stem1 nodes1)
113 (GroupedText lt2 label2 score2 group2 s2 stem2 nodes2)
114 | score1 >= score2 = GroupedText lt label1 score1 (Set.insert label2 gr) s1 stem1 nodes
115 | otherwise = GroupedText lt label2 score2 (Set.insert label1 gr) s2 stem2 nodes
116 where
117 lt = lt1 <> lt2
118 gr = Set.union group1 group2
119 nodes = Set.union nodes1 nodes2
120
121 ------------------------------------------------------------------------
122 toGroupedText_FlowListScores :: ( FlowList a b
123 , Ord a
124 )
125 => [a]
126 -> Map Text FlowListScores
127 -> Map Text (GroupedText b)
128 toGroupedText_FlowListScores = undefined
129
130
131 toGroupedText_FlowListScores' :: ( FlowList a b
132 )
133 => [a]
134 -> Map Text FlowListScores
135 -> ( [a]
136 , Map Text (GroupedText b)
137 )
138 toGroupedText_FlowListScores' ms mf = foldl' fun_group start ms
139 where
140 start = ([], Map.empty)
141 fun_group (left, grouped) current =
142 case Map.lookup (hasNgrams current) mf of
143 Just scores -> (left, Map.alter (updateWith scores current) (hasNgrams current) grouped)
144 Nothing -> (current : left, grouped)
145 updateWith scores current Nothing = Just $ createGroupWith scores current
146 updateWith scores current (Just x) = Just $ updateGroupWith scores current x
147
148 type FlowList a b = (HasNgrams a, HasGroup a b)
149
150 class HasNgrams a where
151 hasNgrams :: a -> Text
152
153 class HasGroup a b | a -> b where
154 createGroupWith :: FlowListScores -> a -> GroupedText b
155 updateGroupWith :: FlowListScores -> a
156 -> GroupedText b
157 -> GroupedText b
158
159 ------------------------------------------
160 instance HasGroup (Text, Set NodeId) Int where
161 createGroupWith fs (t, ns) = GroupedText (mapMax $ fs ^. flc_lists)
162 t
163 (Set.size ns)
164 Set.empty
165 (size t)
166 t
167 ns
168 updateGroupWith fs (t, ns) g = undefined
169
170 mapMax :: Map a b -> Maybe a
171 mapMax m = (fst . fst) <$> Map.maxViewWithKey m
172 ------------------------------------------------------------------------
173 type Stem = Text
174 type Label = Text
175 data GroupedText score =
176 GroupedText { _gt_listType :: !(Maybe ListType)
177 , _gt_label :: !Label
178 , _gt_score :: !score
179 , _gt_children :: !(Set Text)
180 , _gt_size :: !Int
181 , _gt_stem :: !Stem
182 , _gt_nodes :: !(Set NodeId)
183 } {-deriving Show--}
184 --{-
185 instance Show score => Show (GroupedText score) where
186 show (GroupedText lt l s _ _ _ _) = show l <> " : " <> show lt <> " : " <> show s
187 --}
188
189 instance (Eq a) => Eq (GroupedText a) where
190 (==) (GroupedText _ _ score1 _ _ _ _)
191 (GroupedText _ _ score2 _ _ _ _) = (==) score1 score2
192
193 instance (Eq a, Ord a) => Ord (GroupedText a) where
194 compare (GroupedText _ _ score1 _ _ _ _)
195 (GroupedText _ _ score2 _ _ _ _) = compare score1 score2
196
197 -- Lenses Instances
198 makeLenses 'GroupedText
199
200 ------------------------------------------------------------------------
201 addListType :: Map Text ListType -> GroupedText a -> GroupedText a
202 addListType m g = set gt_listType (hasListType m g) g
203 where
204 hasListType :: Map Text ListType -> GroupedText a -> Maybe ListType
205 hasListType m' (GroupedText _ label _ g' _ _ _) =
206 List.foldl' (<>) Nothing
207 $ map (\t -> Map.lookup t m')
208 $ Set.toList
209 $ Set.insert label g'