]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Core/Text/Group.hs
[FEAT] SocialList with TypeFamilies
[gargantext.git] / src / Gargantext / Core / Text / Group.hs
1 {-|
2 Module : Gargantext.Core.Text.Group
3 Description :
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 -}
11
12 {-# LANGUAGE TemplateHaskell #-}
13 {-# LANGUAGE ConstraintKinds #-}
14 {-# LANGUAGE TypeFamilies #-}
15
16 module Gargantext.Core.Text.Group
17 where
18
19 import Control.Lens (makeLenses, set, (^.))
20 import Data.Set (Set)
21 import Data.Map (Map)
22 import Data.Text (Text)
23 import Gargantext.Core (Lang(..))
24 import Gargantext.Core.Text (size)
25 import Gargantext.Core.Types (ListType(..)) -- (MasterCorpusId, UserCorpusId)
26 import Gargantext.Database.Admin.Types.Node (NodeId)
27 -- import Gargantext.Core.Text.List.Learn (Model(..))
28 import Gargantext.Core.Text.List.Social.Group (FlowListScores(..), flc_lists)
29 import Gargantext.Core.Text.Terms.Mono.Stem (stem)
30 import Gargantext.Prelude
31 import qualified Data.Set as Set
32 import qualified Data.Map as Map
33 import qualified Data.List as List
34 import qualified Data.Text as Text
35
36 {-
37 data NgramsListBuilder = BuilderStepO { stemSize :: !Int
38 , stemX :: !Int
39 , stopSize :: !StopSize
40 }
41 | BuilderStep1 { withModel :: !Model }
42 | BuilderStepN { withModel :: !Model }
43 | Tficf { nlb_lang :: !Lang
44 , nlb_group1 :: !Int
45 , nlb_group2 :: !Int
46 , nlb_stopSize :: !StopSize
47 , nlb_userCorpusId :: !UserCorpusId
48 , nlb_masterCorpusId :: !MasterCorpusId
49 }
50 -}
51
52 data StopSize = StopSize {unStopSize :: !Int}
53
54 -- | TODO: group with 2 terms only can be
55 -- discussed. Main purpose of this is offering
56 -- a first grouping option to user and get some
57 -- enriched data to better learn and improve that algo
58 data GroupParams = GroupParams { unGroupParams_lang :: !Lang
59 , unGroupParams_len :: !Int
60 , unGroupParams_limit :: !Int
61 , unGroupParams_stopSize :: !StopSize
62 }
63 | GroupIdentity
64
65 ngramsGroup :: GroupParams
66 -> Text
67 -> Text
68 ngramsGroup GroupIdentity = identity
69 ngramsGroup (GroupParams l _m _n _) =
70 Text.intercalate " "
71 . map (stem l)
72 -- . take n
73 . List.sort
74 -- . (List.filter (\t -> Text.length t > m))
75 . Text.splitOn " "
76 . Text.replace "-" " "
77
78 ------------------------------------------------------------------------
79 mergeMapParent :: Map Text (GroupedText b)
80 -> Map Text (Map Text Int)
81 -> Map Text (GroupedText b)
82 mergeMapParent = undefined
83
84 ------------------------------------------------------------------------
85 toGroupedText :: Ord b
86 => (Text -> Text )
87 -> (a -> b )
88 -> (a -> Set Text )
89 -> (a -> Set NodeId)
90 -> [(Text,a)]
91 -> Map Stem (GroupedText b)
92 toGroupedText fun_stem fun_score fun_texts fun_nodeIds from =
93 Map.fromListWith grouping $ map group from
94 where
95 group (t,d) = let t' = fun_stem t
96 in (t', GroupedText
97 Nothing
98 t
99 (fun_score d)
100 (fun_texts d)
101 (size t)
102 t'
103 (fun_nodeIds d)
104 )
105
106 grouping :: Ord a
107 => GroupedText a
108 -> GroupedText a
109 -> GroupedText a
110 grouping (GroupedText lt1 label1 score1 group1 s1 stem1 nodes1)
111 (GroupedText lt2 label2 score2 group2 s2 stem2 nodes2)
112 | score1 >= score2 = GroupedText lt label1 score1 (Set.insert label2 gr) s1 stem1 nodes
113 | otherwise = GroupedText lt label2 score2 (Set.insert label1 gr) s2 stem2 nodes
114 where
115 lt = lt1 <> lt2
116 gr = Set.union group1 group2
117 nodes = Set.union nodes1 nodes2
118
119 ------------------------------------------------------------------------
120 toGroupedText_FlowListScores :: ( FlowList a
121 , Ord a
122 )
123 => [a]
124 -> Map Text FlowListScores
125 -> Map Text (GroupedText b)
126 toGroupedText_FlowListScores = undefined
127
128
129 toGroupedText_FlowListScores' :: ( FlowList a
130 , b ~ GroupFamily a
131 )
132 => [a]
133 -> Map Text FlowListScores
134 -> ( [a]
135 , Map Text (GroupedText b)
136 )
137 toGroupedText_FlowListScores' ms mf = foldl' fun_group start ms
138 where
139 start = ([], Map.empty)
140 fun_group (left, grouped) current =
141 case Map.lookup (hasNgrams current) mf of
142 Just scores -> (left, Map.alter (updateWith scores current) (hasNgrams current) grouped)
143 Nothing -> (current : left, grouped)
144 updateWith scores current Nothing = Just $ createGroupWith scores current
145 updateWith scores current (Just x) = Just $ updateGroupWith scores current x
146
147 type FlowList a = (HasNgrams a, HasGroup a)
148
149 class HasNgrams a where
150 hasNgrams :: a -> Text
151
152 class HasGroup a where
153 createGroupWith :: (b ~ GroupFamily a) => FlowListScores -> a -> GroupedText b
154 updateGroupWith :: (b ~ GroupFamily a)
155 => FlowListScores -> a
156 -> GroupedText b
157 -> GroupedText b
158
159 -- | Check if functional dependency is better
160 type family GroupFamily a
161 type instance GroupFamily (Text, Set NodeId) = Int
162
163 ------------------------------------------
164 instance HasGroup (Text, Set NodeId) where
165 createGroupWith fs (t, ns) = GroupedText (mapMax $ fs ^. flc_lists)
166 t
167 (Set.size ns)
168 Set.empty
169 (size t)
170 t
171 ns
172 updateGroupWith fs (t, ns) g = undefined
173
174 mapMax :: Map a b -> Maybe a
175 mapMax m = (fst . fst) <$> Map.maxViewWithKey m
176 ------------------------------------------------------------------------
177 type Stem = Text
178 type Label = Text
179 data GroupedText score =
180 GroupedText { _gt_listType :: !(Maybe ListType)
181 , _gt_label :: !Label
182 , _gt_score :: !score
183 , _gt_children :: !(Set Text)
184 , _gt_size :: !Int
185 , _gt_stem :: !Stem
186 , _gt_nodes :: !(Set NodeId)
187 } {-deriving Show--}
188 --{-
189 instance Show score => Show (GroupedText score) where
190 show (GroupedText lt l s _ _ _ _) = show l <> " : " <> show lt <> " : " <> show s
191 --}
192
193 instance (Eq a) => Eq (GroupedText a) where
194 (==) (GroupedText _ _ score1 _ _ _ _)
195 (GroupedText _ _ score2 _ _ _ _) = (==) score1 score2
196
197 instance (Eq a, Ord a) => Ord (GroupedText a) where
198 compare (GroupedText _ _ score1 _ _ _ _)
199 (GroupedText _ _ score2 _ _ _ _) = compare score1 score2
200
201 -- Lenses Instances
202 makeLenses 'GroupedText
203
204 ------------------------------------------------------------------------
205 addListType :: Map Text ListType -> GroupedText a -> GroupedText a
206 addListType m g = set gt_listType (hasListType m g) g
207 where
208 hasListType :: Map Text ListType -> GroupedText a -> Maybe ListType
209 hasListType m' (GroupedText _ label _ g' _ _ _) =
210 List.foldl' (<>) Nothing
211 $ map (\t -> Map.lookup t m')
212 $ Set.toList
213 $ Set.insert label g'