]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Core/Text/Group.hs
[Social List] some fixes before integration
[gargantext.git] / src / Gargantext / Core / Text / Group.hs
1 {-|
2 Module : Gargantext.Core.Text.Group
3 Description :
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 -}
11
12 {-# LANGUAGE TemplateHaskell #-}
13
14 module Gargantext.Core.Text.Group
15 where
16
17 import Control.Lens (makeLenses, set)
18 import Data.Set (Set)
19 import Data.Map (Map)
20 import Data.Text (Text)
21 import Gargantext.Core (Lang(..))
22 import Gargantext.Core.Text (size)
23 import Gargantext.Core.Types (ListType(..)) -- (MasterCorpusId, UserCorpusId)
24 import Gargantext.Database.Admin.Types.Node (NodeId)
25 -- import Gargantext.Core.Text.List.Learn (Model(..))
26 import Gargantext.Core.Text.Terms.Mono.Stem (stem)
27 import Gargantext.Prelude
28 import qualified Data.Set as Set
29 import qualified Data.Map as Map
30 import qualified Data.List as List
31 import qualified Data.Text as Text
32
33 {-
34 data NgramsListBuilder = BuilderStepO { stemSize :: !Int
35 , stemX :: !Int
36 , stopSize :: !StopSize
37 }
38 | BuilderStep1 { withModel :: !Model }
39 | BuilderStepN { withModel :: !Model }
40 | Tficf { nlb_lang :: !Lang
41 , nlb_group1 :: !Int
42 , nlb_group2 :: !Int
43 , nlb_stopSize :: !StopSize
44 , nlb_userCorpusId :: !UserCorpusId
45 , nlb_masterCorpusId :: !MasterCorpusId
46 }
47 -}
48
49 data StopSize = StopSize {unStopSize :: !Int}
50
51 -- | TODO: group with 2 terms only can be
52 -- discussed. Main purpose of this is offering
53 -- a first grouping option to user and get some
54 -- enriched data to better learn and improve that algo
55 data GroupParams = GroupParams { unGroupParams_lang :: !Lang
56 , unGroupParams_len :: !Int
57 , unGroupParams_limit :: !Int
58 , unGroupParams_stopSize :: !StopSize
59 }
60 | GroupIdentity
61
62 ngramsGroup :: GroupParams
63 -> Text
64 -> Text
65 ngramsGroup GroupIdentity = identity
66 ngramsGroup (GroupParams l _m _n _) = Text.intercalate " "
67 . map (stem l)
68 -- . take n
69 . List.sort
70 -- . (List.filter (\t -> Text.length t > m))
71 . Text.splitOn " "
72 . Text.replace "-" " "
73
74 ------------------------------------------------------------------------
75 mergeMapParent :: Map Text (GroupedText b)
76 -> Map Text (Map Text Int)
77 -> Map Text (GroupedText b)
78 mergeMapParent = undefined
79
80 ------------------------------------------------------------------------
81 toGroupedText :: Ord b
82 => (Text -> Text)
83 -> (a -> b)
84 -> (a -> Set Text)
85 -> (a -> Set NodeId)
86 -> [(Text,a)]
87 -> Map Stem (GroupedText b)
88 toGroupedText fun_stem fun_score fun_texts fun_nodeIds from = groupStems' $ map group from
89 where
90 group (t,d) = let t' = fun_stem t
91 in (t', GroupedText
92 Nothing
93 t
94 (fun_score d)
95 (fun_texts d)
96 (size t)
97 t'
98 (fun_nodeIds d)
99 )
100
101 groupStems :: [(Stem, GroupedText Double)] -> [GroupedText Double]
102 groupStems = Map.elems . groupStems'
103
104 groupStems' :: Ord a => [(Stem, GroupedText a)] -> Map Stem (GroupedText a)
105 groupStems' = Map.fromListWith grouping
106 where
107 grouping (GroupedText lt1 label1 score1 group1 s1 stem1 nodes1)
108 (GroupedText lt2 label2 score2 group2 s2 stem2 nodes2)
109 | score1 >= score2 = GroupedText lt label1 score1 (Set.insert label2 gr) s1 stem1 nodes
110 | otherwise = GroupedText lt label2 score2 (Set.insert label1 gr) s2 stem2 nodes
111 where
112 lt = lt1 <> lt2
113 gr = Set.union group1 group2
114 nodes = Set.union nodes1 nodes2
115
116 ------------------------------------------------------------------------
117 type Group = Lang -> Int -> Int -> Text -> Text
118 type Stem = Text
119 type Label = Text
120 data GroupedText score =
121 GroupedText { _gt_listType :: !(Maybe ListType)
122 , _gt_label :: !Label
123 , _gt_score :: !score
124 , _gt_children :: !(Set Text)
125 , _gt_size :: !Int
126 , _gt_stem :: !Stem
127 , _gt_nodes :: !(Set NodeId)
128 } {-deriving Show--}
129 --{-
130 instance Show score => Show (GroupedText score) where
131 show (GroupedText lt l s _ _ _ _) = show l <> " : " <> show lt <> " : " <> show s
132 --}
133
134 instance (Eq a) => Eq (GroupedText a) where
135 (==) (GroupedText _ _ score1 _ _ _ _)
136 (GroupedText _ _ score2 _ _ _ _) = (==) score1 score2
137
138 instance (Eq a, Ord a) => Ord (GroupedText a) where
139 compare (GroupedText _ _ score1 _ _ _ _)
140 (GroupedText _ _ score2 _ _ _ _) = compare score1 score2
141
142 -- Lenses Instances
143 makeLenses 'GroupedText
144
145 ------------------------------------------------------------------------
146 addListType :: Map Text ListType -> GroupedText a -> GroupedText a
147 addListType m g = set gt_listType (hasListType m g) g
148 where
149 hasListType :: Map Text ListType -> GroupedText a -> Maybe ListType
150 hasListType m' (GroupedText _ label _ g' _ _ _) =
151 List.foldl' (<>) Nothing
152 $ map (\t -> Map.lookup t m')
153 $ Set.toList
154 $ Set.insert label g'
155
156