]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Core/Text/Group.hs
[REFACT] Group fun and types
[gargantext.git] / src / Gargantext / Core / Text / Group.hs
1 {-|
2 Module : Gargantext.Core.Text.Group
3 Description :
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 -}
11
12 {-# LANGUAGE TemplateHaskell #-}
13
14 module Gargantext.Core.Text.Group
15 where
16
17 import Control.Lens (makeLenses, set)
18 import Data.Set (Set)
19 import Data.Map (Map)
20 import Data.Text (Text)
21 import Gargantext.Core (Lang(..))
22 import Gargantext.Core.Types (ListType(..))
23 import Gargantext.Database.Admin.Types.Node (NodeId)
24 import Gargantext.Core.Text.List.Learn (Model(..))
25 import Gargantext.Core.Types (MasterCorpusId, UserCorpusId)
26 import Gargantext.Core.Text.Terms.Mono.Stem (stem)
27 import Gargantext.Prelude
28 import qualified Data.Set as Set
29 import qualified Data.Map as Map
30 import qualified Data.List as List
31 import qualified Data.Text as Text
32
33 data NgramsListBuilder = BuilderStepO { stemSize :: !Int
34 , stemX :: !Int
35 , stopSize :: !StopSize
36 }
37 | BuilderStep1 { withModel :: !Model }
38 | BuilderStepN { withModel :: !Model }
39 | Tficf { nlb_lang :: !Lang
40 , nlb_group1 :: !Int
41 , nlb_group2 :: !Int
42 , nlb_stopSize :: !StopSize
43 , nlb_userCorpusId :: !UserCorpusId
44 , nlb_masterCorpusId :: !MasterCorpusId
45 }
46
47 data StopSize = StopSize {unStopSize :: !Int}
48
49 -- | TODO: group with 2 terms only can be
50 -- discussed. Main purpose of this is offering
51 -- a first grouping option to user and get some
52 -- enriched data to better learn and improve that algo
53
54
55 ngramsGroup :: Lang
56 -> Int
57 -> Int
58 -> Text
59 -> Text
60 ngramsGroup l _m _n = Text.intercalate " "
61 . map (stem l)
62 -- . take n
63 . List.sort
64 -- . (List.filter (\t -> Text.length t > m))
65 . Text.splitOn " "
66 . Text.replace "-" " "
67
68 ------------------------------------------------------------------------------
69 type Group = Lang -> Int -> Int -> Text -> Text
70 type Stem = Text
71 type Label = Text
72 data GroupedText score =
73 GroupedText { _gt_listType :: !(Maybe ListType)
74 , _gt_label :: !Label
75 , _gt_score :: !score
76 , _gt_group :: !(Set Text)
77 , _gt_size :: !Int
78 , _gt_stem :: !Stem
79 , _gt_nodes :: !(Set NodeId)
80 }
81 instance Show score => Show (GroupedText score) where
82 show (GroupedText _ l s _ _ _ _) = show l <> ":" <> show s
83
84 instance (Eq a) => Eq (GroupedText a) where
85 (==) (GroupedText _ _ score1 _ _ _ _)
86 (GroupedText _ _ score2 _ _ _ _) = (==) score1 score2
87
88 instance (Eq a, Ord a) => Ord (GroupedText a) where
89 compare (GroupedText _ _ score1 _ _ _ _)
90 (GroupedText _ _ score2 _ _ _ _) = compare score1 score2
91
92 -- Lenses Instances
93 makeLenses 'GroupedText
94
95 ------------------------------------------------------------------------------
96 addListType :: Map Text ListType -> GroupedText a -> GroupedText a
97 addListType m g = set gt_listType lt g
98 where
99 lt = hasListType m g
100
101 hasListType :: Map Text ListType -> GroupedText a -> Maybe ListType
102 hasListType m (GroupedText _ label _ g _ _ _) =
103 List.foldl' (<>) Nothing
104 $ map (\t -> Map.lookup t m)
105 $ Set.toList
106 $ Set.insert label g
107
108
109
110
111