]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Core/Text/Group.hs
[Clean] code
[gargantext.git] / src / Gargantext / Core / Text / Group.hs
1 {-|
2 Module : Gargantext.Core.Text.Group
3 Description :
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 -}
11
12 {-# LANGUAGE TemplateHaskell #-}
13
14 module Gargantext.Core.Text.Group
15 where
16
17 import Control.Lens (makeLenses, set)
18 import Data.Set (Set)
19 import Data.Map (Map)
20 import Data.Text (Text)
21 import Gargantext.Core (Lang(..))
22 import Gargantext.Core.Types (ListType(..))
23 import Gargantext.Database.Admin.Types.Node (NodeId)
24 import Gargantext.Core.Text.List.Learn (Model(..))
25 import Gargantext.Core.Types (MasterCorpusId, UserCorpusId)
26 import Gargantext.Core.Text.Terms.Mono.Stem (stem)
27 import Gargantext.Prelude
28 import qualified Data.Set as Set
29 import qualified Data.Map as Map
30 import qualified Data.List as List
31 import qualified Data.Text as Text
32
33 data NgramsListBuilder = BuilderStepO { stemSize :: !Int
34 , stemX :: !Int
35 , stopSize :: !StopSize
36 }
37 | BuilderStep1 { withModel :: !Model }
38 | BuilderStepN { withModel :: !Model }
39 | Tficf { nlb_lang :: !Lang
40 , nlb_group1 :: !Int
41 , nlb_group2 :: !Int
42 , nlb_stopSize :: !StopSize
43 , nlb_userCorpusId :: !UserCorpusId
44 , nlb_masterCorpusId :: !MasterCorpusId
45 }
46
47 data StopSize = StopSize {unStopSize :: !Int}
48
49 -- | TODO: group with 2 terms only can be
50 -- discussed. Main purpose of this is offering
51 -- a first grouping option to user and get some
52 -- enriched data to better learn and improve that algo
53
54 data GroupParams = GroupParams { unGroupParams_lang :: !Lang
55 , unGroupParams_len :: !Int
56 , unGroupParams_limit :: !Int
57 , unGroupParams_stopSize :: !StopSize
58 }
59
60 ngramsGroup :: GroupParams
61 -> Text
62 -> Text
63 ngramsGroup (GroupParams l _m _n _) = Text.intercalate " "
64 . map (stem l)
65 -- . take n
66 . List.sort
67 -- . (List.filter (\t -> Text.length t > m))
68 . Text.splitOn " "
69 . Text.replace "-" " "
70
71 ------------------------------------------------------------------------------
72 type Group = Lang -> Int -> Int -> Text -> Text
73 type Stem = Text
74 type Label = Text
75 data GroupedText score =
76 GroupedText { _gt_listType :: !(Maybe ListType)
77 , _gt_label :: !Label
78 , _gt_score :: !score
79 , _gt_group :: !(Set Text)
80 , _gt_size :: !Int
81 , _gt_stem :: !Stem
82 , _gt_nodes :: !(Set NodeId)
83 }
84 instance Show score => Show (GroupedText score) where
85 show (GroupedText _ l s _ _ _ _) = show l <> ":" <> show s
86
87 instance (Eq a) => Eq (GroupedText a) where
88 (==) (GroupedText _ _ score1 _ _ _ _)
89 (GroupedText _ _ score2 _ _ _ _) = (==) score1 score2
90
91 instance (Eq a, Ord a) => Ord (GroupedText a) where
92 compare (GroupedText _ _ score1 _ _ _ _)
93 (GroupedText _ _ score2 _ _ _ _) = compare score1 score2
94
95 -- Lenses Instances
96 makeLenses 'GroupedText
97
98 ------------------------------------------------------------------------------
99 addListType :: Map Text ListType -> GroupedText a -> GroupedText a
100 addListType m g = set gt_listType lt g
101 where
102 lt = hasListType m g
103
104 hasListType :: Map Text ListType -> GroupedText a -> Maybe ListType
105 hasListType m (GroupedText _ label _ g _ _ _) =
106 List.foldl' (<>) Nothing
107 $ map (\t -> Map.lookup t m)
108 $ Set.toList
109 $ Set.insert label g
110
111
112
113
114