2 Module : Gargantext.Core.Text.List.Learn
3 Description : Learn to make lists
4 Copyright : (c) CNRS, 2018-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
10 CSV parser for Gargantext corpus files.
14 {-# OPTIONS_GHC -fno-warn-orphans #-}
17 module Gargantext.Core.Text.List.Learn
20 import qualified Data.IntMap as IntMap
21 import qualified Data.List as List
23 import qualified Data.Map as Map
24 import qualified Data.SVM as SVM
25 import qualified Data.Vector as Vec
27 import Gargantext.Core.Text.Metrics.Count (occurrencesWith)
28 import Gargantext.Core.Types.Main (ListType(..), listTypeId, fromListTypeId)
29 import Gargantext.Prelude
30 import Gargantext.Prelude.Utils
32 ------------------------------------------------------------------------
33 train :: Double -> Double -> SVM.Problem -> IO SVM.Model
34 train x y = (SVM.train (SVM.CSvc x) (SVM.RBF y))
36 predict :: SVM.Model -> [Vec.Vector Double] -> IO [Double]
37 predict m vs = mapM (predict' m) vs
39 predict' m' vs' = SVM.predict m' (IntMap.fromList $ (zip [1..]) $ Vec.toList vs')
41 ------------------------------------------------------------------------
42 trainList :: Double -> Double -> Map ListType [Vec.Vector Double] -> IO SVM.Model
43 trainList x y = (train x y) . trainList'
45 trainList' :: Map ListType [Vec.Vector Double] -> SVM.Problem
46 trainList' = mapVec2problem . (Map.mapKeys (fromIntegral . listTypeId))
48 mapVec2problem :: Map Double [Vec.Vector Double] -> SVM.Problem
49 mapVec2problem = List.concat . (map (\(a,as) -> zip (repeat a) as)) . Map.toList . (Map.map vecs2maps)
51 vecs2maps :: [Vec.Vector Double] -> [IntMap.IntMap Double]
52 vecs2maps = map (IntMap.fromList . (zip [1..]) . Vec.toList)
55 predictList :: Model -> [Vec.Vector Double] -> IO [Maybe ListType]
56 predictList (ModelSVM m _ _) vs = map (fromListTypeId . round) <$> predict m vs
58 ------------------------------------------------------------------------
59 data Model = ModelSVM { modelSVM :: SVM.Model
60 , param1 :: Maybe Double
61 , param2 :: Maybe Double
64 instance SaveFile Model
66 saveFile' fp (ModelSVM m _ _) = SVM.saveModel m fp
68 instance ReadFile Model
72 pure $ ModelSVM m Nothing Nothing
74 ------------------------------------------------------------------------
77 -- split list : train / test
78 -- grid parameters on best result on test
80 type Train = Map ListType [Vec.Vector Double]
81 type Tests = Map ListType [Vec.Vector Double]
85 grid :: (MonadBase IO m)
86 => Param -> Param -> Train -> [Tests] -> m (Maybe Model)
87 grid _ _ _ [] = panic "Gargantext.Core.Text.List.Learn.grid : empty test data"
90 grid' :: (MonadBase IO m)
95 grid' x y tr' te' = do
96 model'' <- liftBase $ trainList x y tr'
99 model' = ModelSVM model'' (Just x) (Just y)
101 score' :: [(ListType, Maybe ListType)] -> Map (Maybe Bool) Int
102 score' = occurrencesWith (\(a,b) -> (==) <$> Just a <*> b)
104 score'' :: Map (Maybe Bool) Int -> Double
105 score'' m'' = maybe 0 (\t -> (fromIntegral t)/total) (Map.lookup (Just True) m'')
107 total = fromIntegral $ foldl (+) 0 $ Map.elems m''
110 let (res, toGuess) = List.unzip
112 $ map (\(k,vs) -> zip (repeat k) vs)
115 res' <- liftBase $ predictList m toGuess
116 pure $ score'' $ score' $ List.zip res res'
118 score <- mapM (getScore model') te'
119 pure (mean score, model')
121 r <- head . List.reverse
123 <$> mapM (\(x,y) -> grid' x y tr te)
124 [(x,y) | x <- [s..e], y <- [s..e]]
126 printDebug "GRID SEARCH" (map fst r)
127 --printDebug "file" fp
128 --fp <- saveFile (ModelSVM model')