2 Module : Gargantext.Core.Text.List.Learn
3 Description : Learn to make lists
4 Copyright : (c) CNRS, 2018-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
10 CSV parser for Gargantext corpus files.
14 {-# OPTIONS_GHC -fno-warn-orphans #-}
17 module Gargantext.Core.Text.List.Learn
20 import Control.Monad.Reader (MonadReader)
21 -- TODO remvoe this deps
22 import Gargantext.API.Admin.Settings
24 import Gargantext.Core.Types.Main (ListType(..), listTypeId, fromListTypeId)
25 import Gargantext.Prelude
26 import Gargantext.Prelude.Utils
27 import Gargantext.Core.Text.Metrics.Count (occurrencesWith)
28 import qualified Data.IntMap as IntMap
29 import qualified Data.List as List
30 import qualified Data.Map as Map
31 import qualified Data.SVM as SVM
32 import qualified Data.Vector as Vec
34 ------------------------------------------------------------------------
35 train :: Double -> Double -> SVM.Problem -> IO SVM.Model
36 train x y = (SVM.train (SVM.CSvc x) (SVM.RBF y))
38 predict :: SVM.Model -> [Vec.Vector Double] -> IO [Double]
39 predict m vs = mapM (predict' m) vs
41 predict' m' vs' = SVM.predict m' (IntMap.fromList $ (zip [1..]) $ Vec.toList vs')
43 ------------------------------------------------------------------------
44 trainList :: Double -> Double -> Map ListType [Vec.Vector Double] -> IO SVM.Model
45 trainList x y = (train x y) . trainList'
47 trainList' :: Map ListType [Vec.Vector Double] -> SVM.Problem
48 trainList' = mapVec2problem . (Map.mapKeys (fromIntegral . listTypeId))
50 mapVec2problem :: Map Double [Vec.Vector Double] -> SVM.Problem
51 mapVec2problem = List.concat . (map (\(a,as) -> zip (repeat a) as)) . Map.toList . (Map.map vecs2maps)
53 vecs2maps :: [Vec.Vector Double] -> [IntMap.IntMap Double]
54 vecs2maps = map (IntMap.fromList . (zip [1..]) . Vec.toList)
57 predictList :: Model -> [Vec.Vector Double] -> IO [Maybe ListType]
58 predictList (ModelSVM m _ _) vs = map (fromListTypeId . round) <$> predict m vs
60 ------------------------------------------------------------------------
61 data Model = ModelSVM { modelSVM :: SVM.Model
62 , param1 :: Maybe Double
63 , param2 :: Maybe Double
66 instance SaveFile Model
68 saveFile' fp (ModelSVM m _ _) = SVM.saveModel m fp
70 instance ReadFile Model
74 pure $ ModelSVM m Nothing Nothing
76 ------------------------------------------------------------------------
79 -- split list : train / test
80 -- grid parameters on best result on test
82 type Train = Map ListType [Vec.Vector Double]
83 type Tests = Map ListType [Vec.Vector Double]
87 grid :: (MonadReader env m, MonadBase IO m, HasSettings env)
88 => Param -> Param -> Train -> [Tests] -> m (Maybe Model)
89 grid _ _ _ [] = panic "Gargantext.Core.Text.List.Learn.grid : empty test data"
92 grid' :: (MonadReader env m, MonadBase IO m, HasSettings env)
97 grid' x y tr' te' = do
98 model'' <- liftBase $ trainList x y tr'
101 model' = ModelSVM model'' (Just x) (Just y)
103 score' :: [(ListType, Maybe ListType)] -> Map (Maybe Bool) Int
104 score' = occurrencesWith (\(a,b) -> (==) <$> Just a <*> b)
106 score'' :: Map (Maybe Bool) Int -> Double
107 score'' m'' = maybe 0 (\t -> (fromIntegral t)/total) (Map.lookup (Just True) m'')
109 total = fromIntegral $ foldl (+) 0 $ Map.elems m''
112 let (res, toGuess) = List.unzip
114 $ map (\(k,vs) -> zip (repeat k) vs)
117 res' <- liftBase $ predictList m toGuess
118 pure $ score'' $ score' $ List.zip res res'
120 score <- mapM (getScore model') te'
121 pure (mean score, model')
123 r <- head . List.reverse
125 <$> mapM (\(x,y) -> grid' x y tr te)
126 [(x,y) | x <- [s..e], y <- [s..e]]
128 printDebug "GRID SEARCH" (map fst r)
129 --printDebug "file" fp
130 --fp <- saveFile (ModelSVM model')