2 Module : Gargantext.Core.Text.List.Learn
3 Description : Learn to make lists
4 Copyright : (c) CNRS, 2018-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
10 CSV parser for Gargantext corpus files.
14 {-# OPTIONS_GHC -fno-warn-orphans #-}
17 module Gargantext.Core.Text.List.Learn
20 import Control.Monad.Reader (MonadReader)
21 -- TODO remvoe this deps
22 import qualified Data.IntMap as IntMap
23 import qualified Data.List as List
25 import qualified Data.Map as Map
26 import qualified Data.SVM as SVM
27 import qualified Data.Vector as Vec
29 import Gargantext.API.Admin.Types
30 import Gargantext.Core.Text.Metrics.Count (occurrencesWith)
31 import Gargantext.Core.Types.Main (ListType(..), listTypeId, fromListTypeId)
32 import Gargantext.Prelude
33 import Gargantext.Prelude.Utils
35 ------------------------------------------------------------------------
36 train :: Double -> Double -> SVM.Problem -> IO SVM.Model
37 train x y = (SVM.train (SVM.CSvc x) (SVM.RBF y))
39 predict :: SVM.Model -> [Vec.Vector Double] -> IO [Double]
40 predict m vs = mapM (predict' m) vs
42 predict' m' vs' = SVM.predict m' (IntMap.fromList $ (zip [1..]) $ Vec.toList vs')
44 ------------------------------------------------------------------------
45 trainList :: Double -> Double -> Map ListType [Vec.Vector Double] -> IO SVM.Model
46 trainList x y = (train x y) . trainList'
48 trainList' :: Map ListType [Vec.Vector Double] -> SVM.Problem
49 trainList' = mapVec2problem . (Map.mapKeys (fromIntegral . listTypeId))
51 mapVec2problem :: Map Double [Vec.Vector Double] -> SVM.Problem
52 mapVec2problem = List.concat . (map (\(a,as) -> zip (repeat a) as)) . Map.toList . (Map.map vecs2maps)
54 vecs2maps :: [Vec.Vector Double] -> [IntMap.IntMap Double]
55 vecs2maps = map (IntMap.fromList . (zip [1..]) . Vec.toList)
58 predictList :: Model -> [Vec.Vector Double] -> IO [Maybe ListType]
59 predictList (ModelSVM m _ _) vs = map (fromListTypeId . round) <$> predict m vs
61 ------------------------------------------------------------------------
62 data Model = ModelSVM { modelSVM :: SVM.Model
63 , param1 :: Maybe Double
64 , param2 :: Maybe Double
67 instance SaveFile Model
69 saveFile' fp (ModelSVM m _ _) = SVM.saveModel m fp
71 instance ReadFile Model
75 pure $ ModelSVM m Nothing Nothing
77 ------------------------------------------------------------------------
80 -- split list : train / test
81 -- grid parameters on best result on test
83 type Train = Map ListType [Vec.Vector Double]
84 type Tests = Map ListType [Vec.Vector Double]
88 grid :: (MonadReader env m, MonadBase IO m, HasSettings env)
89 => Param -> Param -> Train -> [Tests] -> m (Maybe Model)
90 grid _ _ _ [] = panic "Gargantext.Core.Text.List.Learn.grid : empty test data"
93 grid' :: (MonadReader env m, MonadBase IO m, HasSettings env)
98 grid' x y tr' te' = do
99 model'' <- liftBase $ trainList x y tr'
102 model' = ModelSVM model'' (Just x) (Just y)
104 score' :: [(ListType, Maybe ListType)] -> Map (Maybe Bool) Int
105 score' = occurrencesWith (\(a,b) -> (==) <$> Just a <*> b)
107 score'' :: Map (Maybe Bool) Int -> Double
108 score'' m'' = maybe 0 (\t -> (fromIntegral t)/total) (Map.lookup (Just True) m'')
110 total = fromIntegral $ foldl (+) 0 $ Map.elems m''
113 let (res, toGuess) = List.unzip
115 $ map (\(k,vs) -> zip (repeat k) vs)
118 res' <- liftBase $ predictList m toGuess
119 pure $ score'' $ score' $ List.zip res res'
121 score <- mapM (getScore model') te'
122 pure (mean score, model')
124 r <- head . List.reverse
126 <$> mapM (\(x,y) -> grid' x y tr te)
127 [(x,y) | x <- [s..e], y <- [s..e]]
129 printDebug "GRID SEARCH" (map fst r)
130 --printDebug "file" fp
131 --fp <- saveFile (ModelSVM model')