2 Module : Gargantext.API.Ngrams.List
3 Description : Get Ngrams (lists)
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
12 {-# LANGUAGE TemplateHaskell #-}
13 {-# LANGUAGE TypeOperators #-}
15 module Gargantext.API.Ngrams.List
18 import Data.Maybe (catMaybes)
19 import Control.Lens hiding (elements)
21 import Data.Map (toList, fromList)
22 import Data.Swagger (ToSchema, declareNamedSchema, genericDeclareNamedSchema)
23 import Data.Text (Text, concat, pack)
24 import GHC.Generics (Generic)
25 import Gargantext.API.Admin.Orchestrator.Types
26 import Gargantext.API.Ngrams (getNgramsTableMap, setListNgrams, NgramsTerm)
27 import Gargantext.API.Ngrams.Types (RepoCmdM, Versioned(..), NgramsList, NgramsTerm(..))
28 import Gargantext.API.Node.Corpus.New.File (FileType(..))
29 import Gargantext.API.Prelude (GargServer, GargNoServer)
30 import Gargantext.Core.Utils.Prefix (unPrefixSwagger)
31 import Gargantext.Database.Action.Flow.Types (FlowCmdM)
32 import Gargantext.Database.Action.Metrics.NgramsByNode (getOccByNgramsOnlyFast')
33 import Gargantext.Database.Schema.Node
34 import Gargantext.Database.Admin.Types.Node
35 import Gargantext.Database.Admin.Types.Hyperdata.Document
36 import Gargantext.Database.Schema.Ngrams (ngramsTypes, NgramsType(..))
37 import Gargantext.Database.Query.Table.Node (getDocumentsWithParentId)
38 import Gargantext.Prelude
39 import Gargantext.Core.Text.Terms.WithList (buildPatterns, termsInText)
40 import Network.HTTP.Media ((//), (/:))
42 import Servant.Job.Async
43 import Servant.Job.Utils (jsonOptions)
44 import Web.FormUrlEncoded (FromForm)
45 import qualified Data.HashMap.Strict as HashMap
46 import qualified Data.Text as Text
48 ------------------------------------------------------------------------
49 type API = Get '[JSON, HTML] (Headers '[Header "Content-Disposition" Text] NgramsList)
50 -- :<|> ReqBody '[JSON] NgramsList :> Post '[JSON] Bool
53 api :: ListId -> GargServer API
54 api l = get l :<|> postAsync l
57 instance Accept HTML where
58 contentType _ = "text" // "html" /: ("charset", "utf-8")
59 instance ToJSON a => MimeRender HTML a where
62 ------------------------------------------------------------------------
63 get :: RepoCmdM env err m =>
64 ListId -> m (Headers '[Header "Content-Disposition" Text] NgramsList)
68 return $ addHeader (concat [ "attachment; filename=GarganText_NgramsList-"
74 get' :: RepoCmdM env err m
75 => ListId -> m NgramsList
78 <$> mapM (getNgramsTableMap lId) ngramsTypes
80 ------------------------------------------------------------------------
83 post :: FlowCmdM env err m
88 -- TODO check with Version for optim
89 _ <- mapM (\(nt, Versioned _v ns) -> setListNgrams l nt ns) $ toList m
94 -----------------------------------------------------------------------------
95 -- | Re-index documents of a corpus with new ngrams (called orphans here)
96 reIndexWith :: CorpusId
101 reIndexWith cId lId nt ts = do
102 docs <- getDocumentsWithParentId cId
104 -- Taking the ngrams with 0 occurrences only (orphans)
105 orphans <- map (\k -> ([unNgramsTerm k], []))
107 <$> HashMap.filter (==0)
108 <$> getOccByNgramsOnlyFast' cId lId nt ts
110 -- Checking Text documents where orphans match
113 map (\doc -> ( doc ^. node_id
114 , termsInText (buildPatterns orphans)
117 [ doc ^. node_hyperdata . hd_title
118 , doc ^. node_hyperdata . hd_abstract
124 -- Saving the indexation in database
128 ------------------------------------------------------------------------
129 ------------------------------------------------------------------------
130 type PostAPI = Summary "Update List"
134 :> AsyncJobs JobLog '[FormUrlEncoded] WithFile JobLog
136 postAsync :: ListId -> GargServer PostAPI
139 JobFunction (\f log' -> postAsync' lId f (liftBase . log'))
141 postAsync' :: FlowCmdM env err m
146 postAsync' l (WithFile _ m _) logStatus = do
148 logStatus JobLog { _scst_succeeded = Just 0
149 , _scst_failed = Just 0
150 , _scst_remaining = Just 1
151 , _scst_events = Just []
155 pure JobLog { _scst_succeeded = Just 1
156 , _scst_failed = Just 0
157 , _scst_remaining = Just 0
158 , _scst_events = Just []
161 data WithFile = WithFile
162 { _wf_filetype :: !FileType
163 , _wf_data :: !NgramsList
165 } deriving (Eq, Show, Generic)
167 makeLenses ''WithFile
168 instance FromForm WithFile
169 instance FromJSON WithFile where
170 parseJSON = genericParseJSON $ jsonOptions "_wf_"
171 instance ToJSON WithFile where
172 toJSON = genericToJSON $ jsonOptions "_wf_"
173 instance ToSchema WithFile where
174 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_wf_")