2 Module : Gargantext.API.Node.DocumentsFromWriteNodes
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
12 {-# LANGUAGE MonoLocalBinds #-}
13 {-# LANGUAGE TemplateHaskell #-}
14 {-# LANGUAGE TypeOperators #-}
16 module Gargantext.API.Node.DocumentsFromWriteNodes
20 import Control.Lens ((^.))
22 import Data.Either (Either(..), rights)
23 -- import Data.Maybe (fromMaybe)
25 import GHC.Generics (Generic)
26 import Gargantext.API.Admin.EnvTypes (Env, GargJob(..))
27 import Gargantext.API.Admin.Orchestrator.Types (JobLog(..), AsyncJobs)
28 import Gargantext.API.Admin.Types (HasSettings)
29 import Gargantext.API.Job (jobLogSuccess, jobLogFailTotalWithMessage)
30 import Gargantext.API.Prelude (GargM, GargError)
31 import Gargantext.Core (Lang(..))
32 import Gargantext.Core.Text.Corpus.Parsers.FrameWrite
33 import Gargantext.Core.Text.Terms (TermType(..))
34 import Gargantext.Core.Types.Individu (User(..))
35 import Gargantext.Database.Action.Flow (flowDataText, DataText(..))
36 import Gargantext.Database.Action.Flow.Types (FlowCmdM)
37 import Gargantext.Database.Admin.Types.Hyperdata.Document (HyperdataDocument(..))
38 import Gargantext.Database.Admin.Types.Hyperdata.Frame
39 import Gargantext.Database.Admin.Types.Node
40 import Gargantext.Database.Query.Table.Node (getChildrenByType, getClosestParentIdByType', getNodeWith)
41 import Gargantext.Database.Schema.Node (node_hyperdata, node_name)
42 import Gargantext.Prelude
43 import Gargantext.Utils.Jobs (serveJobsAPI)
45 import qualified Data.List as List
46 import qualified Data.Text as T
47 import qualified Gargantext.Defaults as Defaults
49 ------------------------------------------------------------------------
50 type API = Summary " Documents from Write nodes."
51 :> AsyncJobs JobLog '[JSON] Params JobLog
52 ------------------------------------------------------------------------
53 newtype Params = Params { id :: Int }
54 deriving (Generic, Show)
55 instance FromJSON Params where
56 parseJSON = genericParseJSON defaultOptions
57 instance ToJSON Params where
58 toJSON = genericToJSON defaultOptions
59 instance ToSchema Params
60 ------------------------------------------------------------------------
61 api :: UserId -> NodeId -> ServerT API (GargM Env GargError)
63 serveJobsAPI DocumentFromWriteNodeJob $ \p log'' ->
67 in documentsFromWriteNodes uId nId p (liftBase . log')
69 documentsFromWriteNodes :: (HasSettings env, FlowCmdM env err m)
75 documentsFromWriteNodes uId nId _p logStatus = do
76 let jobLog = JobLog { _scst_succeeded = Just 1
77 , _scst_failed = Just 0
78 , _scst_remaining = Just 1
79 , _scst_events = Just []
83 mcId <- getClosestParentIdByType' nId NodeCorpus
87 let msg = T.pack $ "[G.A.N.DFWN] Node has no corpus parent: " <> show nId
88 logStatus $ jobLogFailTotalWithMessage msg jobLog
91 frameWriteIds <- getChildrenByType nId NodeFrameWrite
93 -- https://write.frame.gargantext.org/<frame_id>/download
94 frameWrites <- mapM (\id -> getNodeWith id (Proxy :: Proxy HyperdataFrame)) frameWriteIds
96 frameWritesWithContents <- liftBase $
98 contents <- getHyperdataFrameContents (node ^. node_hyperdata)
102 let parsedE = map (\(node, contents) -> hyperdataDocumentFromFrameWrite 7 (node, contents)) frameWritesWithContents
103 -- TODO hard coded param should be taken from user
104 let parsed = List.concat $ rights parsedE
106 _ <- flowDataText (RootId (NodeId uId))
107 (DataNew (Just $ fromIntegral $ length parsed, yieldMany parsed))
108 (Multi EN) cId Nothing logStatus
110 pure $ jobLogSuccess jobLog
112 ------------------------------------------------------------------------
113 hyperdataDocumentFromFrameWrite :: Int -> (Node HyperdataFrame, T.Text) -> Either T.Text [HyperdataDocument]
114 hyperdataDocumentFromFrameWrite paragraphSize (node, contents) =
115 case parseLines contents of
116 Left _ -> Left "Error parsing node"
117 Right (Parsed { authors, contents = ctxts, date }) ->
118 let HyperdataFrame { _hf_base, _hf_frame_id } = node ^. node_hyperdata
119 authorJoinSingle (Author { firstName, lastName }) = T.concat [ lastName, ", ", firstName ]
120 authors' = T.concat $ authorJoinSingle <$> authors
121 date' = (\(Date { year, month, day }) -> T.concat [ T.pack $ show year, "-"
122 , T.pack $ show month, "-"
123 , T.pack $ show day ]) <$> date
124 year' = fromIntegral $ maybe Defaults.year (\(Date { year }) -> year) date
125 month' = maybe Defaults.month (\(Date { month }) -> fromIntegral month) date
126 day' = maybe Defaults.day (\(Date { day }) -> fromIntegral day) date in
127 Right (List.map (\(t, ctxt) -> HyperdataDocument { _hd_bdd = Just "FrameWrite"
130 , _hd_uniqId = Nothing
131 , _hd_uniqIdBdd = Nothing
134 , _hd_authors = Just authors'
135 , _hd_institutes = Nothing
136 , _hd_source = Just $ node ^. node_name
137 , _hd_abstract = Just ctxt
138 , _hd_publication_date = date'
139 , _hd_publication_year = Just year'
140 , _hd_publication_month = Just month'
141 , _hd_publication_day = Just day'
142 , _hd_publication_hour = Nothing
143 , _hd_publication_minute = Nothing
144 , _hd_publication_second = Nothing
145 , _hd_language_iso2 = Just $ T.pack $ show EN }
146 ) (text2titleParagraphs paragraphSize ctxts)