2 Module : Gargantext.API.Node.DocumentsFromWriteNodes
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
12 {-# LANGUAGE MonoLocalBinds #-}
13 {-# LANGUAGE TemplateHaskell #-}
14 {-# LANGUAGE TypeOperators #-}
16 module Gargantext.API.Node.DocumentsFromWriteNodes
20 import Control.Lens ((^.))
22 import Data.Either (Either(..), rights)
24 import qualified Data.Text as T
25 import Gargantext.API.Admin.Orchestrator.Types (JobLog(..), AsyncJobs)
26 import Gargantext.API.Admin.Types (HasSettings)
27 import Gargantext.API.Job (jobLogSuccess, jobLogFailTotalWithMessage)
28 import Gargantext.API.Prelude (GargServer)
29 import Gargantext.Core (Lang(..))
30 import Gargantext.Core.Text.Corpus.Parsers.FrameWrite
31 import Gargantext.Core.Text.Terms (TermType(..))
32 import Gargantext.Core.Types.Individu (User(..))
33 import Gargantext.Database.Action.Flow (flowDataText, DataText(..))
34 import Gargantext.Database.Action.Flow.Types (FlowCmdM)
35 import Gargantext.Database.Admin.Types.Hyperdata.Document (HyperdataDocument(..))
36 import Gargantext.Database.Admin.Types.Hyperdata.Frame
37 import Gargantext.Database.Admin.Types.Node
38 import Gargantext.Database.Query.Table.Node (getChildrenByType, getClosestParentIdByType', getNodeWith)
39 import Gargantext.Database.Schema.Node (node_hyperdata)
40 import qualified Gargantext.Defaults as Defaults
41 import Gargantext.Prelude
42 import GHC.Generics (Generic)
44 import Servant.Job.Async (JobFunction(..), serveJobsAPI)
46 ------------------------------------------------------------------------
47 type API = Summary " Documents from Write nodes."
48 :> AsyncJobs JobLog '[JSON] Params JobLog
49 ------------------------------------------------------------------------
50 newtype Params = Params { id :: Int }
51 deriving (Generic, Show)
53 instance FromJSON Params where
54 parseJSON = genericParseJSON defaultOptions
55 instance ToJSON Params where
56 toJSON = genericToJSON defaultOptions
57 instance ToSchema Params
58 ------------------------------------------------------------------------
59 api :: UserId -> NodeId -> GargServer API
62 JobFunction (\p log'' ->
66 in documentsFromWriteNodes uId nId p (liftBase . log')
69 documentsFromWriteNodes :: (HasSettings env, FlowCmdM env err m)
75 documentsFromWriteNodes uId nId _p logStatus = do
76 let jobLog = JobLog { _scst_succeeded = Just 1
77 , _scst_failed = Just 0
78 , _scst_remaining = Just 1
79 , _scst_events = Just []
83 mcId <- getClosestParentIdByType' nId NodeCorpus
87 let msg = T.pack $ "[G.A.N.DFWN] Node has no corpus parent: " <> show nId
88 logStatus $ jobLogFailTotalWithMessage msg jobLog
91 frameWriteIds <- getChildrenByType nId NodeFrameWrite
93 -- https://write.frame.gargantext.org/<frame_id>/download
94 frameWrites <- mapM (\id -> getNodeWith id (Proxy :: Proxy HyperdataFrame)) frameWriteIds
96 frameWritesWithContents <- liftBase $
98 contents <- getHyperdataFrameContents (node ^. node_hyperdata)
102 let parsedE = (\(node, contents) -> hyperdataDocumentFromFrameWrite (node ^. node_hyperdata, contents)) <$> frameWritesWithContents
103 let parsed = rights parsedE
105 _ <- flowDataText (RootId (NodeId uId)) (DataNew (Just $ fromIntegral $ length parsed, yieldMany parsed)) (Multi EN) cId Nothing logStatus
107 pure $ jobLogSuccess jobLog
108 ------------------------------------------------------------------------
109 hyperdataDocumentFromFrameWrite :: (HyperdataFrame, T.Text) -> Either T.Text HyperdataDocument
110 hyperdataDocumentFromFrameWrite (HyperdataFrame { _hf_base, _hf_frame_id }, contents) =
111 case parseLines contents of
112 Left _ -> Left "Error parsing node"
113 Right (Parsed { authors, contents = c, date, source, title = t }) ->
114 let authorJoinSingle (Author { firstName, lastName }) = T.concat [ lastName, ", ", firstName ]
115 authors' = T.concat $ authorJoinSingle <$> authors
116 date' = (\(Date { year, month, day }) -> T.concat [ T.pack $ show year, "-"
117 , T.pack $ show month, "-"
118 , T.pack $ show day ]) <$> date
119 year' = fromIntegral $ maybe Defaults.year (\(Date { year }) -> year) date
120 month' = maybe Defaults.month (\(Date { month }) -> fromIntegral month) date
121 day' = maybe Defaults.day (\(Date { day }) -> fromIntegral day) date in
122 Right HyperdataDocument { _hd_bdd = Just "FrameWrite"
125 , _hd_uniqId = Nothing
126 , _hd_uniqIdBdd = Nothing
129 , _hd_authors = Just authors'
130 , _hd_institutes = Nothing
131 , _hd_source = source
132 , _hd_abstract = Just c
133 , _hd_publication_date = date'
134 , _hd_publication_year = Just year'
135 , _hd_publication_month = Just month'
136 , _hd_publication_day = Just day'
137 , _hd_publication_hour = Nothing
138 , _hd_publication_minute = Nothing
139 , _hd_publication_second = Nothing
140 , _hd_language_iso2 = Just $ T.pack $ show EN }