2 Module : Gargantext.API.Node.DocumentsFromWriteNodes
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
12 {-# LANGUAGE MonoLocalBinds #-}
13 {-# LANGUAGE TemplateHaskell #-}
14 {-# LANGUAGE TypeOperators #-}
16 module Gargantext.API.Node.DocumentsFromWriteNodes
20 import Control.Lens ((^.))
22 import Data.Either (Either(..), rights)
24 import qualified Data.Text as T
25 import Gargantext.API.Admin.Orchestrator.Types (JobLog(..), AsyncJobs)
26 import Gargantext.API.Admin.Types (HasSettings)
27 import Gargantext.API.Job (jobLogSuccess, jobLogFailTotalWithMessage)
28 import Gargantext.API.Prelude (GargServer)
29 import Gargantext.Core (Lang(..))
30 import Gargantext.Core.Text.Corpus.Parsers.FrameWrite
31 import Gargantext.Core.Text.Terms (TermType(..))
32 import Gargantext.Core.Types.Individu (User(..))
33 import Gargantext.Database.Action.Flow (flowDataText, DataText(..))
34 import Gargantext.Database.Action.Flow.Types (FlowCmdM)
35 import Gargantext.Database.Admin.Types.Hyperdata.Document (HyperdataDocument(..))
36 import Gargantext.Database.Admin.Types.Hyperdata.Frame
37 import Gargantext.Database.Admin.Types.Node
38 import Gargantext.Database.Query.Table.Node (getChildrenByType, getClosestParentIdByType', getNodeWith)
39 import Gargantext.Database.Schema.Node (node_hyperdata)
40 import Gargantext.Prelude
41 import GHC.Generics (Generic)
43 import Servant.Job.Async (JobFunction(..), serveJobsAPI)
45 ------------------------------------------------------------------------
46 type API = Summary " Documents from Write nodes."
47 :> AsyncJobs JobLog '[JSON] Params JobLog
48 ------------------------------------------------------------------------
49 newtype Params = Params { id :: Int }
50 deriving (Generic, Show)
52 instance FromJSON Params where
53 parseJSON = genericParseJSON defaultOptions
54 instance ToJSON Params where
55 toJSON = genericToJSON defaultOptions
56 instance ToSchema Params
57 ------------------------------------------------------------------------
58 api :: UserId -> NodeId -> GargServer API
61 JobFunction (\p log'' ->
65 in documentsFromWriteNodes uId nId p (liftBase . log')
68 documentsFromWriteNodes :: (HasSettings env, FlowCmdM env err m)
74 documentsFromWriteNodes uId nId _p logStatus = do
75 let jobLog = JobLog { _scst_succeeded = Just 1
76 , _scst_failed = Just 0
77 , _scst_remaining = Just 1
78 , _scst_events = Just []
82 mcId <- getClosestParentIdByType' nId NodeCorpus
86 let msg = T.pack $ "[G.A.N.DFWN] Node has no corpus parent: " <> show nId
87 logStatus $ jobLogFailTotalWithMessage msg jobLog
90 frameWriteIds <- getChildrenByType nId NodeFrameWrite
92 -- https://write.frame.gargantext.org/<frame_id>/download
93 frameWrites <- mapM (\id -> getNodeWith id (Proxy :: Proxy HyperdataFrame)) frameWriteIds
95 frameWritesWithContents <- liftBase $
97 contents <- getHyperdataFrameContents (node ^. node_hyperdata)
101 let parsedE = (\(node, contents) -> hyperdataDocumentFromFrameWrite (node ^. node_hyperdata, contents)) <$> frameWritesWithContents
102 let parsed = rights parsedE
104 _ <- flowDataText (RootId (NodeId uId)) (DataNew $ yieldMany parsed) (Multi EN) cId Nothing logStatus
106 pure $ jobLogSuccess jobLog
107 ------------------------------------------------------------------------
108 hyperdataDocumentFromFrameWrite :: (HyperdataFrame, T.Text) -> Either T.Text HyperdataDocument
109 hyperdataDocumentFromFrameWrite (HyperdataFrame { _hf_base, _hf_frame_id }, contents) =
110 case parseLines contents of
111 Left _ -> Left "Error parsing node"
112 Right (Parsed { authors, contents = c, date, source, title = t }) ->
113 let authorJoinSingle (Author { firstName, lastName }) = T.concat [ lastName, ", ", firstName ]
114 authors' = T.concat $ authorJoinSingle <$> authors
115 date' = (\(Date { year, month, day }) -> T.concat [ T.pack $ show year, "-"
116 , T.pack $ show month, "-"
117 , T.pack $ show day ]) <$> date
118 year' = fromIntegral $ maybe 2021 (\(Date { year }) -> year) date
119 month' = fromIntegral $ maybe 10 (\(Date { month }) -> month) date
120 day' = fromIntegral $ maybe 4 (\(Date { day }) -> day) date in
121 Right HyperdataDocument { _hd_bdd = Just "FrameWrite"
124 , _hd_uniqId = Nothing
125 , _hd_uniqIdBdd = Nothing
128 , _hd_authors = Just authors'
129 , _hd_institutes = Nothing
130 , _hd_source = source
131 , _hd_abstract = Just c
132 , _hd_publication_date = date'
133 , _hd_publication_year = Just year'
134 , _hd_publication_month = Just month'
135 , _hd_publication_day = Just day'
136 , _hd_publication_hour = Nothing
137 , _hd_publication_minute = Nothing
138 , _hd_publication_second = Nothing
139 , _hd_language_iso2 = Just $ T.pack $ show EN }