2 Module : Gargantext.API.Node.DocumentsFromWriteNodes
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
12 {-# LANGUAGE MonoLocalBinds #-}
13 {-# LANGUAGE TemplateHaskell #-}
14 {-# LANGUAGE TypeOperators #-}
16 module Gargantext.API.Node.DocumentsFromWriteNodes
19 import Control.Lens ((^.))
21 import Data.Either (Either(..), rights)
23 import qualified Data.Text as T
24 import Gargantext.API.Admin.Orchestrator.Types (JobLog(..), AsyncJobs)
25 import Gargantext.API.Admin.Types (HasSettings)
26 import Gargantext.API.Job (jobLogSuccess, jobLogFailTotalWithMessage)
27 import Gargantext.API.Prelude (GargServer)
28 import Gargantext.Core (Lang(..))
29 import Gargantext.Core.Text.Corpus.Parsers.FrameWrite
30 import Gargantext.Core.Text.Terms (TermType(..))
31 import Gargantext.Core.Types.Individu (User(..))
32 import Gargantext.Database.Action.Flow (flowDataText, DataText(..))
33 import Gargantext.Database.Action.Flow.Types (FlowCmdM)
34 import Gargantext.Database.Admin.Types.Hyperdata.Document (HyperdataDocument(..))
35 import Gargantext.Database.Admin.Types.Hyperdata.Frame
36 import Gargantext.Database.Admin.Types.Node
37 import Gargantext.Database.Query.Table.Node (getChildrenByType, getClosestParentIdByType', getNodeWith)
38 import Gargantext.Database.Schema.Node (node_hyperdata)
39 import Gargantext.Prelude
40 import GHC.Generics (Generic)
42 import Servant.Job.Async (JobFunction(..), serveJobsAPI)
44 ------------------------------------------------------------------------
45 type API = Summary " Documents from Write nodes."
46 :> AsyncJobs JobLog '[JSON] Params JobLog
47 ------------------------------------------------------------------------
48 newtype Params = Params { id :: Int }
49 deriving (Generic, Show)
51 instance FromJSON Params where
52 parseJSON = genericParseJSON defaultOptions
53 instance ToJSON Params where
54 toJSON = genericToJSON defaultOptions
55 instance ToSchema Params
56 ------------------------------------------------------------------------
57 api :: UserId -> NodeId -> GargServer API
60 JobFunction (\p log'' ->
64 in documentsFromWriteNodes uId nId p (liftBase . log')
67 documentsFromWriteNodes :: (HasSettings env, FlowCmdM env err m)
73 documentsFromWriteNodes uId nId _p logStatus = do
74 let jobLog = JobLog { _scst_succeeded = Just 1
75 , _scst_failed = Just 0
76 , _scst_remaining = Just 1
77 , _scst_events = Just []
81 mcId <- getClosestParentIdByType' nId NodeCorpus
85 let msg = T.pack $ "[G.A.N.DFWN] Node has no corpus parent: " <> show nId
86 logStatus $ jobLogFailTotalWithMessage msg jobLog
89 frameWriteIds <- getChildrenByType nId NodeFrameWrite
91 -- https://write.frame.gargantext.org/<frame_id>/download
92 frameWrites <- mapM (\id -> getNodeWith id (Proxy :: Proxy HyperdataFrame)) frameWriteIds
94 frameWritesWithContents <- liftBase $
96 contents <- getHyperdataFrameContents (node ^. node_hyperdata)
100 let parsedE = (\(node, contents) -> hyperdataDocumentFromFrameWrite (node ^. node_hyperdata, contents)) <$> frameWritesWithContents
101 let parsed = rights parsedE
103 _ <- flowDataText (RootId (NodeId uId)) (DataNew [parsed]) (Multi EN) cId Nothing logStatus
105 pure $ jobLogSuccess jobLog
106 ------------------------------------------------------------------------
107 hyperdataDocumentFromFrameWrite :: (HyperdataFrame, T.Text) -> Either T.Text HyperdataDocument
108 hyperdataDocumentFromFrameWrite (HyperdataFrame { _hf_base, _hf_frame_id }, contents) =
109 case parseLines contents of
110 Left _ -> Left "Error parsing node"
111 Right (Parsed { authors, contents = c, date, source, title = t }) ->
112 let authorJoinSingle (Author { firstName, lastName }) = T.concat [ lastName, ", ", firstName ]
113 authors' = T.concat $ authorJoinSingle <$> authors
114 date' = (\(Date { year, month, day }) -> T.concat [ T.pack $ show year, "-"
115 , T.pack $ show month, "-"
116 , T.pack $ show day ]) <$> date
117 year' = fromIntegral $ maybe 2021 (\(Date { year }) -> year) date
118 month' = fromIntegral $ maybe 10 (\(Date { month }) -> month) date
119 day' = fromIntegral $ maybe 4 (\(Date { day }) -> day) date in
120 Right HyperdataDocument { _hd_bdd = Just "FrameWrite"
123 , _hd_uniqId = Nothing
124 , _hd_uniqIdBdd = Nothing
127 , _hd_authors = Just authors'
128 , _hd_institutes = Nothing
129 , _hd_source = source
130 , _hd_abstract = Just c
131 , _hd_publication_date = date'
132 , _hd_publication_year = Just year'
133 , _hd_publication_month = Just month'
134 , _hd_publication_day = Just day'
135 , _hd_publication_hour = Nothing
136 , _hd_publication_minute = Nothing
137 , _hd_publication_second = Nothing
138 , _hd_language_iso2 = Just $ T.pack $ show EN }