2 Module : Gargantext.API.Node.DocumentsFromWriteNodes
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
12 {-# LANGUAGE MonoLocalBinds #-}
13 {-# LANGUAGE TemplateHaskell #-}
14 {-# LANGUAGE TypeOperators #-}
16 module Gargantext.API.Node.DocumentsFromWriteNodes
19 import Control.Lens ((^.))
21 import Data.Either (Either(..), rights)
23 import qualified Data.Text as T
24 import Gargantext.API.Admin.Orchestrator.Types (JobLog(..), AsyncJobs)
25 import Gargantext.API.Admin.Types (HasSettings)
26 import Gargantext.API.Job (jobLogSuccess, jobLogFailTotalWithMessage)
27 import Gargantext.API.Prelude (GargServer)
28 import Gargantext.Core (Lang(..))
29 import Gargantext.Core.Text.Corpus.Parsers.FrameWrite
30 import Gargantext.Core.Text.Terms (TermType(..))
31 import Gargantext.Core.Types.Individu (User(..))
32 import Gargantext.Database.Action.Flow (flowDataText, DataText(..))
33 import Gargantext.Database.Action.Flow.Types (FlowCmdM)
34 import Gargantext.Database.Admin.Types.Hyperdata.Document (HyperdataDocument(..))
35 import Gargantext.Database.Admin.Types.Hyperdata.Frame
36 import Gargantext.Database.Admin.Types.Node
37 import Gargantext.Database.Query.Table.Node (getChildrenByType, getClosestParentIdByType', getNodeWith)
38 import Gargantext.Database.Schema.Node (node_hyperdata)
39 import qualified Gargantext.Defaults as Defaults
40 import Gargantext.Prelude
41 import GHC.Generics (Generic)
43 import Servant.Job.Async (JobFunction(..), serveJobsAPI)
45 ------------------------------------------------------------------------
46 type API = Summary " Documents from Write nodes."
47 :> AsyncJobs JobLog '[JSON] Params JobLog
48 ------------------------------------------------------------------------
49 newtype Params = Params { id :: Int }
50 deriving (Generic, Show)
52 instance FromJSON Params where
53 parseJSON = genericParseJSON defaultOptions
54 instance ToJSON Params where
55 toJSON = genericToJSON defaultOptions
56 instance ToSchema Params
57 ------------------------------------------------------------------------
58 api :: UserId -> NodeId -> GargServer API
61 JobFunction (\p log'' ->
65 in documentsFromWriteNodes uId nId p (liftBase . log')
68 documentsFromWriteNodes :: (HasSettings env, FlowCmdM env err m)
74 documentsFromWriteNodes uId nId _p logStatus = do
75 let jobLog = JobLog { _scst_succeeded = Just 1
76 , _scst_failed = Just 0
77 , _scst_remaining = Just 1
78 , _scst_events = Just []
82 mcId <- getClosestParentIdByType' nId NodeCorpus
86 let msg = T.pack $ "[G.A.N.DFWN] Node has no corpus parent: " <> show nId
87 logStatus $ jobLogFailTotalWithMessage msg jobLog
90 frameWriteIds <- getChildrenByType nId NodeFrameWrite
92 -- https://write.frame.gargantext.org/<frame_id>/download
93 frameWrites <- mapM (\id -> getNodeWith id (Proxy :: Proxy HyperdataFrame)) frameWriteIds
95 frameWritesWithContents <- liftBase $
97 contents <- getHyperdataFrameContents (node ^. node_hyperdata)
101 let parsedE = (\(node, contents) -> hyperdataDocumentFromFrameWrite (node ^. node_hyperdata, contents)) <$> frameWritesWithContents
102 let parsed = rights parsedE
104 _ <- flowDataText (RootId (NodeId uId)) (DataNew [parsed]) (Multi EN) cId Nothing logStatus
106 pure $ jobLogSuccess jobLog
107 ------------------------------------------------------------------------
108 hyperdataDocumentFromFrameWrite :: (HyperdataFrame, T.Text) -> Either T.Text HyperdataDocument
109 hyperdataDocumentFromFrameWrite (HyperdataFrame { _hf_base, _hf_frame_id }, contents) =
110 case parseLines contents of
111 Left _ -> Left "Error parsing node"
112 Right (Parsed { authors, contents = c, date, source, title = t }) ->
113 let authorJoinSingle (Author { firstName, lastName }) = T.concat [ lastName, ", ", firstName ]
114 authors' = T.concat $ authorJoinSingle <$> authors
115 date' = (\(Date { year, month, day }) -> T.concat [ T.pack $ show year, "-"
116 , T.pack $ show month, "-"
117 , T.pack $ show day ]) <$> date
118 year' = fromIntegral $ maybe Defaults.year (\(Date { year }) -> year) date
119 month' = maybe Defaults.month (\(Date { month }) -> fromIntegral month) date
120 day' = maybe Defaults.day (\(Date { day }) -> fromIntegral day) date in
121 Right HyperdataDocument { _hd_bdd = Just "FrameWrite"
124 , _hd_uniqId = Nothing
125 , _hd_uniqIdBdd = Nothing
128 , _hd_authors = Just authors'
129 , _hd_institutes = Nothing
130 , _hd_source = source
131 , _hd_abstract = Just c
132 , _hd_publication_date = date'
133 , _hd_publication_year = Just year'
134 , _hd_publication_month = Just month'
135 , _hd_publication_day = Just day'
136 , _hd_publication_hour = Nothing
137 , _hd_publication_minute = Nothing
138 , _hd_publication_second = Nothing
139 , _hd_language_iso2 = Just $ T.pack $ show EN }