2 Module : Gargantext.API.Node.DocumentsFromWriteNodes
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
12 {-# LANGUAGE MonoLocalBinds #-}
13 {-# LANGUAGE TemplateHaskell #-}
14 {-# LANGUAGE TypeOperators #-}
15 {-# OPTIONS_GHC -fno-warn-orphans #-}
17 module Gargantext.API.Node.DocumentsFromWriteNodes
20 import Control.Lens ((^.))
22 import Data.Either (Either(..), rights)
24 import qualified Data.Text as T
25 import Gargantext.API.Admin.Orchestrator.Types (JobLog(..), AsyncJobs)
26 import Gargantext.API.Admin.Types (HasSettings)
27 import Gargantext.API.Prelude (GargServer)
28 import Gargantext.Core (Lang(..))
29 import Gargantext.Core.Text.Corpus.Parsers.FrameWrite
30 import Gargantext.Core.Text.Terms (TermType(..))
31 import Gargantext.Core.Types.Individu (User(..))
32 import Gargantext.Database.Action.Flow (flowDataText, DataText(..))
33 import Gargantext.Database.Action.Flow.Types (FlowCmdM)
34 import Gargantext.Database.Admin.Types.Hyperdata.Document (HyperdataDocument(..))
35 import Gargantext.Database.Admin.Types.Hyperdata.Frame
36 import Gargantext.Database.Admin.Types.Node
37 import Gargantext.Database.Query.Table.Node (getChildrenByType, getNodeWith)
38 import Gargantext.Database.Schema.Node (node_hyperdata)
39 import Gargantext.Prelude
40 import GHC.Generics (Generic)
42 import Servant.Job.Async (JobFunction(..), serveJobsAPI)
44 ------------------------------------------------------------------------
45 type API = Summary " Documents from Write nodes."
46 :> AsyncJobs JobLog '[JSON] Params JobLog
47 ------------------------------------------------------------------------
48 newtype Params = Params { id :: Int }
49 deriving (Generic, Show)
51 instance FromJSON Params where
52 parseJSON = genericParseJSON defaultOptions
53 instance ToJSON Params where
54 toJSON = genericToJSON defaultOptions
55 instance ToSchema Params
56 ------------------------------------------------------------------------
57 api :: UserId -> NodeId -> GargServer API
60 JobFunction (\p log'' ->
63 printDebug "documents from write nodes" x
65 in documentsFromWriteNodes uId nId p (liftBase . log')
68 documentsFromWriteNodes :: (HasSettings env, FlowCmdM env err m)
74 documentsFromWriteNodes uId nId p logStatus = do
76 logStatus JobLog { _scst_succeeded = Just 1
77 , _scst_failed = Just 0
78 , _scst_remaining = Just 1
79 , _scst_events = Just []
82 _ <- printDebug "[documentsFromWriteNodes] inside job, uId" uId
83 _ <- printDebug "[documentsFromWriteNodes] inside job, nId" nId
84 _ <- printDebug "[documentsFromWriteNodes] inside job, p" p
86 frameWriteIds <- getChildrenByType nId NodeFrameWrite
87 _ <- printDebug "[documentsFromWriteNodes] children" frameWriteIds
89 -- https://write.frame.gargantext.org/<frame_id>/download
90 frameWrites <- mapM (\id -> getNodeWith id (Proxy :: Proxy HyperdataFrame)) frameWriteIds
92 frameWritesWithContents <- liftBase $
94 contents <- getHyperdataFrameContents (node ^. node_hyperdata)
97 _ <- printDebug "[documentsFromWriteNodes] frameWritesWithContents" frameWritesWithContents
99 let parsedE = (\(node, contents) -> hyperdataDocumentFromFrameWrite (node ^. node_hyperdata, contents)) <$> frameWritesWithContents
100 let parsed = rights parsedE
101 _ <- printDebug "[documentsFromWriteNodes] parsed" parsed
103 _ <- flowDataText (RootId (NodeId uId)) (DataNew [parsed]) (Multi EN) nId Nothing
105 pure JobLog { _scst_succeeded = Just 2
106 , _scst_failed = Just 0
107 , _scst_remaining = Just 0
108 , _scst_events = Just []
110 ------------------------------------------------------------------------
111 hyperdataDocumentFromFrameWrite :: (HyperdataFrame, T.Text) -> Either T.Text HyperdataDocument
112 hyperdataDocumentFromFrameWrite (HyperdataFrame { _hf_base, _hf_frame_id }, contents) =
113 case parseLines contents of
114 Left _ -> Left "Error parsing node"
115 Right (Parsed { authors, contents = c, date, source, title = t }) ->
116 let authorJoinSingle (Author { firstName, lastName }) = T.concat [ lastName, ", ", firstName ] in
117 let authors' = T.concat $ authorJoinSingle <$> authors in
118 Right HyperdataDocument { _hd_bdd = Just "FrameWrite"
121 , _hd_uniqId = Nothing
122 , _hd_uniqIdBdd = Nothing
125 , _hd_authors = Just authors'
126 , _hd_institutes = Nothing
127 , _hd_source = source
128 , _hd_abstract = Just c
129 , _hd_publication_date = date
130 , _hd_publication_year = Nothing -- TODO
131 , _hd_publication_month = Nothing -- TODO
132 , _hd_publication_day = Nothing -- TODO
133 , _hd_publication_hour = Nothing
134 , _hd_publication_minute = Nothing
135 , _hd_publication_second = Nothing
136 , _hd_language_iso2 = Just $ T.pack $ show EN }