]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/API/Node/DocumentsFromWriteNodes.hs
[write nodes] use closest corpus as parent
[gargantext.git] / src / Gargantext / API / Node / DocumentsFromWriteNodes.hs
1 {-|
2 Module : Gargantext.API.Node.DocumentsFromWriteNodes
3 Description :
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 -}
11
12 {-# LANGUAGE MonoLocalBinds #-}
13 {-# LANGUAGE TemplateHaskell #-}
14 {-# LANGUAGE TypeOperators #-}
15 {-# OPTIONS_GHC -fno-warn-orphans #-}
16
17 module Gargantext.API.Node.DocumentsFromWriteNodes
18 where
19
20 import Control.Lens ((^.))
21 import Data.Aeson
22 import Data.Either (Either(..), rights)
23 import Data.Swagger
24 import qualified Data.Text as T
25 import Gargantext.API.Admin.Orchestrator.Types (JobLog(..), AsyncJobs)
26 import Gargantext.API.Admin.Types (HasSettings)
27 import Gargantext.API.Prelude (GargServer)
28 import Gargantext.Core (Lang(..))
29 import Gargantext.Core.Text.Corpus.Parsers.FrameWrite
30 import Gargantext.Core.Text.Terms (TermType(..))
31 import Gargantext.Core.Types.Individu (User(..))
32 import Gargantext.Database.Action.Flow (flowDataText, DataText(..))
33 import Gargantext.Database.Action.Flow.Types (FlowCmdM)
34 import Gargantext.Database.Admin.Types.Hyperdata.Document (HyperdataDocument(..))
35 import Gargantext.Database.Admin.Types.Hyperdata.Frame
36 import Gargantext.Database.Admin.Types.Node
37 import Gargantext.Database.Query.Table.Node (getChildrenByType, getClosestParentIdByType', getNodeWith)
38 import Gargantext.Database.Schema.Node (node_hyperdata)
39 import Gargantext.Prelude
40 import GHC.Generics (Generic)
41 import Servant
42 import Servant.Job.Async (JobFunction(..), serveJobsAPI)
43
44 ------------------------------------------------------------------------
45 type API = Summary " Documents from Write nodes."
46 :> AsyncJobs JobLog '[JSON] Params JobLog
47 ------------------------------------------------------------------------
48 newtype Params = Params { id :: Int }
49 deriving (Generic, Show)
50
51 instance FromJSON Params where
52 parseJSON = genericParseJSON defaultOptions
53 instance ToJSON Params where
54 toJSON = genericToJSON defaultOptions
55 instance ToSchema Params
56 ------------------------------------------------------------------------
57 api :: UserId -> NodeId -> GargServer API
58 api uId nId =
59 serveJobsAPI $
60 JobFunction (\p log'' ->
61 let
62 log' x = do
63 liftBase $ log'' x
64 in documentsFromWriteNodes uId nId p (liftBase . log')
65 )
66
67 documentsFromWriteNodes :: (HasSettings env, FlowCmdM env err m)
68 => UserId
69 -> NodeId
70 -> Params
71 -> (JobLog -> m ())
72 -> m JobLog
73 documentsFromWriteNodes uId nId _p logStatus = do
74
75 logStatus JobLog { _scst_succeeded = Just 1
76 , _scst_failed = Just 0
77 , _scst_remaining = Just 1
78 , _scst_events = Just []
79 }
80
81 mcId <- getClosestParentIdByType' nId NodeCorpus
82 let cId = maybe (panic "[G.A.N.DFWN] Node has no parent") identity mcId
83
84 frameWriteIds <- getChildrenByType nId NodeFrameWrite
85
86 -- https://write.frame.gargantext.org/<frame_id>/download
87 frameWrites <- mapM (\id -> getNodeWith id (Proxy :: Proxy HyperdataFrame)) frameWriteIds
88
89 frameWritesWithContents <- liftBase $
90 mapM (\node -> do
91 contents <- getHyperdataFrameContents (node ^. node_hyperdata)
92 pure (node, contents)
93 ) frameWrites
94
95 let parsedE = (\(node, contents) -> hyperdataDocumentFromFrameWrite (node ^. node_hyperdata, contents)) <$> frameWritesWithContents
96 let parsed = rights parsedE
97
98 _ <- flowDataText (RootId (NodeId uId)) (DataNew [parsed]) (Multi EN) cId Nothing
99
100 pure JobLog { _scst_succeeded = Just 2
101 , _scst_failed = Just 0
102 , _scst_remaining = Just 0
103 , _scst_events = Just []
104 }
105 ------------------------------------------------------------------------
106 hyperdataDocumentFromFrameWrite :: (HyperdataFrame, T.Text) -> Either T.Text HyperdataDocument
107 hyperdataDocumentFromFrameWrite (HyperdataFrame { _hf_base, _hf_frame_id }, contents) =
108 case parseLines contents of
109 Left _ -> Left "Error parsing node"
110 Right (Parsed { authors, contents = c, date, source, title = t }) ->
111 let authorJoinSingle (Author { firstName, lastName }) = T.concat [ lastName, ", ", firstName ] in
112 let authors' = T.concat $ authorJoinSingle <$> authors in
113 Right HyperdataDocument { _hd_bdd = Just "FrameWrite"
114 , _hd_doi = Nothing
115 , _hd_url = Nothing
116 , _hd_uniqId = Nothing
117 , _hd_uniqIdBdd = Nothing
118 , _hd_page = Nothing
119 , _hd_title = Just t
120 , _hd_authors = Just authors'
121 , _hd_institutes = Nothing
122 , _hd_source = source
123 , _hd_abstract = Just c
124 , _hd_publication_date = date
125 , _hd_publication_year = Nothing -- TODO
126 , _hd_publication_month = Nothing -- TODO
127 , _hd_publication_day = Nothing -- TODO
128 , _hd_publication_hour = Nothing
129 , _hd_publication_minute = Nothing
130 , _hd_publication_second = Nothing
131 , _hd_language_iso2 = Just $ T.pack $ show EN }