{-|
Module      : Gargantext.API.Node.DocumentsFromWriteNodes
Description : Build corpus documents from a node's Write (Frame) children
Copyright   : (c) CNRS, 2017-Present
License     : AGPL + CECILL v3
Maintainer  : team@gargantext.org
Stability   : experimental
Portability : POSIX

-}

{-# LANGUAGE MonoLocalBinds  #-}
{-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE TypeOperators   #-}

module Gargantext.API.Node.DocumentsFromWriteNodes
  where

import Conduit
import Control.Lens ((^.))
import Data.Aeson
import Data.Either (Either(..), rights)
import Data.Swagger
import qualified Data.Text as T
import Gargantext.API.Admin.Orchestrator.Types (JobLog(..), AsyncJobs)
import Gargantext.API.Admin.Types (HasSettings)
import Gargantext.API.Job (jobLogSuccess, jobLogFailTotalWithMessage)
import Gargantext.API.Prelude (GargServer)
import Gargantext.Core (Lang(..))
import Gargantext.Core.Text.Corpus.Parsers.FrameWrite
import Gargantext.Core.Text.Terms (TermType(..))
import Gargantext.Core.Types.Individu (User(..))
import Gargantext.Database.Action.Flow (flowDataText, DataText(..))
import Gargantext.Database.Action.Flow.Types (FlowCmdM)
import Gargantext.Database.Admin.Types.Hyperdata.Document (HyperdataDocument(..))
import Gargantext.Database.Admin.Types.Hyperdata.Frame
import Gargantext.Database.Admin.Types.Node
import Gargantext.Database.Query.Table.Node (getChildrenByType, getClosestParentIdByType', getNodeWith)
import Gargantext.Database.Schema.Node (node_hyperdata)
import qualified Gargantext.Defaults as Defaults
import Gargantext.Prelude
import GHC.Generics (Generic)
import Servant
import Servant.Job.Async (JobFunction(..), serveJobsAPI)

------------------------------------------------------------------------
type API = Summary " Documents from Write nodes."
         :> AsyncJobs JobLog '[JSON] Params JobLog
------------------------------------------------------------------------
newtype Params = Params { id :: Int }
  deriving (Generic, Show)

instance FromJSON Params where
  parseJSON = genericParseJSON defaultOptions
instance ToJSON Params where
  toJSON = genericToJSON defaultOptions
instance ToSchema Params
------------------------------------------------------------------------
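-- | Serve the async job that turns the Write (Frame) children of the
--   given node into documents of its parent corpus.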
api :: UserId -> NodeId -> GargServer API
api uId nId =
  serveJobsAPI $
    JobFunction (\p log'' ->
      let
        -- lift the job logger into the worker monad once, then pass it along
        log' x = liftBase $ log'' x
      in documentsFromWriteNodes uId nId p log'
      )

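-- | Fetch the Write (Frame) children of the node, download their contents,
--   parse them into 'HyperdataDocument's and flow them into the closest
--   parent corpus, reporting progress through 'logStatus'.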
documentsFromWriteNodes :: (HasSettings env, FlowCmdM env err m)
                        => UserId
                        -> NodeId
                        -> Params
                        -> (JobLog -> m ())
                        -> m JobLog
documentsFromWriteNodes uId nId _p logStatus = do
  let jobLog = JobLog { _scst_succeeded = Just 1
                      , _scst_failed    = Just 0
                      , _scst_remaining = Just 1
                      , _scst_events    = Just []
                      }
  logStatus jobLog

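  -- Find the closest corpus ancestor of this node; fail the whole job if there is none.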
  mcId <- getClosestParentIdByType' nId NodeCorpus
  cId <- case mcId of
    Just cId -> pure cId
    Nothing  -> do
      let msg = T.pack $ "[G.A.N.DFWN] Node has no corpus parent: " <> show nId
      logStatus $ jobLogFailTotalWithMessage msg jobLog
      panic msg
  frameWriteIds <- getChildrenByType nId NodeFrameWrite

  -- https://write.frame.gargantext.org/<frame_id>/download
  frameWrites <- mapM (\id -> getNodeWith id (Proxy :: Proxy HyperdataFrame)) frameWriteIds

  frameWritesWithContents <- liftBase $
    mapM (\node -> do
           contents <- getHyperdataFrameContents (node ^. node_hyperdata)
           pure (node, contents)
         ) frameWrites
  let parsedE = (\(node, contents) ->
                   hyperdataDocumentFromFrameWrite (node ^. node_hyperdata, contents)) <$> frameWritesWithContents
  let parsed  = rights parsedE

  _ <- flowDataText (RootId (NodeId uId))
                    (DataNew (Just $ fromIntegral $ length parsed, yieldMany parsed))
                    (Multi EN) cId Nothing logStatus

  pure $ jobLogSuccess jobLog
------------------------------------------------------------------------
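-- | Build a 'HyperdataDocument' from a frame's raw contents. 'parseLines'
--   extracts the title, authors, date and source; missing date fields fall
--   back to the 'Defaults' values.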
hyperdataDocumentFromFrameWrite :: (HyperdataFrame, T.Text) -> Either T.Text HyperdataDocument
hyperdataDocumentFromFrameWrite (HyperdataFrame { _hf_base, _hf_frame_id }, contents) =
  case parseLines contents of
    Left  _ -> Left "Error parsing node"
    Right (Parsed { authors, contents = c, date, source, title = t }) ->
      let authorJoinSingle (Author { firstName, lastName }) = T.concat [ lastName, ", ", firstName ]
          authors' = T.concat $ authorJoinSingle <$> authors
          date' = (\(Date { year, month, day }) -> T.concat [ T.pack $ show year, "-"
                                                            , T.pack $ show month, "-"
                                                            , T.pack $ show day ]) <$> date
          year'  = fromIntegral $ maybe Defaults.year (\(Date { year }) -> year) date
          month' = maybe Defaults.month (\(Date { month }) -> fromIntegral month) date
          day'   = maybe Defaults.day   (\(Date { day })   -> fromIntegral day)   date
      in
        Right HyperdataDocument { _hd_bdd = Just "FrameWrite"
                                , _hd_doi = Nothing
                                , _hd_url = Nothing
                                , _hd_uniqId = Nothing
                                , _hd_uniqIdBdd = Nothing
                                , _hd_page = Nothing
                                , _hd_title = Just t
                                , _hd_authors = Just authors'
                                , _hd_institutes = Nothing
                                , _hd_source = source
                                , _hd_abstract = Just c
                                , _hd_publication_date = date'
                                , _hd_publication_year = Just year'
                                , _hd_publication_month = Just month'
                                , _hd_publication_day = Just day'
                                , _hd_publication_hour = Nothing
                                , _hd_publication_minute = Nothing
                                , _hd_publication_second = Nothing
                                , _hd_language_iso2 = Just $ T.pack $ show EN }