{-|
Module      : Gargantext.API.Node.DocumentsFromWriteNodes
Description : Build corpus documents from the Write (frame) nodes of a node
Copyright   : (c) CNRS, 2017-Present
License     : AGPL + CECILL v3
Maintainer  : team@gargantext.org
Stability   : experimental
Portability : POSIX

-}

{-# LANGUAGE MonoLocalBinds  #-}
{-# LANGUAGE NamedFieldPuns  #-}
{-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE TypeOperators   #-}

module Gargantext.API.Node.DocumentsFromWriteNodes
  where

import Control.Lens ((^.))
import Data.Aeson
import Data.Either (Either(..), rights)
import Data.Swagger
import qualified Data.Text as T
import Gargantext.API.Admin.Orchestrator.Types (JobLog(..), AsyncJobs)
import Gargantext.API.Admin.Types (HasSettings)
import Gargantext.API.Job (jobLogSuccess, jobLogFailTotalWithMessage)
import Gargantext.API.Prelude (GargServer)
import Gargantext.Core (Lang(..))
import Gargantext.Core.Text.Corpus.Parsers.FrameWrite
import Gargantext.Core.Text.Terms (TermType(..))
import Gargantext.Core.Types.Individu (User(..))
import Gargantext.Database.Action.Flow (flowDataText, DataText(..))
import Gargantext.Database.Action.Flow.Types (FlowCmdM)
import Gargantext.Database.Admin.Types.Hyperdata.Document (HyperdataDocument(..))
import Gargantext.Database.Admin.Types.Hyperdata.Frame
import Gargantext.Database.Admin.Types.Node
import Gargantext.Database.Query.Table.Node (getChildrenByType, getClosestParentIdByType', getNodeWith)
import Gargantext.Database.Schema.Node (node_hyperdata)
import Gargantext.Prelude
import GHC.Generics (Generic)
import Servant
import Servant.Job.Async (JobFunction(..), serveJobsAPI)

------------------------------------------------------------------------
type API = Summary " Documents from Write nodes."
         :> AsyncJobs JobLog '[JSON] Params JobLog
------------------------------------------------------------------------
newtype Params = Params { id :: Int }
  deriving (Generic, Show)

instance FromJSON Params where
  parseJSON = genericParseJSON defaultOptions
instance ToJSON Params where
  toJSON = genericToJSON defaultOptions
instance ToSchema Params
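
-- Note: with 'defaultOptions', the JSON payload is a single-field object,
-- e.g. {"id": 42} (42 is only an example value).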
------------------------------------------------------------------------
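-- | Serve the asynchronous job: each call schedules a run of
--   'documentsFromWriteNodes' for the node whose id is taken from the route.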
api :: UserId -> NodeId -> GargServer API
api uId nId =
  serveJobsAPI $
    JobFunction (\p log'' ->
      let
        log' x = liftBase $ log'' x
      in documentsFromWriteNodes uId nId p log'
      )

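-- | Job body:
--   1. find the closest NodeCorpus parent of the given node,
--   2. list its NodeFrameWrite children and fetch their contents,
--   3. parse each frame into a 'HyperdataDocument' (see 'hyperdataDocumentFromFrameWrite'),
--   4. flow the successfully parsed documents into that corpus.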
documentsFromWriteNodes :: (HasSettings env, FlowCmdM env err m)
                        => UserId
                        -> NodeId
                        -> Params
                        -> (JobLog -> m ())
                        -> m JobLog
documentsFromWriteNodes uId nId _p logStatus = do
  let jobLog = JobLog { _scst_succeeded = Just 1
                      , _scst_failed    = Just 0
                      , _scst_remaining = Just 1
                      , _scst_events    = Just []
                      }
  logStatus jobLog

  mcId <- getClosestParentIdByType' nId NodeCorpus
  cId <- case mcId of
    Just cId -> pure cId
    Nothing -> do
      let msg = T.pack $ "[G.A.N.DFWN] Node has no corpus parent: " <> show nId
      logStatus $ jobLogFailTotalWithMessage msg jobLog
      panic msg

  frameWriteIds <- getChildrenByType nId NodeFrameWrite

  -- https://write.frame.gargantext.org/<frame_id>/download
  frameWrites <- mapM (\frameWriteId -> getNodeWith frameWriteId (Proxy :: Proxy HyperdataFrame)) frameWriteIds

  frameWritesWithContents <- liftBase $
    mapM (\node -> do
           contents <- getHyperdataFrameContents (node ^. node_hyperdata)
           pure (node, contents)
         ) frameWrites

  let parsedE = (\(node, contents) -> hyperdataDocumentFromFrameWrite (node ^. node_hyperdata, contents)) <$> frameWritesWithContents
  let parsed  = rights parsedE

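  -- Terms extraction language is hard-coded to (Multi EN) for now; the parsed
  -- documents are flowed into the corpus parent found above.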
  _ <- flowDataText (RootId (NodeId uId)) (DataNew [parsed]) (Multi EN) cId Nothing logStatus

  pure $ jobLogSuccess jobLog
------------------------------------------------------------------------
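-- | Turn the raw contents of a Write frame into a 'HyperdataDocument'.
--   'parseLines' extracts title, authors, source, date and body; when the
--   frame carries no date, the publication date falls back to 2021-10-04.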
hyperdataDocumentFromFrameWrite :: (HyperdataFrame, T.Text) -> Either T.Text HyperdataDocument
hyperdataDocumentFromFrameWrite (HyperdataFrame { _hf_base, _hf_frame_id }, contents) =
  case parseLines contents of
    Left _ -> Left "Error parsing node"
    Right (Parsed { authors, contents = c, date, source, title = t }) ->
      let authorJoinSingle (Author { firstName, lastName }) = T.concat [ lastName, ", ", firstName ]
          authors' = T.concat $ authorJoinSingle <$> authors
          date' = (\(Date { year, month, day }) -> T.concat [ T.pack $ show year, "-"
                                                            , T.pack $ show month, "-"
                                                            , T.pack $ show day ]) <$> date
          year'  = fromIntegral $ maybe 2021 (\(Date { year })  -> year)  date
          month' = fromIntegral $ maybe 10   (\(Date { month }) -> month) date
          day'   = fromIntegral $ maybe 4    (\(Date { day })   -> day)   date
      in
      Right HyperdataDocument { _hd_bdd = Just "FrameWrite"
                              , _hd_doi = Nothing
                              , _hd_url = Nothing
                              , _hd_uniqId = Nothing
                              , _hd_uniqIdBdd = Nothing
                              , _hd_page = Nothing
                              , _hd_title = Just t
                              , _hd_authors = Just authors'
                              , _hd_institutes = Nothing
                              , _hd_source = source
                              , _hd_abstract = Just c
                              , _hd_publication_date = date'
                              , _hd_publication_year = Just year'
                              , _hd_publication_month = Just month'
                              , _hd_publication_day = Just day'
                              , _hd_publication_hour = Nothing
                              , _hd_publication_minute = Nothing
                              , _hd_publication_second = Nothing
                              , _hd_language_iso2 = Just $ T.pack $ show EN }