]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/API/Node/DocumentsFromWriteNodes.hs
[MERGE]
[gargantext.git] / src / Gargantext / API / Node / DocumentsFromWriteNodes.hs
1 {-|
2 Module : Gargantext.API.Node.DocumentsFromWriteNodes
3 Description :
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 -}
11
12 {-# LANGUAGE MonoLocalBinds #-}
13 {-# LANGUAGE TemplateHaskell #-}
14 {-# LANGUAGE TypeOperators #-}
15
16 module Gargantext.API.Node.DocumentsFromWriteNodes
17 where
18
19 -- import Data.Maybe (fromMaybe)
20 import Conduit
21 import Control.Lens ((^.))
22 import Data.Aeson
23 import Data.Either (Either(..), rights)
24 import Data.Maybe (fromMaybe)
25 import Data.Swagger
26 import Data.Text (Text)
27 import GHC.Generics (Generic)
28 import Gargantext.API.Admin.EnvTypes (Env, GargJob(..))
29 import Gargantext.API.Admin.Orchestrator.Types (JobLog(..), AsyncJobs)
30 import Gargantext.API.Admin.Types (HasSettings)
31 import Gargantext.API.Job (jobLogSuccess, jobLogFailTotalWithMessage)
32 import Gargantext.API.Prelude (GargM, GargError)
33 import Gargantext.Core (Lang(..))
34 import Gargantext.Core.Text.Corpus.Parsers.FrameWrite
35 import Gargantext.Core.Text.List.Social (FlowSocialListWith)
36 import Gargantext.Core.Text.Terms (TermType(..))
37 import Gargantext.Core.Types.Individu (User(..))
38 import Gargantext.Database.Action.Flow (flowDataText, DataText(..))
39 import Gargantext.Database.Action.Flow.Types (FlowCmdM)
40 import Gargantext.Database.Admin.Types.Hyperdata.Document (HyperdataDocument(..))
41 import Gargantext.Database.Admin.Types.Hyperdata.Frame
42 import Gargantext.Database.Admin.Types.Node
43 import Gargantext.Database.Query.Table.Node (getChildrenByType, getClosestParentIdByType', getNodeWith)
44 import Gargantext.Database.Schema.Node (node_hyperdata, node_name, node_date)
45 import Gargantext.Prelude
46 import Gargantext.Utils.Jobs (serveJobsAPI)
47 import Gargantext.Core.Text.Corpus.Parsers.Date (split')
48 import Servant
49 import Text.Read (readMaybe)
50 import qualified Data.List as List
51 import qualified Data.Text as T
52 -- import qualified Gargantext.Defaults as Defaults
53
54 ------------------------------------------------------------------------
55 type API = Summary " Documents from Write nodes."
56 :> AsyncJobs JobLog '[JSON] Params JobLog
57 ------------------------------------------------------------------------
58 data Params = Params
59 { id :: Int
60 , paragraphs :: Text
61 , lang :: Lang
62 , selection :: FlowSocialListWith
63 }
64 deriving (Generic, Show)
65 instance FromJSON Params where
66 parseJSON = genericParseJSON defaultOptions
67 instance ToJSON Params where
68 toJSON = genericToJSON defaultOptions
69 instance ToSchema Params
70 ------------------------------------------------------------------------
71 api :: UserId -> NodeId -> ServerT API (GargM Env GargError)
72 api uId nId =
73 serveJobsAPI DocumentFromWriteNodeJob $ \p log'' ->
74 let
75 log' x = do
76 liftBase $ log'' x
77 in documentsFromWriteNodes uId nId p (liftBase . log')
78
79 documentsFromWriteNodes :: (HasSettings env, FlowCmdM env err m)
80 => UserId
81 -> NodeId
82 -> Params
83 -> (JobLog -> m ())
84 -> m JobLog
85 documentsFromWriteNodes uId nId Params { selection, lang, paragraphs } logStatus = do
86 let jobLog = JobLog { _scst_succeeded = Just 1
87 , _scst_failed = Just 0
88 , _scst_remaining = Just 1
89 , _scst_events = Just []
90 }
91 logStatus jobLog
92
93 mcId <- getClosestParentIdByType' nId NodeCorpus
94 cId <- case mcId of
95 Just cId -> pure cId
96 Nothing -> do
97 let msg = T.pack $ "[G.A.N.DFWN] Node has no corpus parent: " <> show nId
98 logStatus $ jobLogFailTotalWithMessage msg jobLog
99 panic msg
100
101 frameWriteIds <- getChildrenByType nId NodeFrameWrite
102
103 -- https://write.frame.gargantext.org/<frame_id>/download
104 frameWrites <- mapM (\id -> getNodeWith id (Proxy :: Proxy HyperdataFrame)) frameWriteIds
105
106 frameWritesWithContents <- liftBase $
107 mapM (\node -> do
108 contents <- getHyperdataFrameContents (node ^. node_hyperdata)
109 pure (node, contents)
110 ) frameWrites
111
112 let paragraphs' = readMaybe $ T.unpack paragraphs :: Maybe Int
113
114 let parsedE = (\(node, contents)
115 -> hyperdataDocumentFromFrameWrite lang (fromMaybe 7 paragraphs') (node, contents)) <$> frameWritesWithContents
116 let parsed = List.concat $ rights parsedE
117
118 _ <- flowDataText (RootId (NodeId uId))
119 (DataNew (Just $ fromIntegral $ length parsed, yieldMany parsed))
120 (Multi lang)
121 cId
122 (Just selection)
123 logStatus
124
125 pure $ jobLogSuccess jobLog
126
127 ------------------------------------------------------------------------
128 hyperdataDocumentFromFrameWrite :: Lang -> Int -> (Node HyperdataFrame, T.Text) -> Either T.Text [HyperdataDocument]
129 hyperdataDocumentFromFrameWrite lang paragraphSize (node, contents) =
130 case parseLines contents of
131 Left _ -> Left "Error parsing node"
132 Right (Parsed { authors, contents = ctxts}) ->
133 let HyperdataFrame { _hf_base, _hf_frame_id } = node ^. node_hyperdata
134 authorJoinSingle (Author { firstName, lastName }) = T.concat [ lastName, ", ", firstName ]
135 authors' = T.concat $ authorJoinSingle <$> authors
136
137 --{-
138 (year',month',day') = split' (node^. node_date)
139 date' = Just $ T.concat [ T.pack $ show year', "-"
140 , T.pack $ show month', "-"
141 , T.pack $ show day'
142 ]
143 --}
144
145 {-
146 date' = (\(Date { year, month, day }) -> T.concat [ T.pack $ show year', "-"
147 , T.pack $ show month', "-"
148 , T.pack $ show day' ]) <$> date
149 year' = fromIntegral $ maybe Defaults.year (\(Date { year }) -> year) date
150 month' = maybe Defaults.month (\(Date { month }) -> fromIntegral month) date
151 day' = maybe Defaults.day (\(Date { day }) -> fromIntegral day) date
152 --}
153 in
154 Right (List.map (\(t, ctxt) -> HyperdataDocument { _hd_bdd = Just "FrameWrite"
155 , _hd_doi = Nothing
156 , _hd_url = Nothing
157 , _hd_uniqId = Nothing
158 , _hd_uniqIdBdd = Nothing
159 , _hd_page = Nothing
160 , _hd_title = Just t
161 , _hd_authors = Just authors'
162 , _hd_institutes = Nothing
163 , _hd_source = Just $ node ^. node_name
164 , _hd_abstract = Just ctxt
165 , _hd_publication_date = date'
166 , _hd_publication_year = Just year'
167 , _hd_publication_month = Just month'
168 , _hd_publication_day = Just day'
169 , _hd_publication_hour = Nothing
170 , _hd_publication_minute = Nothing
171 , _hd_publication_second = Nothing
172 , _hd_language_iso2 = Just $ T.pack $ show lang }
173 ) (text2titleParagraphs paragraphSize ctxts)
174 )