]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Text/Corpus/Parsers/Isidore.hs
working on perf
[gargantext.git] / src / Gargantext / Text / Corpus / Parsers / Isidore.hs
1 {-|
2 Module : Gargantext.Text.Corpus.Parsers.Isidore
3 Description : To query French Humanities publication database
4 Copyright : (c) CNRS, 2019-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 TODO:
11 - put endpoint in configuration file
12 - more flexible fields of research
13 - type database name
14 - use more ontologies to help building corpora
15 -}
16
17
18 {-# LANGUAGE NoImplicitPrelude #-}
19 {-# LANGUAGE ScopedTypeVariables #-}
20 {-# LANGUAGE OverloadedStrings #-}
21
22 module Gargantext.Text.Corpus.Parsers.Isidore where
23
24 import Control.Lens hiding (contains)
25 import Data.ByteString.Lazy (ByteString)
26 import Data.RDF hiding (triple, Query)
27 import Data.Text hiding (groupBy, map)
28 import Database.HSparql.Connection
29 import Database.HSparql.QueryGenerator
30 import Gargantext.Database.Types.Node (HyperdataDocument(..))
31 import Gargantext.Prelude
32 import Gargantext.Core (Lang)
33 import Network.Wreq (getWith, Response, defaults, header, param, responseStatus, responseBody)
34 import Prelude (String)
35
36 route :: EndPoint
37 route = "https://isidore.science/sparql/"
38
39 selectQueryRaw' :: String -> String -> IO (Response ByteString)
40 selectQueryRaw' uri q = getWith opts uri
41 where
42 opts = defaults & header "Accept" .~ ["application/sparql-results+xml"]
43 & header "User-Agent" .~ ["gargantext-hsparql-client"]
44 & param "query" .~ [Data.Text.pack q]
45
46 isidoreGet :: Lang -> Int -> Text -> IO (Maybe [HyperdataDocument])
47 isidoreGet la li q = do
48 bindingValues <- isidoreGet' li q
49 case bindingValues of
50 Nothing -> pure Nothing
51 Just dv -> pure $ Just $ map (bind2doc la) dv
52
53 isidoreGet' :: Int -> Text -> IO (Maybe [[BindingValue]])
54 isidoreGet' l q = do
55 let s = createSelectQuery $ isidoreSelect l q
56 putStrLn s
57 r <- selectQueryRaw' route s
58 putStrLn $ show $ r ^. responseStatus
59 pure $ structureContent $ r ^. responseBody
60 -- res <- selectQuery route $ simpleSelect q
61 -- pure res
62
63 isidoreSelect :: Int -> Text -> Query SelectQuery
64 isidoreSelect lim q = do
65 -- See Predefined Namespace Prefixes:
66 -- https://isidore.science/sparql?nsdecl
67 isidore <- prefix "isidore" (iriRef "http://isidore.science/class/")
68 rdf <- prefix "rdf" (iriRef "http://www.w3.org/1999/02/22-rdf-syntax-ns#")
69 dcterms <- prefix "dcterms" (iriRef "http://purl.org/dc/terms/")
70 dc <- prefix "dc" (iriRef "http://purl.org/dc/elements/1.1/")
71 --iso <- prefix "fra" (iriRef "http://lexvo.org/id/iso639-3/")
72 --ore <- prefix "ore" (iriRef "http://www.openarchives.org/ore/terms/")
73 --bif <- prefix "bif" (iriRef "bif:")
74
75 link <- var
76 title <- var
77 date <- var
78 abstract <- var
79 authors <- var
80 source <- var
81 langDoc <- var
82 publisher <- var
83 --agg <- var
84
85 triple_ link (rdf .:. "type") (isidore .:. "Document")
86 triple_ link (dcterms .:. "title") title
87 triple_ link (dcterms .:. "date") date
88 triple_ link (dcterms .:. "creator") authors
89 --triple_ link (dcterms .:. "language") langDoc
90 triple_ link (dc .:. "description") abstract
91 --triple_ link (ore .:. "isAggregatedBy") agg
92 --triple_ agg (dcterms .:. "title") title
93
94 optional_ $ triple_ link (dcterms .:. "source") source
95 optional_ $ triple_ link (dcterms .:. "publisher") publisher
96
97 -- TODO FIX BUG with (.||.) operator
98 --filterExpr_ $ (.||.) (contains title q) (contains abstract q)
99 --filterExpr_ (containsWith authors q) -- (contains abstract q)
100 --filterExpr_ (containsWith title q) -- (contains abstract q)
101 --filterExpr_ $ (.||.) (containsWith title q) (contains abstract q)
102 filterExpr_ (containsWith title q)
103
104 -- TODO FIX filter with lang
105 --filterExpr_ $ langMatches title (str ("fra" :: Text))
106 --filterExpr_ $ (.==.) langDoc (str ("http://lexvo.org/id/iso639-3/fra" :: Text))
107
108 orderNextDesc date
109 limit_ lim
110 distinct_
111 selectVars [link, date, langDoc, authors, source, publisher, title, abstract]
112
113 -- | TODO : check if all cases are taken into account
114 unbound :: Lang -> BindingValue -> Maybe Text
115 unbound _ Unbound = Nothing
116 unbound _ (Bound (UNode x)) = Just x
117 unbound _ (Bound (LNode (TypedL x _))) = Just x
118 unbound _ (Bound (LNode (PlainL x))) = Just x
119 unbound l (Bound (LNode (PlainLL x l'))) = if l' == (toLower $ cs $ show l) then Just x else Nothing
120 unbound _ _ = Nothing
121
122 bind2doc :: Lang -> [BindingValue] -> HyperdataDocument
123 bind2doc l [ link, date, langDoc, authors, _source, publisher, title, abstract ] =
124 HyperdataDocument (Just "Isidore")
125 Nothing
126 (unbound l link)
127 Nothing Nothing Nothing
128 (unbound l title)
129 (unbound l authors)
130 Nothing
131 (unbound l publisher)
132 (unbound l abstract)
133 (unbound l date)
134 Nothing Nothing Nothing Nothing Nothing Nothing
135 (unbound l langDoc)
136
137 bind2doc _ _ = undefined