2 Module : Gargantext.Text.Parsers.Isidore
3 Description : To query French Humanities publication database
4 Copyright : (c) CNRS, 2019-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
11 - put endpoint in configuration file
12 - more flexible fields of research
14 - use more ontologies to help building corpora
18 {-# LANGUAGE NoImplicitPrelude #-}
19 {-# LANGUAGE ScopedTypeVariables #-}
20 {-# LANGUAGE OverloadedStrings #-}
22 module Gargantext.Text.Parsers.Isidore where
24 import Control.Lens hiding (contains)
25 import Data.ByteString.Lazy (ByteString)
26 import Data.RDF hiding (triple, Query)
27 import Data.Text hiding (groupBy, map)
28 import Database.HSparql.Connection
29 import Database.HSparql.QueryGenerator
30 import Gargantext.Database.Types.Node (HyperdataDocument(..))
31 import Gargantext.Prelude
32 import Gargantext.Core (Lang)
33 import Network.Wreq (getWith, Response, defaults, header, param, responseStatus, responseBody)
34 import Prelude (String)
37 route = "https://isidore.science/sparql/"
39 selectQueryRaw' :: String -> String -> IO (Response ByteString)
40 selectQueryRaw' uri q = getWith opts uri
42 opts = defaults & header "Accept" .~ ["application/sparql-results+xml"]
43 & header "User-Agent" .~ ["gargantext-hsparql-client"]
44 & param "query" .~ [Data.Text.pack q]
46 isidoreGet :: Lang -> Text -> IO (Maybe [HyperdataDocument])
48 bindingValues <- isidoreGet' q
50 Nothing -> pure Nothing
51 Just dv -> pure $ Just $ map (bind2doc l) dv
53 isidoreGet' :: Text -> IO (Maybe [[BindingValue]])
55 let s = createSelectQuery $ isidoreSelect q
57 r <- selectQueryRaw' route s
58 putStrLn $ show $ r ^. responseStatus
59 pure $ structureContent $ r ^. responseBody
60 -- res <- selectQuery route $ simpleSelect q
63 isidoreSelect :: Text -> Query SelectQuery
65 -- See Predefined Namespace Prefixes:
66 -- https://isidore.science/sparql?nsdecl
67 isidore <- prefix "isidore" (iriRef "http://www.rechercheisidore.fr/class/")
68 rdf <- prefix "rdf" (iriRef "http://www.w3.org/1999/02/22-rdf-syntax-ns#")
69 dcterms <- prefix "dcterms" (iriRef "http://purl.org/dc/terms/")
70 dc <- prefix "dc" (iriRef "http://purl.org/dc/elements/1.1/")
71 --iso <- prefix "fra" (iriRef "http://lexvo.org/id/iso639-3/")
72 --ore <- prefix "ore" (iriRef "http://www.openarchives.org/ore/terms/")
73 --bif <- prefix "bif" (iriRef "bif:")
86 triple_ link (rdf .:. "type") (isidore .:. "BibliographicalResource")
87 triple_ link (dcterms .:. "title") title
88 triple_ link (dcterms .:. "date") date
89 triple_ link (dcterms .:. "creator") authors
90 triple_ link (dcterms .:. "language") langDoc
91 triple_ link (dc .:. "description") abstract
92 --triple_ link (ore .:. "isAggregatedBy") agg
93 --triple_ agg (dcterms .:. "title") title
95 optional_ $ triple_ link (dcterms .:. "source") source
96 optional_ $ triple_ link (dcterms .:. "publisher") publisher
98 -- TODO FIX BUG with (.||.) operator
99 --filterExpr $ (.||.) (contains title q) (contains title q)
100 filterExpr_ (containsWith title q) -- (contains abstract q)
101 --filterExpr (containsWith abstract q) -- (contains abstract q)
103 -- TODO FIX filter with lang
104 --filterExpr $ langMatches title (str ("fra" :: Text))
105 --filterExpr $ (.==.) lang (str ("http://lexvo.org/id/iso639-3/fra" :: Text))
110 selectVars [link, date, langDoc, authors, source, publisher, title, abstract]
112 -- | TODO : check if all cases are taken into account
113 unbound :: Lang -> BindingValue -> Maybe Text
114 unbound _ Unbound = Nothing
115 unbound _ (Bound (UNode x)) = Just x
116 unbound _ (Bound (LNode (TypedL x _))) = Just x
117 unbound _ (Bound (LNode (PlainL x))) = Just x
118 unbound l (Bound (LNode (PlainLL x l'))) = if l' == (toLower $ cs $ show l) then Just x else Nothing
119 unbound _ _ = Nothing
121 bind2doc :: Lang -> [BindingValue] -> HyperdataDocument
122 bind2doc l [ link, date, langDoc, authors, _source, publisher, title, abstract ] =
123 HyperdataDocument (Just "Isidore")
126 Nothing Nothing Nothing
130 (unbound l publisher)
133 Nothing Nothing Nothing Nothing Nothing Nothing
136 bind2doc _ _ = undefined