2 Module : Gargantext.Text.Corpus.Parsers.Isidore
3 Description : To query French Humanities publication database
4 Copyright : (c) CNRS, 2019-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
11 - put endpoint in configuration file
12 - more flexible fields of research
14 - use more ontologies to help building corpora
18 {-# LANGUAGE NoImplicitPrelude #-}
19 {-# LANGUAGE ScopedTypeVariables #-}
20 {-# LANGUAGE OverloadedStrings #-}
22 module Gargantext.Text.Corpus.Parsers.Isidore where
24 import Control.Lens hiding (contains)
25 import Data.ByteString.Lazy (ByteString)
26 import Data.RDF hiding (triple, Query)
27 import Data.Text hiding (groupBy, map)
28 import Database.HSparql.Connection
29 import Database.HSparql.QueryGenerator
30 import Gargantext.Database.Types.Node (HyperdataDocument(..))
31 import Gargantext.Prelude
32 import Gargantext.Core (Lang)
33 import Network.Wreq (getWith, Response, defaults, header, param, responseStatus, responseBody)
34 import Prelude (String)
37 route = "https://isidore.science/sparql/"
39 selectQueryRaw' :: String -> String -> IO (Response ByteString)
40 selectQueryRaw' uri q = getWith opts uri
42 opts = defaults & header "Accept" .~ ["application/sparql-results+xml"]
43 & header "User-Agent" .~ ["gargantext-hsparql-client"]
44 & param "query" .~ [Data.Text.pack q]
46 isidoreGet :: Lang -> Int -> Text -> IO (Maybe [HyperdataDocument])
47 isidoreGet la li q = do
48 bindingValues <- isidoreGet' li q
50 Nothing -> pure Nothing
51 Just dv -> pure $ Just $ map (bind2doc la) dv
53 isidoreGet' :: Int -> Text -> IO (Maybe [[BindingValue]])
55 let s = createSelectQuery $ isidoreSelect l q
57 r <- selectQueryRaw' route s
58 putStrLn $ show $ r ^. responseStatus
59 pure $ structureContent $ r ^. responseBody
60 -- res <- selectQuery route $ simpleSelect q
63 isidoreSelect :: Int -> Text -> Query SelectQuery
64 isidoreSelect lim q = do
65 -- See Predefined Namespace Prefixes:
66 -- https://isidore.science/sparql?nsdecl
67 isidore <- prefix "isidore" (iriRef "http://isidore.science/class/")
68 rdf <- prefix "rdf" (iriRef "http://www.w3.org/1999/02/22-rdf-syntax-ns#")
69 dcterms <- prefix "dcterms" (iriRef "http://purl.org/dc/terms/")
70 dc <- prefix "dc" (iriRef "http://purl.org/dc/elements/1.1/")
71 --iso <- prefix "fra" (iriRef "http://lexvo.org/id/iso639-3/")
72 --ore <- prefix "ore" (iriRef "http://www.openarchives.org/ore/terms/")
73 --bif <- prefix "bif" (iriRef "bif:")
85 triple_ link (rdf .:. "type") (isidore .:. "Document")
86 triple_ link (dcterms .:. "title") title
87 triple_ link (dcterms .:. "date") date
88 triple_ link (dcterms .:. "creator") authors
89 --triple_ link (dcterms .:. "language") langDoc
90 triple_ link (dc .:. "description") abstract
91 --triple_ link (ore .:. "isAggregatedBy") agg
92 --triple_ agg (dcterms .:. "title") title
94 optional_ $ triple_ link (dcterms .:. "source") source
95 optional_ $ triple_ link (dcterms .:. "publisher") publisher
97 -- TODO FIX BUG with (.||.) operator
98 --filterExpr_ $ (.||.) (contains title q) (contains abstract q)
99 --filterExpr_ (containsWith authors q) -- (contains abstract q)
100 --filterExpr_ (containsWith title q) -- (contains abstract q)
101 --filterExpr_ $ (.||.) (containsWith title q) (contains abstract q)
102 filterExpr_ (containsWith title q)
104 -- TODO FIX filter with lang
105 --filterExpr_ $ langMatches title (str ("fra" :: Text))
106 --filterExpr_ $ (.==.) langDoc (str ("http://lexvo.org/id/iso639-3/fra" :: Text))
111 selectVars [link, date, langDoc, authors, source, publisher, title, abstract]
113 -- | TODO : check if all cases are taken into account
114 unbound :: Lang -> BindingValue -> Maybe Text
115 unbound _ Unbound = Nothing
116 unbound _ (Bound (UNode x)) = Just x
117 unbound _ (Bound (LNode (TypedL x _))) = Just x
118 unbound _ (Bound (LNode (PlainL x))) = Just x
119 unbound l (Bound (LNode (PlainLL x l'))) = if l' == (toLower $ cs $ show l) then Just x else Nothing
120 unbound _ _ = Nothing
122 bind2doc :: Lang -> [BindingValue] -> HyperdataDocument
123 bind2doc l [ link, date, langDoc, authors, _source, publisher, title, abstract ] =
124 HyperdataDocument (Just "Isidore")
127 Nothing Nothing Nothing
131 (unbound l publisher)
134 Nothing Nothing Nothing Nothing Nothing Nothing
137 bind2doc _ _ = undefined