How do we generate the sitemap.xml and submit to the search engines?

#!/usr/bin/php -q
<?php
require_once "/usr/lib/rkb/functions-utf.inc.php";
/**
 * Generates sitemap.xml for a linked data site that has
 * a triplestore which resolves URIs to provide a Symmetric Concise Bounded Description,
 * as well as a SPARQL endpoint,
 * and RDF files which are the source that populated the triplestore.
 *
 * After writing the sitemap it submits the sitemap URL to SWSE and Sindice.
 *
 * Usage: <this script> sub_domain_name
 *
 * Paths are relative to the current working directory: the site is expected
 * at ../<sub_domain>.rkbexplorer.com/ with optional about/name.txt,
 * about/typical.txt and about/changefreq.txt files and a models/ directory.
 */
$usage = "Usage: {$argv[0]} sub_domain_name\n";
if (!isset($argv[1])) {
    fwrite(STDERR, $usage);
    exit(1);
}

$base_domain = "rkbexplorer.com";
$sub_domain = $argv[1];
$domain = $sub_domain . "." . $base_domain;
$site_dir = "../$domain";
$outfile = "$site_dir/sitemap.xml";
$slicing = "subject-object";

// Optional human-readable dataset name (entities2accents comes from the required include).
$name = "";
if (file_exists("$site_dir/about/name.txt")) {
    $name = trim(entities2accents(file_get_contents("$site_dir/about/name.txt")));
}

// Optional sample URI for the dataset.
$typical = "";
if (file_exists("$site_dir/about/typical.txt")) {
    $typical = trim(file_get_contents("$site_dir/about/typical.txt"));
}

// Optional change frequency override; an empty file keeps the default.
$changefreq = "monthly";
if (file_exists("$site_dir/about/changefreq.txt")) {
    $custom_freq = trim(file_get_contents("$site_dir/about/changefreq.txt"));
    if ($custom_freq !== "") {
        $changefreq = $custom_freq;
    }
}

// Ask the rkb-utils tool for the last update date; exec() appends output lines to the array.
$updated = array();
exec("/var/www/vhosts/wildcard.rkbexplorer.com/repositories/tools/rkb-utils last-update-w3c " . escapeshellarg($sub_domain), $updated);
$lastmod = isset($updated[0]) ? trim($updated[0]) : "";

// Collect the RDF dump files; a missing models directory just means no dumps are listed.
$dumps = array();
$models_dir = "$site_dir/models";
if (is_dir($models_dir) && ($models = opendir($models_dir)) !== false) {
    while (false !== ($model = readdir($models))) {
        if (preg_match('/\.(rdf|ttl|n3|turtle|ntriples)$/', $model)) {
            $dumps[] = $model;
        }
    }
    closedir($models);
    sort($dumps); // readdir() order is filesystem dependent; keep the sitemap stable
} else {
    fwrite(STDERR, "Warning: cannot read $models_dir, no data dumps listed\n");
}

// Build the whole document in memory so a failure never leaves a half-written sitemap.
$xml  = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n";
$xml .= "<urlset xmlns=\"http://www.sitemaps.org/schemas/sitemap/0.9\"\n";
$xml .= "        xmlns:sc=\"http://sw.deri.org/2007/07/sitemapextension/scschema.xsd\">\n";
$xml .= "  <sc:dataset>\n";
$xml .= "    <sc:linkedDataPrefix slicing=\"$slicing\">http://$domain/id/</sc:linkedDataPrefix>\n";
$xml .= "    <sc:sparqlEndpointLocation>http://$domain/sparql/</sc:sparqlEndpointLocation>\n";
foreach ($dumps as $dump) {
    $dump_xml = htmlspecialchars(rawurlencode($dump), ENT_QUOTES, 'UTF-8');
    $xml .= "    <sc:dataDumpLocation>http://$domain/models/$dump_xml</sc:dataDumpLocation>\n";
}
$xml .= "    <sc:datasetURI>http://$domain/</sc:datasetURI>\n";
$xml .= "    <sc:datasetURI>http://$domain/id/void</sc:datasetURI>\n";
if ($name != "") {
    // double_encode=false: leave entities already present in the text alone
    $name_xml = htmlspecialchars($name, ENT_QUOTES, 'UTF-8', false);
    $xml .= "    <sc:datasetLabel>$name_xml RDF dataset from RKBExplorer.com</sc:datasetLabel>\n";
}
if ($typical != "") {
    $typical_xml = htmlspecialchars($typical, ENT_QUOTES, 'UTF-8', false);
    $xml .= "    <sc:sampleURI>$typical_xml</sc:sampleURI>\n";
}
if ($lastmod !== "") {
    $lastmod_xml = htmlspecialchars($lastmod, ENT_QUOTES, 'UTF-8');
    $xml .= "    <lastmod>$lastmod_xml</lastmod>\n";
}
$changefreq_xml = htmlspecialchars($changefreq, ENT_QUOTES, 'UTF-8');
$xml .= "    <changefreq>$changefreq_xml</changefreq>\n";
$xml .= "  </sc:dataset>\n";
$xml .= "</urlset>\n";

if (file_put_contents($outfile, $xml) === false) {
    fwrite(STDERR, "Error: cannot write $outfile\n");
    exit(1);
}

$sitemap_url = "http://$domain/sitemap.xml";

// Submit to SWSE (curl_exec echoes the response body straight to stdout)
print "Submitting $domain to SWSE: \n";
$ch = curl_init("http://swse.deri.org/ping?sitemap=" . urlencode($sitemap_url));
if ($ch === false) {
    fwrite(STDERR, "\tcould not initialise cURL\n");
} else {
    if (curl_exec($ch) === false) {
        fwrite(STDERR, "\t" . curl_error($ch) . "\n");
    }
    curl_close($ch);
}
print "\n";

// Submit POST request to Sindice
print "Submitting $domain to Sindice: \n";
$data = "url=" . urlencode($sitemap_url);
$fp = fsockopen("sindice.com", 80, $errno, $errstr);
if ($fp === false) {
    fwrite(STDERR, "\tconnection to sindice.com failed: $errstr ($errno)\n");
} else {
    fputs($fp, "POST /api/v1/sitemap HTTP/1.0\r\n");
    fputs($fp, "Host: sindice.com\r\n");
    fputs($fp, "Content-type: application/x-www-form-urlencoded\r\n");
    fputs($fp, "Content-length: " . strlen($data) . "\r\n\r\n");
    fputs($fp, $data);
    // Read the reply to completion; this version of the script does not report it.
    // Stop on a failed read as well, because feof() never turns true on a timed-out socket.
    while (!feof($fp)) {
        if (fgets($fp, 128) === false) {
            break;
        }
    }
    fclose($fp);
}
#!/usr/bin/php -q
<?php
require_once "/usr/lib/rkb/functions-utf.inc.php";
/**
 * Ian Millard and Hugh Glaser
 *
 * Generates sitemap.xml for a linked data site that has
 * a triplestore which resolves URIs to provide a Symmetric Concise Bounded Description,
 * as well as a SPARQL endpoint,
 * and RDF files which are the source that populated the triplestore.
 *
 * After writing the sitemap it submits the sitemap URL to the search engines
 * SWSE, Sindice and Ping the Semantic Web (the latter via ./ptsw.py).
 *
 * Usage: <this script> sub_domain_name
 *
 * Paths are relative to the current working directory: the site is expected
 * at ../<sub_domain>.rkbexplorer.com/ with optional about/name.txt,
 * about/typical.txt and about/changefreq.txt files and a models/ directory.
 */
$usage = "Usage: {$argv[0]} sub_domain_name\n";
if (!isset($argv[1])) {
    fwrite(STDERR, $usage);
    exit(1);
}

$base_domain = "rkbexplorer.com";
$sub_domain = $argv[1];
$domain = $sub_domain . "." . $base_domain;
$site_dir = "../$domain";
$outfile = "$site_dir/sitemap.xml";
$slicing = "subject-object";

// Optional human-readable dataset name (entities2accents comes from the required include).
$name = "";
if (file_exists("$site_dir/about/name.txt")) {
    $name = trim(entities2accents(file_get_contents("$site_dir/about/name.txt")));
}

// Optional sample URI for the dataset.
$typical = "";
if (file_exists("$site_dir/about/typical.txt")) {
    $typical = trim(file_get_contents("$site_dir/about/typical.txt"));
}

// Optional change frequency override; an empty file keeps the default.
$changefreq = "monthly";
if (file_exists("$site_dir/about/changefreq.txt")) {
    $custom_freq = trim(file_get_contents("$site_dir/about/changefreq.txt"));
    if ($custom_freq !== "") {
        $changefreq = $custom_freq;
    }
}

// Ask the rkb-utils tool for the last update date; exec() appends output lines to the array.
$updated = array();
exec("/var/www/vhosts/wildcard.rkbexplorer.com/repositories/tools/rkb-utils last-update-w3c " . escapeshellarg($sub_domain), $updated);
$lastmod = isset($updated[0]) ? trim($updated[0]) : "";

// Collect the RDF dump files; a missing models directory just means no dumps are listed.
$dumps = array();
$models_dir = "$site_dir/models";
if (is_dir($models_dir) && ($models = opendir($models_dir)) !== false) {
    while (false !== ($model = readdir($models))) {
        if (preg_match('/\.(rdf|ttl|n3|turtle|ntriples)$/', $model)) {
            $dumps[] = $model;
        }
    }
    closedir($models);
    sort($dumps); // readdir() order is filesystem dependent; keep the sitemap stable
} else {
    fwrite(STDERR, "Warning: cannot read $models_dir, no data dumps listed\n");
}

// Build the whole document in memory so a failure never leaves a half-written sitemap.
$xml  = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n";
$xml .= "<urlset xmlns=\"http://www.sitemaps.org/schemas/sitemap/0.9\"\n";
$xml .= "        xmlns:sc=\"http://sw.deri.org/2007/07/sitemapextension/scschema.xsd\">\n";
$xml .= "  <sc:dataset>\n";
$xml .= "    <sc:linkedDataPrefix slicing=\"$slicing\">http://$domain/id/</sc:linkedDataPrefix>\n";
$xml .= "    <sc:sparqlEndpointLocation>http://$domain/sparql/</sc:sparqlEndpointLocation>\n";
foreach ($dumps as $dump) {
    $dump_xml = htmlspecialchars(rawurlencode($dump), ENT_QUOTES, 'UTF-8');
    $xml .= "    <sc:dataDumpLocation>http://$domain/models/$dump_xml</sc:dataDumpLocation>\n";
}
$xml .= "    <sc:datasetURI>http://$domain/</sc:datasetURI>\n";
$xml .= "    <sc:datasetURI>http://$domain/id/void</sc:datasetURI>\n";
if ($name != "") {
    // double_encode=false: leave entities already present in the text alone
    $name_xml = htmlspecialchars($name, ENT_QUOTES, 'UTF-8', false);
    $xml .= "    <sc:datasetLabel>$name_xml RDF dataset from RKBExplorer.com</sc:datasetLabel>\n";
}
if ($typical != "") {
    $typical_xml = htmlspecialchars($typical, ENT_QUOTES, 'UTF-8', false);
    $xml .= "    <sc:sampleURI>$typical_xml</sc:sampleURI>\n";
}
if ($lastmod !== "") {
    $lastmod_xml = htmlspecialchars($lastmod, ENT_QUOTES, 'UTF-8');
    $xml .= "    <lastmod>$lastmod_xml</lastmod>\n";
}
$changefreq_xml = htmlspecialchars($changefreq, ENT_QUOTES, 'UTF-8');
$xml .= "    <changefreq>$changefreq_xml</changefreq>\n";
$xml .= "  </sc:dataset>\n";
$xml .= "</urlset>\n";

if (file_put_contents($outfile, $xml) === false) {
    fwrite(STDERR, "Error: cannot write $outfile\n");
    exit(1);
}

$sitemap_url = "http://$domain/sitemap.xml";

// Submit to SWSE (curl_exec echoes the response body straight to stdout)
print "Submitting $domain to SWSE: \n";
$ch = curl_init("http://swse.deri.org/ping?sitemap=" . urlencode($sitemap_url));
if ($ch === false) {
    fwrite(STDERR, "\tcould not initialise cURL\n");
} else {
    if (curl_exec($ch) === false) {
        fwrite(STDERR, "\t" . curl_error($ch) . "\n");
    }
    curl_close($ch);
}
print "\n";

// Submit POST request to Sindice
print "Submitting $domain to Sindice: \n";
$data = "url=" . urlencode($sitemap_url);
$fp = fsockopen("sindice.com", 80, $errno, $errstr);
if ($fp === false) {
    print "\tconnection to sindice.com failed: $errstr ($errno)\n\n";
} else {
    fputs($fp, "POST /api/v1/sitemap HTTP/1.0\r\n");
    fputs($fp, "Host: sindice.com\r\n");
    fputs($fp, "Content-type: application/x-www-form-urlencoded\r\n");
    fputs($fp, "Content-length: " . strlen($data) . "\r\n\r\n");
    fputs($fp, $data);

    // Read result back from the Sindice server. Stop on a failed read as well,
    // because feof() never turns true on a timed-out socket.
    $result = '';
    while (!feof($fp)) {
        $chunk = fgets($fp, 128);
        if ($chunk === false) {
            break;
        }
        $result .= $chunk;
    }
    fclose($fp);

    // Report server response: the HTTP status line plus the first <h1> of the body, if any.
    $eol = strpos($result, "\n");
    $status = ($eol === false) ? trim($result) : rtrim(substr($result, 0, $eol));
    $message = preg_match('@<h1>(.*?)</h1>@', $result, $matches) ? $matches[1] : "(no <h1> message in response)";
    print "\t$status\n\t$message\n\n";
}

// Submit to Ping the Semantic Web
passthru("./ptsw.py " . escapeshellarg($sitemap_url));
?>

Leave a Reply

ERROR: si-captcha.php plugin says GD image support not detected in PHP!

Contact your web host and ask them why GD image support is not enabled for PHP.

ERROR: si-captcha.php plugin says imagepng function not detected in PHP!

Contact your web host and ask them why imagepng function is not enabled for PHP.