Contents of file 'sitemap.php':
1 <?php
2 //============================================================================
3 // File: sitemap.php [PHP script]
4 // Created: July 29, 2011 [v.1.0]
5 // Last change: September 28, 2011 [v.1.4]
6 // Author: Fredrik Jonsson <http://jonsson.eu/>
7 //
8 // DESCRIPTION
9 // The SITEMAP script dynamically parses a directory tree and generates a
10 // corresponding site map. This script provides the generating engine of
11 // the site map of jonsson.eu.
12 //
13 // The front end of the site map generator is the sitemap() function, which
14 // recursively parses the directory pointed out by the DOCUMENT_ROOT server
15 // variable for any subdirectories containing a .sitemap file. If a sub-
16 // directory contains such a file, then any present text in that file will
17 // be read and displayed as a corresponding text tag of the directory in the
18 // final site map. For any directories containing a .sitemap file, the parsing
19 // will continue in that directory until all valid tags have been found.
20 //
21 // For example, the .sitemap tag of the SITEMAP home URL (located at
22 // http://jonsson.eu/programs/php/sitemap/.sitemap) contains the text
23 // "The SITEMAP script dynamically parses a directory tree and
24 // generates a corresponding site map. This script provides
25 // the generating engine of the <a href="/sitemap/">site
26 // map</a> of jonsson.eu."
27 //
28 // Any subdirectories _not_ containing a .sitemap file will be omitted from
29 // the final site map, and the recursive parsing through these directories
30 // will be terminated (hence omitting any sub-subdirectories which may
31 // contain .sitemap files).
32 //
33 // PARAMETERS
34 // In the sitemap($lang) function, the input argument determines the
35 // language to use for headers in generation of the site map. Accepted
36 // values are "en" (for English) or "sv" (for Swedish).
37 //
38 // The sitemap($lang) function automatically starts the recursive parsing
39 // in the directory pointed out by the <tt>DOCUMENT_ROOT</tt> variable (in
40 // PHP accessed via <tt>$_SERVER['DOCUMENT_ROOT']</tt>). In case the site
41 // map should be generated for a different directory,
42 // <tt>$_SERVER['DOCUMENT_ROOT']</tt> should in the script be replaced by
43 // whatever directory one instead wishes to parse.
44 //
45 // EXAMPLE OF USAGE
46 // In order to invoke SITEMAP for the generation of a site map, simply add
47 // the following PHP block to an (X)HTML-generating PHP page:
48 // <?php
49 // include "sitemap.php";
50 // sitemap("en");
51 // ?>
52 //
53 // Here, the "en" (for English) may be replaced by "sv" (for Swedish).
54 // This is the way the <a href="/sitemap/">site map of jonsson.eu</a>
55 // is generated.
56 //
57 // Copyright (C) 2011, Fredrik Jonsson <http://jonsson.eu/>
58 // Non-commercial copying welcome.
59 //============================================================================
60 //
61 // ###########################################################################
62 // ######## CUSTOMIZE THE FOLLOWING PARAMETERS WITH YOUR OWN SETTINGS ########
63 // ######## FOR DETAILS, SEE CONFIG INSTRUCTIONS IN THE HEADER ABOVE #########
64 // ###########################################################################
65 //
// Path (relative to the site root) of the XML site map file that sitemap()
// uses when no other output file is supplied.
$default_xmlfile = '/sitemap.xml';
// Key phrase; sitemap() compares the "xmlkey" URL parameter against the
// md5() hash of this string before switching to XML generation.
$xml_keystring = 'as simple as possible, but no simpler';
68 //
69 // ###########################################################################
70 // ######## END OF USER CUSTOMIZABLE PARAMETERS ##############################
71 // ###########################################################################
72 //
73
74 //
75 // Inclusion to get access to the modification_time_of_url($fullpath) function.
76 //
77 include_once rtrim($_SERVER['DOCUMENT_ROOT'],"/")."/php/"."lastmodified.php";
78
if (!function_exists("swlss")) {
    //
    // Echo the URL query suffix that selects the Swedish language
    // ("?lang=sv") when $lang is "sv"; for any other value nothing is
    // appended. Only defined here if no swlss() already exists.
    //
    function swlss($lang) {
        $suffix = (strcmp($lang, "sv") == 0) ? "?lang=sv" : "";
        echo $suffix;
        return;
    }
}
89
//
// Read the names of all entries (files and subdirectories, "." and ".."
// included) of the directory $directory.
//
// Returns an array in which element [0] holds the number of entries and
// elements [1..count] hold the entry names, each prefixed by $directory
// and sorted in ascending order. If the directory cannot be opened, an
// array holding only a zero count is returned.
//
function get_filenames($directory) {
    //
    // Entries are built as <directory><name>, so make sure that exactly one
    // separator ends up between the two. An empty $directory is kept as is.
    //
    $prefix = $directory;
    if (($prefix !== "") && (substr($prefix, -1) !== "/")) {
        $prefix .= "/";
    }

    $dir = @dir($directory);
    if (!$dir) {
        return array(0); // Not an openable directory: nothing to list
    }

    $entries = array();
    while (($file = $dir->read()) !== false) {
        $entries[] = $prefix.$file;
    }
    $dir->close();

    //
    // sort() renumbers the array from index 0, so the count must be put in
    // front only after sorting; otherwise it would overwrite the first
    // sorted entry and leave the last index undefined.
    //
    sort($entries);
    return array_merge(array(count($entries)), $entries);
}
106
//
// The is_directory($location) function takes a location as input and
// returns "true" if the location is found to point out a directory that
// can be read; otherwise "false" is returned.
//
// The check is done without opening the directory, so no directory handle
// is left open behind for every tested location.
//
function is_directory($location) {
    if (!is_string($location) || ($location === "")) {
        return false; // Nothing that could name a directory
    }
    return (is_dir($location) && is_readable($location));
}
119
//
// Returns "true" if $directory contains a file named ".protected", which
// marks the directory as protected; otherwise "false" is returned.
//
function is_protected_directory($directory) {
    $marker = rtrim($directory, "/")."/.protected";
    return file_exists($marker);
}
127
//
// Returns "true" if $directory contains a file named ".sitemap", which
// marks the directory as one to be shown in the site map; otherwise
// "false" is returned.
//
function is_visible_directory($directory) {
    $marker = rtrim($directory, "/")."/.sitemap";
    return file_exists($marker);
}
135
//
// The is_root_directory($directory,$siteroot) function determines whether
// the supplied $directory string corresponds to the root directory of the
// site. Both paths are compared with any leading and trailing slashes
// stripped. If they are equal, then 'true' is returned; otherwise 'false'.
//
function is_root_directory($directory,$siteroot) {
    $candidate = trim($directory, "/");
    $root = trim($siteroot, "/");
    return (strcmp($candidate, $root) == 0); // true: this is the site root
}
148
//
// List the subdirectories of $directory that are to appear in the site map.
//
// A subdirectory is listed when it is neither "." nor "..", is reported as
// a directory by is_directory(), is not protected (is_protected_directory())
// and is visible (is_visible_directory()).
//
// Returns an array in which element [0] holds the number of subdirectories
// found and elements [1..count] hold their paths, each with exactly one
// trailing "/", in the order delivered by get_filenames().
//
function get_visible_subdirectories($directory) {
    //
    // First of all, obtain a list of all available files and subdirectories
    // contained in the directory as pointed out by the $directory parameter.
    //
    $filenames = get_filenames($directory);
    $kmax = $filenames[0]; // The number of files and subdirectories

    $dirnames = array(0); // Element [0] is filled in with the final count
    $found = 0;
    for ($k = 1; $k <= $kmax; $k++) { // For all items in $filenames
        if (!isset($filenames[$k])) {
            continue; // Never read an index that the listing did not fill
        }
        $path = $filenames[$k];
        $name = basename($path);
        if (($name === ".") || ($name === "..")) {
            continue; // Skip the self and parent references
        }
        if (!is_directory($path)) {
            continue; // Regular files are of no interest here
        }
        if (is_protected_directory($path) || !is_visible_directory($path)) {
            continue; // Protected or hidden: leave out of the site map
        }
        $found++;
        $dirnames[$found] = rtrim($path, "/")."/"; // Save name, append "/"
    }
    $dirnames[0] = $found;
    return $dirnames;
}
176
//
// Indent two regular spaces per recursion level, as echoed output.
// A zero or negative $reclevel produces no output.
//
function indent($reclevel) {
    if ($reclevel > 0) {
        echo str_repeat("  ", (int)$reclevel);
    }
    return;
}
184
//
// Echo the content of the summary file $filename as a line break followed
// by italic text, with any newline characters replaced by spaces. The
// content is output as is (no escaping), so markup in the file is kept.
//
// If the file does not exist, is not readable or cannot be opened, an
// error message is echoed instead and nothing further is output.
//
function display_summary($filename) {
    if (!file_exists($filename)) {
        echo "Error: Summary file '$filename' does not exist.<br/>\n";
        return;
    }
    if (!is_readable($filename)) {
        echo "Error: Summary file '$filename' is not readable.<br/>\n";
        return;
    }
    $fp = @fopen($filename, "r");
    if (!$fp) {
        echo "Error: Could not open $filename for reading.<br/>";
        return; // Never go on reading from an invalid file handle
    }

    echo "<br/>";
    //
    // Output the content of the supplied summary file, replacing any
    // newline characters by spaces.
    //
    echo "<i>";
    while (($line = fgets($fp)) !== false) {
        echo str_replace("\n", " ", $line);
    }
    echo "</i>";
    fclose($fp);
    return;
}
208
//
// Echo a bold link to $linkdir whose text is the content of the title file
// $filename, with any newline characters replaced by spaces. The language
// suffix produced by swlss($lang) is appended to the link target. The
// title is output as is (no escaping).
//
// If the file does not exist, is not readable or cannot be opened, an
// error message is echoed instead and no link is output.
//
function display_title($lang,$filename,$linkdir) {
    if (!file_exists($filename)) {
        echo "Error: Summary file '$filename' does not exist.<br/>\n";
        return;
    }
    if (!is_readable($filename)) {
        echo "Error: Summary file '$filename' is not readable.<br/>\n";
        return;
    }
    $fp = @fopen($filename, "r");
    if (!$fp) {
        echo "Error: Could not open $filename for reading.<br/>";
        return; // Never go on reading from an invalid file handle
    }

    echo "<a href=\"".$linkdir;
    swlss($lang);
    echo "\"><b>";

    //
    // Output the content of the supplied title file, replacing any
    // newline characters by spaces.
    //
    while (($line = fgets($fp)) !== false) {
        echo str_replace("\n", " ", $line);
    }
    echo "</b></a>";
    fclose($fp);
    return;
}
234
//
// Echo the modification time of $directory as an italic "lastModified"
// span, formatted as weekday, day, month and year in GMT.
//
// The time is obtained from modification_time_of_url(), which is brought
// in from lastmodified.php; its result is passed to gmdate(), so it is
// presumably a Unix timestamp (to be confirmed against that file).
//
// $lang must be "en" or "sv"; for any other value an error text is echoed
// inside the span instead of a date.
//
function display_timestamp($lang,$siteroot,$directory) {
    //
    // Make sure the supplied directory path string is of the form
    // /path/to/directory/, stripped from the site root path.
    //
    $directory = "/".trim(substr($directory, strlen($siteroot)), "/")."/";
    $modtime = modification_time_of_url($siteroot.$directory);
    echo "<span class=\"lastModified\"><i>";
    switch ($lang) {
        case "en": // Quoted: a bare word here is an undefined constant
            echo "—".gmdate('l d M, Y', $modtime)."";
            break;
        case "sv":
            // Still to do: Swedish weekdays!
            echo "—".gmdate('l d M, Y', $modtime)."";
            break;
        default:
            echo "PHP Error: Language switch \"".$lang."\" is not recognized!\n";
    }
    echo "</i></span>";
    return;
}
257
//
// Return the modification date of $directory as a string of the form
// YYYY-MM-DD (GMT), which is the "complete date" variant of the W3C
// Datetime format, http://www.w3.org/TR/NOTE-datetime
//
// The time is obtained from modification_time_of_url(), called with the
// site root followed by the path of $directory relative to the site root
// in the form /path/to/directory/. The $lang parameter is not used.
//
function xml_timestamp_string($lang,$siteroot,$directory) {
    $relative = trim(substr($directory, strlen($siteroot)), "/");
    $stamp = modification_time_of_url($siteroot."/".$relative."/");
    return gmdate('Y-m-d', $stamp);
}
274
//
// The recursively_list_subdirectories_xhtml($lang,$directory,$reclevel,
// $siteroot) function echoes the XHTML list item for $directory and then
// calls itself for every visible subdirectory, thereby generating the
// XHTML corresponding to the whole directory tree below $directory.
//
// For a directory other than the site root, the list item (<li>) holds a
// link (titled by the .title file if present, otherwise by the directory
// name), the summary from the .sitemap file if present, and a time stamp.
// For the site root itself no list item is output; only its visible
// subdirectories are listed.
//
// $reclevel is the recursion depth, used for indentation of the output.
//
function recursively_list_subdirectories_xhtml($lang,$directory,
                                               $reclevel,$siteroot) {
    //
    // Element [0] of the returned array is the number of visible
    // subdirectories; elements [1..count] are their paths.
    //
    $subdirs = get_visible_subdirectories($directory);
    $count = $subdirs[0];
    indent($reclevel); // Nicely indent the current list item

    //
    // Link target of the current directory, relative to the site root and
    // always of the form /path/to/directory/.
    //
    $linkdir = "/".trim(substr($directory, strlen($siteroot)), "/")."/";
    $at_root = is_root_directory($directory, $siteroot);

    if (!$at_root) {
        echo "<li>";

        // Title: content of the .title file, else the plain directory name.
        $titlefile = $directory.".title";
        if (file_exists($titlefile)) {
            display_title($lang, $titlefile, $linkdir);
        } else {
            echo "<a href=\"".$linkdir;
            swlss($lang);
            echo "\">".basename($directory)."</a>";
        }

        // Summary: content of the .sitemap file, if there is one.
        $summaryfile = $directory.".sitemap";
        if (file_exists($summaryfile)) {
            display_summary($summaryfile);
        }

        display_timestamp($lang, $siteroot, $directory);
    }

    // A negative count is treated as fatal and ends the script.
    if ($count < 0) {
        echo "Fatal error encountered (negative kmax=$count)!";
        exit(1);
    }

    //
    // If there are visible subdirectories, generate an unordered list
    // (<ul>) and populate it with list items (<li>) recursively.
    //
    if ($count > 0) {
        echo "\n"; // Always begin a new list at a new line, ...
        indent($reclevel); // ... which is properly indented and readable.
        echo "<ul> <!-- Listing of subdirectories in $directory "
            ."(recursion level $reclevel) -->\n";
        $next = 1;
        while ($next <= $count) { // For all subdirectories
            recursively_list_subdirectories_xhtml($lang, $subdirs[$next],
                                                  $reclevel + 1, $siteroot);
            $next++;
        }
        indent($reclevel);
        echo "</ul> <!-- End of listing of subdirectories in $directory "
            ."(recursion level $reclevel) -->\n";
        indent($reclevel); // To provide a neat closing of the </li>
    }

    //
    // Close the current list item (<li>), regardless of whether it contains
    // a single description or several hierarchies of listed subdirectories.
    //
    if (!$at_root) {
        echo "</li>\n";
    }

    return;
}
371
//
// Echo the complete XHTML site map: a <div id="sitemap"> holding a heading
// in the language $lang ("en" or "sv"), the recursively generated listing
// of the directory tree below DOCUMENT_ROOT, and a closing credit line.
//
// For an unrecognized $lang an error text is echoed in place of the
// heading; the listing is generated all the same.
//
function sitemap_xhtml($lang) {
    //
    // Here the $siteroot parameter is initialized to contain the base
    // directory of the web server catalogue, typically of the form
    // /base/of/my/web/dir/
    //
    $sitemap_url = "http://jonsson.eu/programs/php/sitemap/";
    $bannerstring = "Code automatically generated by SITEMAP 1.0, ".$sitemap_url;
    $siteroot = rtrim($_SERVER['DOCUMENT_ROOT'], "/")."/"; // Ensure trailing slash!
    // HTTP_HOST need not be set (e.g. command line), so never assume it is.
    $host = isset($_SERVER['HTTP_HOST']) ? $_SERVER['HTTP_HOST'] : "";

    echo "<!-- $bannerstring -->\n";
    echo "<div id=\"sitemap\">\n";
    switch ($lang) {
        case "en": // Quoted: a bare word here is an undefined constant
            echo "<h1>Sitemap</h1>\n";
            break;
        case "sv":
            echo "<h1>Sajtkarta</h1>\n";
            break;
        default:
            echo "PHP Error: Language switch \"".$lang."\" is not recognized!\n";
    }
    recursively_list_subdirectories_xhtml($lang, $siteroot, 0, $siteroot);

    //
    // The following link is specific for jonsson.eu and relies on an
    // external script; it is only shown on the hosts listed here. As the
    // host name must equal one of these fixed strings, echoing it is safe.
    //
    echo " <br/>";
    if (in_array($host, array("jonsson.eu", "localhost", "dirac.local"), true)) {
        echo "<p>\n";
        echo "<a href=\"?viewsrc=".md5("../sitemap.xml")."\">View the XML code ".
            "of this sitemap</a> \n";
        echo "(Current host is ".$host.")\n";
        echo "</p>\n";
    }
    echo " <br/>";
    echo " <p>";
    echo "Generated recursively by <a href=\"".$sitemap_url."\">::sitemap::</a>";
    echo "</p>\n";
    echo "</div>\n";
    echo "<!-- End of SITEMAP automatically generated code -->\n";
    return;
}
415
//
// Write the <url> element of $directory to the opened file $fp and then
// call itself for every visible subdirectory, thereby generating the XML
// site map entries of the whole directory tree below $directory.
//
// Each entry holds the absolute URL (<loc>, built from HTTP_HOST and the
// path of $directory relative to $siteroot), the modification date
// (<lastmod>), and the fixed values "monthly" (<changefreq>) and "0.7"
// (<priority>). $reclevel is the recursion depth; it is only passed on.
//
function recursively_list_subdirectories_xml($fp,$lang,$directory,
                                             $reclevel,$siteroot) {
    //
    // Element [0] of the returned array is the number of visible
    // subdirectories; elements [1..count] are their paths.
    //
    $dirnames = get_visible_subdirectories($directory);
    $kmax = $dirnames[0]; // The number of subdirectories to list

    //
    // Construct the URL of the current directory, always of the form
    // http://host/path/to/directory/. For the site root the relative path
    // is empty and the URL must end in a single slash, not two.
    //
    $host = isset($_SERVER['HTTP_HOST']) ? $_SERVER['HTTP_HOST'] : "";
    $relative = trim(substr($directory, strlen($siteroot)), "/");
    $linkdir = "http://".$host."/";
    if ($relative !== "") {
        $linkdir .= $relative."/";
    }
    //
    // The sitemap protocol requires all data values to be entity escaped;
    // a directory name containing e.g. "&" would otherwise yield XML that
    // is not well-formed.
    //
    $linkdir = str_replace(
        array("&", "<", ">", "\"", "'"),
        array("&amp;", "&lt;", "&gt;", "&quot;", "&apos;"),
        $linkdir
    );

    $timestamp_string = xml_timestamp_string($lang, $siteroot, $directory);

    fwrite($fp, "<url>\n");
    fwrite($fp, " <loc>".$linkdir."</loc>\n");
    fwrite($fp, " <lastmod>".$timestamp_string."</lastmod>\n");
    fwrite($fp, " <changefreq>"."monthly"."</changefreq>\n");
    fwrite($fp, " <priority>"."0.7"."</priority>\n");
    fwrite($fp, "</url>\n");

    //
    // If the current directory has been found to contain subdirectories
    // which should be visible (that is to say, if $kmax>0), then go ahead
    // and write their entries recursively. A negative count is treated as
    // fatal and ends the script.
    //
    if ($kmax > 0) {
        for ($k = 1; $k <= $kmax; $k++) { // For all subdirectories
            recursively_list_subdirectories_xml($fp, $lang, $dirnames[$k],
                                                $reclevel + 1, $siteroot);
        }
    } else {
        if ($kmax < 0) {
            echo "Fatal error encountered (negative kmax=$kmax)!";
            exit(1);
        }
    }

    return;
}
481
//
// Generate the XML site map of the directory tree below DOCUMENT_ROOT and
// write it to the file $sitemap_file, given relative to DOCUMENT_ROOT.
// An already existing file is overwritten.
//
// Progress messages (start, finish, a link to the generated file) are
// echoed as XHTML paragraphs. If the file cannot be opened for writing,
// an error paragraph is echoed and nothing is generated.
//
function sitemap_xml($lang,$sitemap_file) {
    $siteroot = rtrim($_SERVER['DOCUMENT_ROOT'], "/")."/"; // Ensure trailing slash!
    // $siteroot already ends in "/", so avoid a doubled slash in the path.
    $filename = $siteroot.ltrim($sitemap_file, "/");
    if (file_exists($filename)) {
        //
        // PHP should under no circumstances be given world-writable
        // permission on the file, which is why no chmod() is done here.
        //
        echo "<p>Overwriting site map $filename</p>\n";
    }
    // time() gives the current time; mktime() must not be called without
    // arguments.
    echo "<p>Generation of XML started at ".gmdate("M d Y H:i:s", time())."</p>";
    $fp = fopen($filename, 'w');
    if (!$fp) {
        echo "<p>Error: Could not open $filename for writing.</p>";
        return;
    }
    fwrite($fp, "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n");
    fwrite($fp,
        "<urlset xmlns=\"http://www.sitemaps.org/schemas/sitemap/0.9\">\n");
    recursively_list_subdirectories_xml($fp, $lang, $siteroot, 0, $siteroot);
    fwrite($fp, "</urlset>\n");
    fclose($fp);
    echo "<p>Generation of XML finished at ".gmdate("M d Y H:i:s", time())."</p>";
    echo "<p><a href=\"$sitemap_file\">View generated XML file</a></p>";
    return;
}
508
//
// Front end of the site map generator.
//
// sitemap($lang) echoes the XHTML site map in the language $lang. Two
// optional further arguments are accepted:
//   - second argument: the mode, "xhtml" (default) or "xml"; any other
//     value gives an error message and nothing is generated;
//   - third argument: the output file for the "xml" mode (default is the
//     global $default_xmlfile).
//
// If the URL carries a non-empty "xmlkey" parameter, it overrides the
// optional arguments: when it equals the md5() hash of the global
// $xml_keystring, XML is generated into the default file; otherwise the
// XHTML site map is generated.
//
function sitemap($lang) {
    $default_file = isset($GLOBALS['default_xmlfile'])
        ? $GLOBALS['default_xmlfile'] : "/sitemap.xml";
    $sitemap_mode = "xhtml"; // Default mode of sitemap generation
    $sitemap_file = $default_file; // Default file for XML output
    $arg_list = func_get_args(); // The input args to the function
    $numargs = count($arg_list); // Number of input args to the function

    //
    // If more than one input arguments, then the second argument is the
    // mode parameter ("xhtml" or "xml") ...
    //
    if ($numargs >= 2) {
        $sitemap_mode = trim($arg_list[1]);
        if (($sitemap_mode !== "xhtml") && ($sitemap_mode !== "xml")) {
            echo "Error! Unknown supplied mode '$sitemap_mode'!";
            return;
        }
        //
        // ... and the third argument, if any, is taken as the file name of
        // the XML output. (This argument has no impact whatsoever unless
        // the second argument was "xml".)
        //
        if ($numargs >= 3) {
            $sitemap_file = $arg_list[2];
        }
    }

    //
    // If a switch is present at the URL, then override the optional input
    // arguments. The parameter may be absent or (for "xmlkey[]=") an
    // array, neither of which can ever be a valid key.
    //
    $given = "";
    if (isset($_GET["xmlkey"]) && is_string($_GET["xmlkey"])) {
        $given = $_GET["xmlkey"];
    }
    if ($given !== "") {
        //
        // Unless the supplied MD5 hash code matches that of the key word,
        // do nothing to the XML file and instead generate the XHTML output.
        // An unset or empty key word never matches, since the hash of the
        // empty string is publicly known.
        //
        $keystring = isset($GLOBALS['xml_keystring'])
            ? trim($GLOBALS['xml_keystring']) : "";
        $expected = md5($keystring);
        $supplied = trim($given);
        if (function_exists("hash_equals")) {
            $matches = hash_equals($expected, $supplied); // Timing-safe
        } else {
            $matches = (strcmp($supplied, $expected) == 0);
        }
        if (($keystring !== "") && $matches) {
            $sitemap_mode = "xml";
            $sitemap_file = $default_file; // Default file for XML output
        } else {
            $sitemap_mode = "xhtml";
        }
    }

    if ($sitemap_mode === "xhtml") {
        //
        // Generate XHTML for the display of the site map.
        //
        sitemap_xhtml($lang);
    } else {
        //
        // Generate the XML site map file.
        //
        sitemap_xml($lang, $sitemap_file);
    }

    return;
}
576
577 ?>
578
Generated by ::viewsrc::