Search:

Return to previous page

Contents of file 'sitemap.php':



    1   <?php
    2   //============================================================================
    3   // File:        sitemap.php [PHP script]
    4   // Created:     July 29, 2011 [v.1.0]
    5   // Last change: September 28, 2011 [v.1.4]
    6   // Author:      Fredrik Jonsson <http://jonsson.eu/>
    7   //
    8   // DESCRIPTION
    9   //  The SITEMAP script dynamically parses a directory tree and generates a
   10   //  corresponding site map. This script provides the generating engine of
   11   //  the site map of jonsson.eu.
   12   //
   13   //  The front end of the site map generator is the sitemap() function, which
   14   //  recursively parses the directory pointed out by the DOCUMENT_ROOT server
   15   //  variable for any subdirectories containing a .sitemap file. If a sub-
   16   //  directory contains such a file, then any present text in that file will
   17   //  be read and displayed as a corresponding text tag of the directory in the
   18   //  final site map. For any directories containing a .sitemap file, the parsing
   19   //  will continue in that directory until all valid tags have been found.
   20   //
   21   //  For example, the .sitemap tag of the SITEMAP home URL (located at
   22   //  http://jonsson.eu/programs/php/sitemap/.sitemap) contains the text
   23   //      "The SITEMAP script dynamically parses a directory tree and
   24   //       generates a corresponding site map. This script provides
   25   //       the generating engine of the <a href="/sitemap/">site
   26   //       map</a> of jonsson.eu."
   27   //
   28   //  Any subdirectories _not_ containing a .sitemap file will be omitted from
   29   //  the final site map, and the recursive parsing through these directories
   30   //  will be terminated (hence omitting any sub-subdirectories which may
   31   //  contain .sitemap files).
   32   //
   33   // PARAMETERS
   34   //  In the sitemap($lang) function, the input argument determines the
   35   //  language to use for headers in the generated site map. Accepted
   36   //  values are "en" (for English) or "sv" (for Swedish).
   37   //
   38   //  The sitemap($lang) function automatically starts the recursive parsing
   39   //  in the directory pointed out by the <tt>DOCUMENT_ROOT</tt> variable (in
   40   //  PHP accessed via <tt>$_SERVER['DOCUMENT_ROOT']</tt>). In case the site
   41   //  map should be generated for a different directory,
   42   //  <tt>$_SERVER['DOCUMENT_ROOT']</tt> should in the script be replaced by
   43   //  whatever directory one instead wishes to parse.
   44   //
   45   // EXAMPLE OF USAGE
   46   //  In order to invoke SITEMAP for the generation of a site map, simply add
   47   //  the following PHP block to an (X)HTML-generating PHP page:
   48   //       &lt;?php
   49   //          include "sitemap.php";
   50   //          sitemap("en");
   51   //       ?&gt;
   52   //
   53   //  Here, the "en" (for English) may be replaced by "sv" (for Swedish).
   54   //  This is the way the <a href="/sitemap/">site map of jonsson.eu</a>
   55   //  is generated.
   56   //
   57   //  Copyright (C) 2011, Fredrik Jonsson <http://jonsson.eu/>
   58   //  Non-commercial copying welcome.
   59   //============================================================================
   60   //
   61   // ###########################################################################
   62   // ######## CUSTOMIZE THE FOLLOWING PARAMETERS WITH YOUR OWN SETTINGS ########
   63   // ######## FOR DETAILS, SEE CONFIG INSTRUCTIONS IN THE HEADER ABOVE #########
   64   // ###########################################################################
   65   //
   // Site-relative name of the XML file that is written in "xml" mode.
   $default_xmlfile = '/sitemap.xml';
   // Pass phrase; the MD5 hash of this string is what sitemap() compares the
   // "xmlkey" URL parameter against before regenerating the XML file.
   $xml_keystring = 'as simple as possible, but no simpler';
   68   //
   69   // ###########################################################################
   70   // ######## END OF USER CUSTOMIZABLE PARAMETERS ##############################
   71   // ###########################################################################
   72   //
   73   
   74   //
   75   // Inclusion to get access to the modification_time_of_url($fullpath) function.
   76   //
   77   include_once rtrim($_SERVER['DOCUMENT_ROOT'],"/")."/php/"."lastmodified.php";
   78   
   if (!function_exists("swlss")) {
     //
     // Echo the query string "?lang=sv" when the supplied language code is
     // "sv"; echo nothing for any other language code. Defined only if no
     // function of the same name already exists.
     //
     function swlss($lang) {
       if (strcmp($lang,"sv") == 0) {
         echo "?lang=sv";
       }
     }
   }
   89   
   //
   // The get_filenames($directory) function reads all entries of the supplied
   // directory (files and subdirectories alike, including the "." and ".."
   // entries reported by the directory handle) and returns them as an array:
   //
   //   [0]      the number N of entries found,
   //   [1..N]   the entries, each prefixed by the directory path, sorted in
   //            ascending order.
   //
   // If the directory cannot be opened, array(0) is returned.
   //
   function get_filenames($directory) {
     //
     // Entries are formed as directory path + entry name, so make sure that
     // a non-empty path ends with exactly the separator it needs.
     //
     $prefix = $directory;
     if (($prefix !== "") && (substr($prefix,-1) !== "/")) {
       $prefix .= "/";
     }

     $dir = dir($prefix);
     if (!$dir) {
       return array(0); // Nothing could be read; report zero entries
     }

     $paths = array();
     while (($file = $dir->read()) !== false) {
       $paths[] = $prefix.$file;
     }
     $dir->close();

     //
     // sort() renumbers the array from index 0, so the entry count must be
     // inserted in front of the list *after* sorting; writing it to index 0
     // of the sorted array would overwrite the first entry.
     //
     sort($paths,SORT_STRING);
     $count = count($paths);
     array_unshift($paths,$count);
     return $paths;
   }
  106   
  //
  // The is_directory($location) function takes a location as input and
  // returns "true" if the location is found to point out a directory which
  // can be read; otherwise "false" is returned.
  //
  // The test is made with is_dir() and is_readable() rather than by opening
  // a directory handle, so no handle is created and no error suppression is
  // needed. Anything but a non-empty string is reported as "not a directory".
  //
  function is_directory($location) {
    if (!is_string($location) || ($location === "")) {
      return false; // No path supplied at all
    }
    return (is_dir($location) && is_readable($location));
  }
  119   
  //
  // The is_protected_directory($directory) function returns "true" if the
  // supplied directory contains an entry named ".protected"; otherwise
  // "false". Any trailing slashes of $directory are ignored.
  //
  function is_protected_directory($directory) {
    $marker = rtrim($directory,"/")."/.protected";
    return file_exists($marker);
  }
  127   
  //
  // The is_visible_directory($directory) function returns "true" if the
  // supplied directory contains an entry named ".sitemap" (that is to say,
  // if the directory is to be shown in the site map); otherwise "false".
  // Any trailing slashes of $directory are ignored.
  //
  function is_visible_directory($directory) {
    $marker = rtrim($directory,"/")."/.sitemap";
    return file_exists($marker);
  }
  135   
  //
  // The is_root_directory($directory,$siteroot) function determines whether
  // the supplied $directory string corresponds to the root directory of the
  // site. If so, then 'true' is returned; otherwise 'false'. Leading and
  // trailing slashes of both strings are disregarded in the comparison.
  //
  function is_root_directory($directory,$siteroot) {
    $candidate = trim($directory,"/");
    $root = trim($siteroot,"/");
    return (strcmp($candidate,$root) == 0); // Equal: this is the site root
  }
  148   
  //
  // The get_visible_subdirectories($directory) function returns the sub-
  // directories of $directory which are to appear in the site map, that is
  // to say those which are directories according to is_directory(), are not
  // marked as protected, and are marked as visible. The returned array has
  // the layout
  //
  //   [0]      the number N of subdirectories found,
  //   [1..N]   the subdirectory paths, each with one trailing "/", in the
  //            order delivered by get_filenames().
  //
  function get_visible_subdirectories($directory) {
    $filenames = get_filenames($directory);
    $dirnames = array(0); // Slot 0 is reserved for the final count

    //
    // Walk through the entries actually present in the list instead of
    // counting from 1 up to the number stored in slot 0; this way no
    // non-existing index is ever read.
    //
    foreach ($filenames as $index => $path) {
      if ($index === 0) continue;           // Slot 0 holds the entry count
      $name = basename($path);
      if (($name === ".") || ($name === "..")) continue;
      if (!is_directory($path)) continue;          // Not a directory
      if (is_protected_directory($path)) continue; // Protected
      if (!is_visible_directory($path)) continue;  // Hidden in the site map
      $dirnames[] = rtrim($path,"/")."/";
    }

    $dirnames[0] = count($dirnames)-1;
    return $dirnames;
  }
  176   
  //
  // Echo two regular spaces per recursion level; nothing is echoed for a
  // recursion level of zero or below.
  //
  function indent($reclevel) {
    if ($reclevel > 0) {
      echo str_repeat("  ",(int)ceil($reclevel));
    }
  }
  184   
  //
  // The display_summary($filename) function echoes a line break followed by
  // the content of the supplied summary file, wrapped in <i>...</i> and with
  // any newline characters replaced by spaces. The file content is echoed
  // as it is, without any escaping.
  //
  // If the file does not exist, is not readable or cannot be opened, an
  // error message is echoed and the function returns without any further
  // output.
  //
  function display_summary($filename) {
    if (!file_exists($filename)) {
      echo "Error: Summary file '$filename' does not exist.<br/>\n";
      return;
    }
    if (!is_readable($filename)) {
      echo "Error: Summary file '$filename' is not readable.<br/>\n";
      return;
    }
    $fp=fopen($filename,"r"); // The mode must be a quoted string
    if (!$fp) {
      echo "Error: Could not open $filename for reading.<br/>";
      return; // Never enter the read loop without a valid file handle
    }

    echo "<br/>";
    echo "<i>";
    while (($line=fgets($fp))!==false) echo str_replace("\n"," ",$line);
    echo "</i>";
    fclose($fp);
    return;
  }
  208   
  //
  // The display_title($lang,$filename,$linkdir) function echoes a link to
  // $linkdir (with the language query string appended by swlss($lang)),
  // using as bold link text the content of the supplied title file, in
  // which any newline characters are replaced by spaces. The file content
  // is echoed as it is, without any escaping.
  //
  // If the file does not exist, is not readable or cannot be opened, an
  // error message is echoed and the function returns without any further
  // output.
  //
  function display_title($lang,$filename,$linkdir) {
    if (!file_exists($filename)) {
      echo "Error: Title file '$filename' does not exist.<br/>\n";
      return;
    }
    if (!is_readable($filename)) {
      echo "Error: Title file '$filename' is not readable.<br/>\n";
      return;
    }
    $fp=fopen($filename,"r"); // The mode must be a quoted string
    if (!$fp) {
      echo "Error: Could not open $filename for reading.<br/>";
      return; // Never enter the read loop without a valid file handle
    }

    echo "<a href=\"".$linkdir;
    swlss($lang);
    echo "\"><b>";
    while (($line=fgets($fp))!==false) echo str_replace("\n"," ",$line);
    echo "</b></a>";
    fclose($fp);
    return;
  }
  234   
  //
  // The display_timestamp($lang,$siteroot,$directory) function echoes the
  // modification time of the supplied directory, formatted as for example
  // "Thursday 01 Jan, 1970" (in GMT) and wrapped in a <span> element of
  // class "lastModified". The time itself is obtained from the
  // modification_time_of_url() function, which is not defined in this file
  // (it is expected to be supplied by the included lastmodified.php).
  //
  // Accepted values of $lang are "en" and "sv"; for any other value an
  // error message is echoed in place of the date.
  //
  function display_timestamp($lang,$siteroot,$directory) {
    //
    // Make sure the supplied directory path string is of the form
    // /path/to/directory/, stripped from the site root path.
    //
    $directory="/".trim(substr($directory,strlen($siteroot)),"/")."/";
    $modtime=modification_time_of_url($siteroot.$directory);
    echo "<span class=\"lastModified\"><i>";
    switch ($lang) {
      case "en": // Case labels are quoted strings, not bare constants
        echo "&mdash;".gmdate('l d M, Y',$modtime);
        break;
      case "sv":
        // Still to do: Swedish weekdays! (English names are used for now.)
        echo "&mdash;".gmdate('l d M, Y',$modtime);
        break;
      default:
        echo "PHP Error: Language switch \"".$lang."\" is not recognized!\n";
    }
    echo "</i></span>";
    return;
  }
  257   
  //
  // The xml_timestamp_string($lang,$siteroot,$directory) function returns
  // the modification date of the supplied directory as a string of the form
  // YYYY-MM-DD (in GMT), which is the date-only variant of the W3C Datetime
  // format, http://www.w3.org/TR/NOTE-datetime. The $lang parameter is not
  // used. The modification time is obtained from modification_time_of_url(),
  // which is not defined in this file.
  //
  function xml_timestamp_string($lang,$siteroot,$directory) {
    //
    // Path of the directory relative to the site root, without any
    // leading or trailing slashes.
    //
    $relative=trim(substr($directory,strlen($siteroot)),"/");
    $target=$siteroot."/".$relative."/";
    return gmdate('Y-m-d',modification_time_of_url($target));
  }
  274   
  //
  // The recursively_list_subdirectories_xhtml($lang,$directory,$reclevel,
  // $siteroot) function does exactly what its name suggests: A recursive
  // listing of the directories starting from the directory supplied via the
  // $directory parameter, meanwhile echoing the XHTML corresponding to the
  // directory tree.
  //
  // Each directory other than the site root becomes one list item (<li>),
  // holding a link, an optional summary and a time stamp. The visible sub-
  // directories, if any, follow as a nested unordered list (<ul>). For the
  // site root no list item is generated; only its subdirectories are listed.
  //
  // The $reclevel parameter is the current depth of recursion, and is used
  // for the indentation of the generated code.
  //
  function recursively_list_subdirectories_xhtml($lang,$directory,
                                                      $reclevel,$siteroot) {
    $subdirs=get_visible_subdirectories($directory);
    $count=$subdirs[0]; // Slot 0 holds the number of subdirectories
    $at_root=is_root_directory($directory,$siteroot);

    indent($reclevel); // Nicely indent the current list item

    //
    // Link target of the current directory, of the form /path/to/directory/
    // as counted from the site root.
    //
    $linkdir="/".trim(substr($directory,strlen($siteroot)),"/")."/";

    if (!$at_root) {
      echo "<li>";

      //
      // The link text is taken from the .title file of the directory if
      // such a file exists; otherwise the directory name is used.
      //
      $titlefile=$directory.".title";
      if (file_exists($titlefile)) {
        display_title($lang,$titlefile,$linkdir);
      } else {
        echo "<a href=\"".$linkdir;
        swlss($lang);
        echo "\">".basename($directory)."</a>";
      }

      //
      // The descriptive text is taken from the .sitemap file, if present.
      //
      $summaryfile=$directory.".sitemap";
      if (file_exists($summaryfile)) {
        display_summary($summaryfile);
      }

      display_timestamp($lang,$siteroot,$directory);
    }

    //
    // A negative count is treated as a fatal error.
    //
    if ($count<0) {
      echo "Fatal error encountered (negative kmax=$count)!";
      exit(1);
    }

    //
    // Any visible subdirectories are listed in a nested <ul>, populated by
    // one recursive call per subdirectory.
    //
    if ($count>0) {
      echo "\n";         // Always begin a new list at a new line, ...
      indent($reclevel); // ... which is properly indented and readable.
      echo "<ul> <!-- Listing of subdirectories in $directory "
            ."(recursion level $reclevel) -->\n";
      foreach (array_slice($subdirs,1,$count) as $subdir) {
        recursively_list_subdirectories_xhtml($lang,$subdir,
             $reclevel+1,$siteroot);
      }
      indent($reclevel);
      echo "</ul> <!-- End of listing of subdirectories in $directory "
            ."(recursion level $reclevel) -->\n";
      indent($reclevel); // To provide a neat closing of the </li>
    }

    //
    // Close the list item opened above, if any.
    //
    if (!$at_root) {
      echo "</li>\n";
    }

    return;
  }
  371   
  //
  // The sitemap_xhtml($lang) function echoes the complete XHTML site map:
  // A <div id="sitemap"> element containing a header in the language given
  // by $lang ("en" or "sv"), the recursively generated listing of the
  // directory tree below $_SERVER['DOCUMENT_ROOT'], and a footer. For any
  // other value of $lang an error message is echoed in place of the header.
  //
  function sitemap_xhtml($lang) {
    //
    // Here the $siteroot parameter is initialized to contain the base directory
    // of the web server catalogue, typically of the form /base/of/my/web/dir/
    //
    $sitemap_url="http://jonsson.eu/programs/php/sitemap/";
    $bannerstring="Code automatically generated by SITEMAP 1.0, ".$sitemap_url;
    $siteroot=rtrim($_SERVER['DOCUMENT_ROOT'],"/")."/"; // Ensure trailing slash!
    echo "<!-- $bannerstring -->\n";
    echo "<div id=\"sitemap\">\n";
    switch ($lang) {
      case "en": // Case labels are quoted strings, not bare constants
        echo "<h1>Sitemap</h1>\n";
        break;
      case "sv":
        echo "<h1>Sajtkarta</h1>\n";
        break;
      default:
        echo "PHP Error: Language switch \"".$lang."\" is not recognized!\n";
    }
    recursively_list_subdirectories_xhtml($lang,$siteroot,0,$siteroot);
    //
    // Outcomment the following, which is specific for jonsson.eu and relies
    // on an external script!
    //
    echo "  <br/>";
    if ((strcmp($_SERVER['HTTP_HOST'],"jonsson.eu")==0)
        ||(strcmp($_SERVER['HTTP_HOST'],"localhost")==0)
        ||(strcmp($_SERVER['HTTP_HOST'],"dirac.local")==0)) { // If on my machine
      echo "<p>\n";
      echo "<a href=\"?viewsrc=".md5("../sitemap.xml")."\">View the XML code ".
           "of this sitemap</a> \n";
      //
      // The host name is only echoed after having been found equal to one
      // of the three fixed names above.
      //
      echo "(Current host is ".$_SERVER['HTTP_HOST'].")\n";
      echo "</p>\n";
    }
    echo "  <br/>";
    echo "  <p>";
    echo "Generated recursively by <a href=\"".$sitemap_url."\">::sitemap::</a>";
    echo "</p>\n";
    echo "</div>\n";
    echo "<!-- End of SITEMAP automatically generated code -->\n";
    return;
  }
  415   
  //
  // The recursively_list_subdirectories_xml($fp,$lang,$directory,$reclevel,
  // $siteroot) function writes one <url> element for the supplied directory
  // to the open file handle $fp, and then calls itself for each of the
  // visible subdirectories of that directory. Each <url> element holds
  //
  //   <loc>         http://<HTTP_HOST>/path/to/directory/ (XML-escaped),
  //   <lastmod>     the date delivered by xml_timestamp_string(),
  //   <changefreq>  always "monthly",
  //   <priority>    always "0.7".
  //
  // The $reclevel parameter is the current depth of recursion; it is only
  // passed on (incremented by one) to the recursive calls.
  //
  function recursively_list_subdirectories_xml($fp,$lang,$directory,
                                                      $reclevel,$siteroot) {
    $dirnames=get_visible_subdirectories($directory);
    $kmax=$dirnames[0]; // The number of subdirectories to list

    //
    // Construct the URL of the current directory. For the site root itself
    // the relative path is empty, in which case nothing is to be appended
    // after the slash which follows the host name (otherwise the URL would
    // end in a double slash).
    //
    $relpath=trim(substr($directory,strlen($siteroot)),"/");
    $linkdir="http://".$_SERVER['HTTP_HOST']."/";
    if ($relpath!=="") {
      $linkdir.=$relpath."/";
    }

    //
    // The characters & < > " ' must not appear literally in the text of an
    // XML element, so replace them by their entity escape codes.
    //
    $escaped=str_replace(
      array("&","<",">","\"","'"),
      array("&amp;","&lt;","&gt;","&quot;","&apos;"),
      $linkdir);

    $timestamp_string=xml_timestamp_string($lang,$siteroot,$directory);

    fwrite($fp,"<url>\n");
    fwrite($fp,"  <loc>".$escaped."</loc>\n");
    fwrite($fp,"  <lastmod>".$timestamp_string."</lastmod>\n");
    fwrite($fp,"  <changefreq>monthly</changefreq>\n");
    fwrite($fp,"  <priority>0.7</priority>\n");
    fwrite($fp,"</url>\n");

    //
    // Continue with the visible subdirectories, if any. A negative count
    // is treated as a fatal error.
    //
    if ($kmax>0) {
      for ($k=1;$k<=$kmax;$k++) // For all subdirectories
        recursively_list_subdirectories_xml($fp,$lang,$dirnames[$k],
             $reclevel+1,$siteroot);
    } else {
      if ($kmax<0) {
        echo "Fatal error encountered (negative kmax=$kmax)!";
        exit(1);
      }
    }

    return;
  }
  481   
  //
  // The sitemap_xml($lang,$sitemap_file) function writes the site map of
  // the directory tree below $_SERVER['DOCUMENT_ROOT'] as XML (using the
  // http://www.sitemaps.org/schemas/sitemap/0.9 name space) to the file
  // $sitemap_file, whose name is taken relative to the document root. Any
  // existing file of that name is overwritten. Progress messages, and a
  // link to the generated file, are echoed as XHTML paragraphs.
  //
  // If the file cannot be opened for writing, an error message is echoed
  // and nothing is written.
  //
  function sitemap_xml($lang,$sitemap_file) {
    $siteroot=rtrim($_SERVER['DOCUMENT_ROOT'],"/")."/"; // Ensure trailing slash!
    //
    // The site root already ends with a slash, so strip any leading slash
    // of the file name in order not to end up with a double one.
    //
    $filename=$siteroot.ltrim($sitemap_file,"/");
    if (file_exists($filename)) {
      //
      // PHP should under no circumstances have the following permission.
      //
      // chmod($siteroot.$sitemap_file,666);
      echo "<p>Overwriting site map $filename</p>\n";
    }
    //
    // gmdate() uses the current time when no time stamp is supplied.
    //
    echo "<p>Generation of XML started at ".gmdate("M d Y H:i:s")."</p>";
    $fp=fopen($filename,'w');
    if (!$fp) {
      echo "<p>Error: Could not open $filename for writing.</p>";
      return;
    }
    fwrite($fp,"<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n");
    fwrite($fp,
      "<urlset xmlns=\"http://www.sitemaps.org/schemas/sitemap/0.9\">\n");
    recursively_list_subdirectories_xml($fp,$lang,$siteroot,0,$siteroot);
    fwrite($fp,"</urlset>\n");
    fclose($fp);
    echo "<p>Generation of XML finished at ".gmdate("M d Y H:i:s")."</p>";
    echo "<p><a href=\"$sitemap_file\">View generated XML file</a></p>";
    return;
  }
  508   
  //
  // The sitemap() function is the front end of the site map generator.
  //
  //   $lang          Language of the generated headers, "en" or "sv".
  //   $sitemap_mode  Optional. Either "xhtml" (the default), for echoing
  //                  the site map as XHTML, or "xml", for writing it to
  //                  an XML file. For any other value an error message
  //                  is echoed and nothing else is done.
  //   $sitemap_file  Optional. Name of the XML file to write in "xml"
  //                  mode. If omitted, the global $default_xmlfile is used.
  //
  // If the URL carries a non-empty "xmlkey" parameter, then this overrides
  // the optional arguments: If the parameter equals the MD5 hash of the
  // global $xml_keystring, then XML is written to the default XML file;
  // otherwise XHTML is echoed.
  //
  // NOTE(review): The key string is stored in clear text in this file, so
  // anyone who is able to read the source can compute the matching hash.
  //
  function sitemap($lang,$sitemap_mode="xhtml",$sitemap_file=null) {
    if ($sitemap_file===null) {
      $sitemap_file=$GLOBALS['default_xmlfile']; // Default file for XML output
    }

    //
    // The mode parameter must be either "xhtml" or "xml".
    //
    $sitemap_mode=trim((string)$sitemap_mode);
    if (($sitemap_mode!=="xhtml")&&($sitemap_mode!=="xml")) {
      echo "Error! Unknown supplied mode '$sitemap_mode'!";
      return;
    }

    //
    // If a valid switch is present at the URL, then override the optional
    // input arguments and go ahead with simply generating XML output in
    // the default XML file. The parameter is only read if it is actually
    // present and is a plain string.
    //
    $md5hash="";
    if (isset($_GET["xmlkey"])&&is_string($_GET["xmlkey"])) {
      $md5hash=$_GET["xmlkey"];
    }
    if ($md5hash!=="") {
      //
      // Unless the supplied MD5 hash code matches that of the key word,
      // do nothing to the XML file and instead generate the XHTML output.
      //
      if (trim($md5hash)===md5(trim($GLOBALS['xml_keystring']))) {
        $sitemap_mode="xml";
        $sitemap_file=$GLOBALS['default_xmlfile']; // Default file for XML output
      } else {
        $sitemap_mode="xhtml";
      }
    }

    if ($sitemap_mode==="xhtml") {
      //
      // Generate XHTML for the display of the site map.
      //
      sitemap_xhtml($lang);
    } else {
      //
      // Generate XML and write it to the site map file.
      //
      sitemap_xml($lang,$sitemap_file);
    }

    return;
  }
  576   
  577   ?>
  578   

Return to previous page

Generated by ::viewsrc::

Last modified Wednesday 15 Feb 2023