<?php
/*
	Cached XML sitemap.

	Request modes:
	- Plain request: serve the cached copy. A stale copy is still served, and
	  a background curl request to this script with ?recreate rebuilds it.
	  With no cached copy at all, the rebuild (?create) runs synchronously
	  before responding.
	- ?create / ?recreate while the lock file exists: a rebuild is already
	  running, so serve the cached copy if it is fresh, otherwise a bare 503.
	- ?create / ?recreate without a lock: take the lock, render the sitemap
	  into the response, then release the lock.
*/
require('.htload-vars.php');
$cachedfile = dirname($_SERVER['DOCUMENT_ROOT'])."/cached/".basename(__FILE__);
$lockfile = '/tmp/'.$_SERVER['SERVER_NAME'].'.'.basename(__FILE__).'.lock';

// True while the cached copy is younger than the configured lifetime (seconds).
$sitemap_cache_is_fresh = function() use ($cachedfile) {
	return time()-filemtime($cachedfile) < intval(config_var('cache_lifetime_sitemap'));
};

// Send the cached sitemap: headers always, body unless this is a HEAD request.
$sitemap_send_cached = function() use ($cachedfile) {
	header('Content-Type: text/xml; charset=UTF-8');
	header('Content-Length: '.filesize($cachedfile));
	if($_SERVER['REQUEST_METHOD']!='HEAD') {
		readfile($cachedfile);
	}
};

// A lock older than an hour is treated as left behind by a broken rebuild.
$sitemap_lock_is_stale = function() use ($lockfile) {
	return is_file($lockfile) && time()-filemtime($lockfile)>3600;
};

// Remove the lock file natively, so no shell ever sees the host-derived path.
$sitemap_release_lock = function() use ($lockfile) {
	if(is_file($lockfile)) {
		unlink($lockfile);
	}
	clearstatcache();
};

/*
	Build the "fetch this script, then move the result over the cache" shell
	command. Every interpolated value is shell-escaped. --fail makes curl exit
	non-zero on an HTTP error (e.g. the 503 sent while a rebuild is locked),
	so the mv is skipped and a good cached copy is never replaced by an empty
	error body.
*/
$sitemap_rebuild_command = function($agent, $query) use ($cachedfile) {
	$refresh = escapeshellarg($cachedfile.'.refresh');
	$url = escapeshellarg('http://'.$_SERVER['SERVER_NAME'].'/'.basename(__FILE__).'?'.$query);
	return '(/usr/bin/curl --fail --location --max-redirs 2 --silent -A '.escapeshellarg($agent).' -o '.$refresh.' '.$url.' && mv '.$refresh.' '.escapeshellarg($cachedfile).')';
};

if(!isset($_GET['create']) && !isset($_GET['recreate'])) {
	if(is_file($cachedfile)) {
		// Decide freshness before sending, then serve the file either way
		$sitemap_is_fresh = $sitemap_cache_is_fresh();
		$sitemap_send_cached();
		if(!$sitemap_is_fresh) {
			// the old file has been served; recreate it in the background
			if($sitemap_lock_is_stale()) {
				// break the lock, because it's been running for over an hour so something's probably broken
				$sitemap_release_lock();
			}
			if(!is_file($lockfile)) {
				// Only spawn processes if there's no lockfile
				exec($sitemap_rebuild_command('Chameleon CMS asynchronous loader robot', 'recreate').' > /dev/null 2>&1 &');
				if(in_array('PRODUCTS', $INSTALLED)) {
					// hit the products sitemap URL, which will recreate it if necessary
					exec('/usr/bin/curl --location --max-redirs 2 --silent -A "Chameleon CMS asynchronous loader robot" -o /dev/null '.escapeshellarg('http://'.$_SERVER['SERVER_NAME'].'/sitemap.products.xml').' > /dev/null 2>&1 &');
				}
				$sitemap_cache_dir = escapeshellarg(dirname($_SERVER['DOCUMENT_ROOT']).'/cached/');
				// purge primary cached files older than a day
				exec('/usr/bin/find '.$sitemap_cache_dir.' -type f -mtime +1 -name \'uri*\' -exec rm "{}" \; > /dev/null 2>&1 &');
				exec('/usr/bin/find '.$sitemap_cache_dir.' -type f -mtime +1 -name \'feed*\' -exec rm "{}" \; > /dev/null 2>&1 &');
				// purge other cached files older than a month
				exec('/usr/bin/find '.$sitemap_cache_dir.' -type f -mtime +30 ! -name .gitignore -exec rm "{}" \; > /dev/null 2>&1 &');
			}
		}
	} else {
		if(!is_file($lockfile)) {
			// Only spawn processes if there's no lockfile; this one is waited for
			exec($sitemap_rebuild_command('Chameleon CMS synchronous loader robot', 'create').' > /dev/null 2>&1');
		}
		// The file may have just been created by the child process
		clearstatcache();
		/*
			Do we have a file now? If not, return a human-readable 503
			temporarily unavailable error
		*/
		if(is_file($cachedfile)) {
			$sitemap_send_cached();
		} else {
			header('Content-Type: text/html; charset=UTF-8');
			echo_datafail();
		}
	}
} elseif(is_file($lockfile)) {
	/*
		If the lock file exists, there's already a recreation process running
		so we should serve the old cached copy rather than multiply-recreating
		the sitemap. If there's no cached file or it's really old, we're
		waiting for it to be regenerated, so serve a 503 temporarily
		unavailable header and no content.
	*/
	if(is_file($cachedfile) && $sitemap_cache_is_fresh()) {
		$sitemap_send_cached();
	} else {
		header('HTTP/1.1 503 Service Unavailable', true, 503);
	}
	if($sitemap_lock_is_stale()) {
		// break the lock, because it's been running for over an hour so something's probably broken
		$sitemap_release_lock();
	}
} else {
	ini_set('memory_limit', '512M'); // there's an issue with memory usage for a large product range
	touch($lockfile);

	if(config_var('auto_status_change')) {
		ChameleonPage::auto_status_change();
	}

	$homepage = '';
	$products_done = array();
	// The homepage's lastmod is taken from the front controller's mtime, falling back to "now"
	if(is_file($_SERVER['DOCUMENT_ROOT']."/index.php")) {
		$lastmod = date(DATE_W3C, filemtime($_SERVER['DOCUMENT_ROOT']."/index.php"));
	} elseif(is_file($_SERVER['DOCUMENT_ROOT']."/index.html")) {
		$lastmod = date(DATE_W3C, filemtime($_SERVER['DOCUMENT_ROOT']."/index.html"));
	} else {
		$lastmod = date(DATE_W3C);
	}

	header('Content-Type: text/xml; charset=UTF-8');
	echo '<?xml version="1.0" encoding="UTF-8"?>', PHP_EOL;
	echo '<?xml-stylesheet type="text/xsl" href="/sitemap.xsl"?>', PHP_EOL;
?>
<urlset xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
	xsi:schemaLocation="http://www.sitemaps.org/schemas/sitemap/0.9 http://www.sitemaps.org/schemas/sitemap/0.9/sitemap.xsd"
	xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
	<url>
		<loc><?= htmlspecialchars(canonical_href('/'.$homepage),ENT_QUOTES|ENT_XML1,'UTF-8');?></loc>
		<lastmod><?=$lastmod;?></lastmod>
	</url>
<?php
	if(!config_var('site_hidden')) {
		// group ids in 50s to load page information in a smaller number of bulk queries
		$grouped_ids = array();
		$obj_ids = array();
		$i = 0;
		foreach ($STRUCTURE as $objdata) {
			$objpos = $objdata['pos']; // NOTE(review): not read in this file; kept in case helpers read it as a global
			foreach($STRUCTURE[$objdata['id']]['posarray'] as $pos) {
				// skip this if it or one of its parents is not public
				if(!in_array($pos, $POSITION) || $STRUCTURE[array_search($pos, $POSITION)]['status']!=1) {
					continue 2;
				}
			}
			$grouped_ids[intval($i++/50)][$objdata['pos']] = $objdata['id'];
			$obj_ids[] = $objdata['id'];
		}
		foreach($grouped_ids as $codes) {
			cache_get_type_fromcode($codes);
		}
		foreach ($obj_ids as $obj_id) {
			$objdata = $STRUCTURE[$obj_id];
			$objpos = $objdata['pos']; // NOTE(review): not read in this file; kept in case helpers read it as a global
			$objdata = get_item_fromurl($PATH[$DEFAULT['language']][$objdata['id']]);
			if(empty($objdata['link']) || item_is_empty($objdata)) continue;
			$lastmod = date(DATE_W3C,$objdata['tsmodified']);
	?>
	<url>
		<loc><?= htmlspecialchars(canonical_href($objdata['link']),ENT_QUOTES|ENT_XML1,'UTF-8');?></loc>
		<lastmod><?=$lastmod;?></lastmod>
	</url>
<?php
			// Calendar entries attached to this page each get their own URL
			if(in_array('CALENDAR', $INSTALLED) && $objdata['calendars']!='') {
				$entries = ChameleonCalendar::retrieve_by_search(array('category_id'=>array_filter(explode('|', $objdata['calendars']))), array());
				if(is_array($entries)) foreach($entries as $entry) {
					$permalink = $objdata['link'].$entry->relative_link();
					$lastmod = date(DATE_W3C, strtotime($entry->get_modified()));
		?>
	<url>
		<loc><?= htmlspecialchars(canonical_href($permalink),ENT_QUOTES|ENT_XML1,'UTF-8');?></loc>
		<lastmod><?=$lastmod;?></lastmod>
	</url>
<?php
				}
			}
			if(!empty($objdata['products']) && !empty($objdata['productdetail'][0])) {
				// Flatten every product list on the page into one de-duplicated list
				$all_products = array();
				foreach ($objdata['productdetail'] as $arr) $all_products = array_merge($all_products, $arr);
				$objdata['productdetail'] = array(array_unique($all_products));

				set_time_limit(30); // There is an issue with how long this takes...
				ChameleonProduct::clear_caches('all');
				$products = ChameleonProduct::retrieve($objdata['productdetail'][0], array(), false);
				foreach($objdata['productdetail'][0] as $product_id) {
					if(in_array($product_id, $products_done)) continue;
					// A listed id that was not retrieved would otherwise be a fatal error mid-document
					if(!isset($products[$product_id])) continue;
					$product = $products[$product_id];
					if($product->get_status()!=1) continue;
					$lastmod = date(DATE_W3C, strtotime($product->get_modified()));
					$product_url = preg_replace('/\/$/', '', $objdata['link']).'/'.string_to_url($product->get_name($DEFAULT['systemlanguage'])).'/';
					$canonical_url = $product->get_canonical_url($GLOBALS['DEFAULT']['language']);
					if(!empty($canonical_url) && $product_url != $canonical_url) {
						// Check the canonical URL is potentially valid, remove it if not
						$hosting_page = dirname($canonical_url).'/';
						$prefix_found = array_search($hosting_page, $PATH[$DEFAULT['language']]);
						if($prefix_found!==false) {
							/*
								If the page that was hosting this product is still in
								$PATH and matches the current page, then we're only
								here because the full canonical url didn't match, in
								which case the product name has changed, so we need to
								update the canonical url.

								If not, the prefix found matched another page, so we
								skip rendering this item here
							*/
							if($hosting_page==$objdata['link']) {
								$canonical_url = $product_url;
								$product->set_canonical_url($DEFAULT['language'], $canonical_url);
								$product->save();
							} else {
								continue;
							}
						} else {
							$canonical_url = '';
							$product->set_canonical_url($DEFAULT['language'], '');
							$product->save();
						}
					}
					if(empty($canonical_url)) {
						$urls = product_urls($product);
						if(!empty($urls)) {
							$canonical_url = $urls[0];
							$product->set_canonical_url($DEFAULT['language'], $canonical_url);
							$product->save();
						}
					}
					$products_done[] = $product_id;
					if(substr($product_url, -1)!='/') $product_url .= '/';
					// NOTE(review): the entry below uses $product_url, not $canonical_url - confirm this is intended
		?>
	<url>
		<loc><?= htmlspecialchars(canonical_href($product_url), ENT_QUOTES|ENT_XML1, 'UTF-8');?></loc>
		<lastmod><?=$lastmod;?></lastmod>
	</url>
<?php
				}
			}
		}
	}
?>
</urlset>
<?php
	// Rendering finished: let the next stale request trigger a new rebuild
	$sitemap_release_lock();
}
