Automatically Redirect 404’s to The Closest Page on Your Sitemap

I had a situation where I wanted to redirect thousands of “Not Found” pages automatically – solely with JavaScript.

This is a bit of an odd one. On the one hand, ideally, you want to handle redirects server-side before the website even starts loading – that would be considered the “right” way to do this. On the other hand, sometimes you don’t have access to server-side code, or the CMS doesn’t allow you to do things like that. And, Google does parse JavaScript.

So, I realize it’s not the cleanest solution, but it works well and across many situations.

Introduction:

This is a script for your 404 page. When a user lands on a page that doesn’t exist, it checks your sitemap, looks for a page URL on your site that is the most similar, and redirects them there instead.

For example, if someone tried visiting:

  • /how-to-write-javascript-code/

But the URL doesn’t exist and shows a 404, since it was recently changed to:

  • /blog/write-javascript-code/

It would extract the keywords “write”, “javascript”, and “code”, and find the correct URL automatically. Cool, huh?

I included some easy-to-change settings at the top, so you can add keywords to ignore, the title of your “Not Found” page to identify when a 404 was triggered, minimum word length, and more:

<script>
// Settings
// These constants are read by the redirect script below; adjust them to match your site.

const baseUrl = 'https://domain.com/'; // Your website root. It is stripped from each sitemap URL before keyword matching, so it should match the prefix your sitemap uses.
const sitemapURL = 'https://domain.com/sitemap.xml'; // Your sitemap URL. It is fetched from the browser each time the 404 page loads.
const notFoundTitleKeyword = 'Not Found'; // A keyword that is present in your "Not Found" page title (case-sensitive). This might be "404", "Uh oh", "We can't find.." etc. The script only runs when the page title contains it.
const ignoreWords = ['blog', 'feed', 'guide']; // Common words to ignore. These should be low-value words that are present in many of your URLs.
const minWordLength = 3; // Word length threshold in characters. Only words LONGER than this are used as keywords, so with 3 this ignores low-value words like "to", "a", "the", etc.

// Only edit below here if you know what you're doing:

document.addEventListener('DOMContentLoaded', function() {
	if (document.title.includes(notFoundTitleKeyword)) {
		/**
		 * Downloads the sitemap and returns the text of every `loc` element in it.
		 *
		 * Never rejects: any network, HTTP or parsing failure is logged with
		 * console.error and reported to the caller as an empty list.
		 *
		 * @param {string} url - Address of the XML sitemap to download.
		 * @returns {Promise<string[]>} URLs listed in the sitemap, or [] on failure.
		 */
		async function fetchSitemap(url) {
			try {
				const response = await fetch(url);
				// fetch() only rejects on network failure. Without this check an HTTP
				// error page (404/500) would be parsed as if it were the sitemap.
				if (!response.ok) {
					throw new Error(`Sitemap request failed with status ${response.status}`);
				}
				const text = await response.text();
				const xmlDoc = new DOMParser().parseFromString(text, 'text/xml');
				// Pretty-printed sitemaps can pad the URL with whitespace/newlines, so
				// trim each entry and discard any that end up empty.
				return Array.from(xmlDoc.querySelectorAll('loc'), node => node.textContent.trim())
					.filter(loc => loc.length > 0);
			} catch (error) {
				console.error('Error fetching sitemap:', error);
				return [];
			}
		}
		/**
		 * Extracts candidate keywords from the path of the page being viewed.
		 *
		 * The path is split on "/" and each segment on "-". Purely numeric
		 * segments (IDs, date parts) are dropped, as are words in `ignoreWords`
		 * and words that are not longer than `minWordLength`.
		 *
		 * @returns {string[]} Keywords in the order they appear in the path.
		 */
		function processCurrentURL() {
			// Compare ignore-words case-insensitively so "Blog" is treated like "blog".
			const ignored = new Set(ignoreWords.map(word => word.toLowerCase()));
			const isIgnored = text => ignored.has(text.toLowerCase());

			return window.location.pathname
				.split('/')
				.filter(segment => segment.length > 0 && !isIgnored(segment) && !/^\d+$/.test(segment))
				// Flatten every remaining segment into its individual hyphen-separated words.
				.join('-')
				.split('-')
				// Ignore-words are checked per word as well as per segment, so an
				// ignored word inside a slug (e.g. "guide-to-css") is dropped too.
				.filter(word => word.length > minWordLength && !isIgnored(word));
		}
		/**
		 * Picks the sitemap URL whose path shares the most keywords with the missing page.
		 *
		 * A keyword counts once per URL if it occurs as a substring of any path part
		 * (the text between slashes after `baseUrl` has been removed). Comparison is
		 * case-insensitive. On a tie the URL that appears first in `urls` wins.
		 *
		 * @param {string[]} currentKeywords - Keywords extracted from the requested path.
		 * @param {string[]} urls - URLs taken from the sitemap.
		 * @returns {string} The best-scoring URL exactly as given in `urls`, or ''
		 *   when no URL matches any keyword.
		 */
		function findClosestMatch(currentKeywords, urls) {
			// An empty keyword would "match" every part via includes(), so drop it.
			// Lower-case once up front so the inner loop stays a plain substring test.
			const keywords = currentKeywords
				.filter(keyword => keyword.length > 0)
				.map(keyword => keyword.toLowerCase());

			let bestMatch = '';
			let bestScore = 0;
			for (const url of urls) {
				const urlParts = url
					.replace(baseUrl, '')
					.toLowerCase()
					.split('/')
					.filter(part => part.length > 0);
				const score = keywords
					.filter(keyword => urlParts.some(part => part.includes(keyword)))
					.length;
				// Strictly greater: earlier URLs keep priority on equal scores.
				if (score > bestScore) {
					bestScore = score;
					bestMatch = url;
				}
			}
			return bestMatch;
		}
		/**
		 * Runs the whole lookup: loads the sitemap, derives keywords from the
		 * current path, and replaces the current location with the best-matching
		 * sitemap URL. Logs and does nothing else when the sitemap yields no URLs
		 * or when no URL matches.
		 *
		 * @returns {Promise<void>}
		 */
		async function redirectToClosestMatch() {
			const candidates = await fetchSitemap(sitemapURL);
			if (candidates.length === 0) {
				console.log('Sitemap is empty or could not be fetched.');
				return;
			}

			const target = findClosestMatch(processCurrentURL(), candidates);
			if (!target) {
				console.log('No close match found.');
				return;
			}

			console.log('Redirecting to:', target);
			// replace() swaps the current history entry instead of adding a new one.
			window.location.replace(target);
		}
		redirectToClosestMatch();
	} 
	else {
		console.log('Incorrect page title:', document.title);
	}
});
</script>

Here’s a backup on Pastebin, just in case the syntax highlighter stops working:
https://pastebin.com/kw0FRxCV

What People Are Saying:

No comments yet. Be the first!

Leave a Reply

Copyright 2024, All rights reserved. Yadda yadda.