Joshua hatfield - 2010-03-15 20:01:16 -
In reply to message 11 from Jason Brook
I don't know how well this "forum" is going to format this, but at least it should give you an idea. This is the code I'm using to detect the favicon.
/**
 * Locate the favicon of a web page.
 *
 * Downloads the page at $referrence_url and looks for a <link> tag whose rel
 * is "icon" or "shortcut icon". When the page declares none, the conventional
 * "/favicon.ico" is assumed. The candidate is converted to an absolute URL
 * with url_find_absolute() and is only returned if it can be downloaded and
 * is not empty.
 *
 * @param string|null $referrence_url Absolute http(s) URL of the page to inspect.
 * @return string|false Absolute URL of the favicon, or false when no URL was
 *                      given, the URL is not an absolute http(s) URL, the page
 *                      cannot be fetched, or no icon file is found.
 */
function favicon_find($referrence_url = null) {
    $referrence_url = trim((string) $referrence_url);

    // Only absolute http(s) URLs can be fetched. Everything up to the first
    // "/", "?" or "#" is accepted as the authority so that hosts containing
    // hyphens or a port number are not rejected.
    if (!preg_match('/^https?:\/\/[^\/?#\s]+/i', $referrence_url)) {
        return false;
    }

    // Warnings are suppressed on purpose: an unreachable page is an expected
    // outcome and is reported to the caller through the false return value.
    $page_html = @file_get_contents($referrence_url);
    if ($page_html === false) {
        return false;
    }

    // Default location, used when the page declares no icon.
    $favicon = '/favicon.ico';

    // Inspect each <link> tag on its own so that attribute order and quoting
    // style (double, single or none) do not matter.
    if (preg_match_all('/<link\s[^>]*>/i', $page_html, $link_tags)) {
        foreach ($link_tags[0] as $tag) {
            // rel must be exactly "icon" or "shortcut icon".
            if (!preg_match('/\srel\s*=\s*(["\']?)(?:shortcut\s+)?icon\1(?=[\s\/>])/i', $tag)) {
                continue;
            }
            // Branch reset (?|...) puts the value in group 1 for every quoting style.
            if (preg_match('/\shref\s*=\s*(?|"([^"]*)"|\'([^\']*)\'|([^\s"\'>]+))/i', $tag, $href_match)
                && trim($href_match[1]) !== '') {
                // Attribute values are HTML-encoded (e.g. "&amp;" inside a query string).
                $favicon = html_entity_decode(trim($href_match[1]), ENT_QUOTES, 'UTF-8');
                break;
            }
        }
    }

    // Convert the favicon path to an absolute URL.
    $favicon_abs = url_find_absolute($favicon, $referrence_url);
    if (!$favicon_abs) {
        return false;
    }

    // Make sure there is actually a file there. This is only reached with a
    // real URL, so file_get_contents() is never handed false or an empty path.
    $icon_data = @file_get_contents($favicon_abs);
    if ($icon_data === false || $icon_data === '') {
        return false;
    }

    return $favicon_abs;
}
/**
 * Resolve a URL found on a page against the absolute URL of that page.
 *
 * Handled forms of $relative_url:
 *  - absolute http(s) URL        -> returned as is (trimmed)
 *  - protocol-relative "//host/" -> prefixed with the reference's scheme
 *  - root-relative "/path"       -> appended to the reference's scheme and host
 *  - anything else               -> merged with the reference's directory,
 *                                   collapsing "." and ".." segments
 *
 * Any query string or fragment on the reference is ignored; a query string or
 * fragment on the relative URL is kept verbatim and never treated as path.
 *
 * @param string $relative_url   URL to resolve (may already be absolute).
 * @param string $referrence_url Absolute http(s) URL the relative one was found on.
 * @return string|false The absolute URL, or false when $relative_url is not
 *                      absolute and $referrence_url is not an absolute http(s) URL.
 */
function url_find_absolute($relative_url, $referrence_url) {
    $relative_url = trim((string) $relative_url);
    $referrence_url = trim((string) $referrence_url);

    // First check if this is actually absolute. The pattern is anchored so an
    // http:// that merely appears inside a query string does not count, and the
    // authority may contain hyphens, ports, etc.
    if (preg_match('/^https?:\/\/[^\/?#\s]+/i', $relative_url)) {
        return $relative_url;
    }

    // The reference must be absolute. Group 1 is scheme + authority; group 2 is
    // the path without query/fragment and always exists (it may be empty).
    if (!preg_match('/^(https?:\/\/[^\/?#\s]+)([^?#]*)/i', $referrence_url, $matches)) {
        return false;
    }
    $ref_host = $matches[1];
    $ref_path = $matches[2];

    // Protocol-relative URL: only the scheme is inherited from the reference.
    if ((string) substr($relative_url, 0, 2) === '//') {
        return substr($ref_host, 0, strpos($ref_host, ':')) . ':' . $relative_url;
    }

    // Root-relative URL: just add it to the host.
    if ((string) substr($relative_url, 0, 1) === '/') {
        return $ref_host . $relative_url;
    }

    // Split off "?query" / "#fragment" so slashes inside them are not mistaken
    // for directory separators.
    $rel_path_length = strcspn($relative_url, '?#');
    $rel_path = (string) substr($relative_url, 0, $rel_path_length);
    $rel_suffix = (string) substr($relative_url, $rel_path_length);

    // No path at all ("", "?x=1", "#top"): the target is the referencing
    // document itself. The reference's own query string is not carried over.
    if ($rel_path === '') {
        return $ref_host . ($ref_path === '' ? '/' : $ref_path) . $rel_suffix;
    }

    // Directory of the reference: break the path up and drop the file name.
    $segments = explode('/', $ref_path);
    array_pop($segments);
    // Element 0 is the empty string that produces the leading "/" on implode.
    // It has to exist before anything is pushed, otherwise a reference without
    // a path would yield "http://hostfile" instead of "http://host/file".
    if (count($segments) === 0) {
        $segments = array('');
    }

    $rel_parts = explode('/', $rel_path);
    foreach ($rel_parts as $part) {
        if ($part === '' || $part === '.') {
            // Stay in the current directory.
            continue;
        }
        if ($part === '..') {
            // Move up one directory, but never above the root marker.
            if (count($segments) > 1) {
                array_pop($segments);
            }
            continue;
        }
        // Add directory (or file) to the resolved path.
        $segments[] = $part;
    }

    // A relative path ending in "/", "." or ".." names a directory, so the
    // result keeps a trailing slash.
    $last_part = end($rel_parts);
    if ($last_part === '' || $last_part === '.' || $last_part === '..') {
        $segments[] = '';
    }

    return $ref_host . implode('/', $segments) . $rel_suffix;
}