<?php
// Start-of-script timing mark. microtime() without arguments returns a
// "fraction seconds" string, so the cast keeps only the sub-second fraction.
// NOTE(review): presumably the end of the script subtracts a value obtained
// the same way - confirm before switching to microtime(true).
$t1 = (float) microtime();
// ------------------------------------------------------------------------- //
// Description du script.                                                    //
// ------------------------------------------------------------------------- //
// Auteur: Damien MARTRAIRE                                                  //
// Email:  Send from http://www.atypix.com/contact.php3                       //
// Web:    http://www.atypix.com/                                              //
// Fichier du 15/05/2001, Mise à jour du 18/11/2003                          //
// ------------------------------------------------------------------------- //



// Display colours. They are only assigned here; no use is visible in this part of the file.
$color1="#EEEEEE";    // colour of the content table (per the original note)
$color2="#996699";    // colour of the title table (per the original note)
// Uncomment and set both values to go through an HTTP proxy: connectSocket()
// reads the globals $proxy and $proxy_port.
//$proxy="proxy";    // proxy host
//$proxy_port=8080;    // proxy port


/** Classe PageAnalyse */
class PageAnalyse
{
    var 
$url="";            // URL of the file to analyse
    
var $page="";            // content of the page being analysed
    
var $xml="";            // XML output buffer
    
var $error="";            // buffer collecting error messages
    
var $content="";        // HTML output buffer
    
var $header=array();    // HTTP header lines received over the socket connection
    
var $statut="";            // connection status (HTTP code, 200 = ok)
    
var $date="";            // date of the page
    
var $connexion;            // connection type: "fread" or "fsock"



    /*****************************************************
    ** Constructor                                          **
    *****************************************************/

    
/**
     * Build the whole report into the HTML buffer: banner, input form, then,
     * when a URL is given, either the full analysis, the raw source view, or
     * a connection-error section.
     *
     * Reads the globals $action ("source" selects the source view) and
     * $connexion ("fread" selects file access, anything else the socket).
     *
     * @param string $url URL of the page to analyse; empty shows the form only
     */
    function __construct($url)
    {
        global $action, $connexion;

        // connect() routes every value other than "fread" through the socket,
        // but headProcess()/getHead() test for the literal "fsock". Normalise
        // here so an empty or unknown value still gets its headers parsed.
        $this->connexion = ($connexion == "fread") ? "fread" : "fsock";
        $this->url = $url;

        // The URL comes from the request; escape it before echoing it in HTML.
        // A single-byte charset keeps the call byte-transparent for any input.
        $safeUrl = htmlspecialchars((string) $this->url, ENT_QUOTES, "ISO-8859-1");

        $this->beginTable();
        $this->insertTitle("Defi PHP n°3","a10",0);
        $this->insertLine("<a href='http://www.atypix.com/contact.php3'>Send from http://www.atypix.com/contact.php3</a>");
        $this->insertLine("<a href='http://www.atypix.com'>www.ATYPIX.com</a>");
        $this->insertLine("Fichier du 15/05/2001, Mise à jour du 18/11/2003");
        $this->printForm();

        if ($url) {
            $this->connect();
            if ($this->headProcess() && !$action) {
                // Full analysis: one titled section per kind of information.
                $this->menu();
                $this->insertTitle("Header de la page","a20");
                $this->getHead();
                $this->insertTitle("Données de la page","a30");
                $this->getInfosPage();
                $this->insertTitle("Métas de la page","a40");
                $this->getMetas();
                $this->insertTitle("Liens de la page","a50");
                $this->getLinks();
                $this->insertTitle("Images de la page","a60");
                $this->getImages();
                $this->printXml();
                //$this->writeXml("url.xml"); write the XML in a file
            } else if ($action == "source") {
                $this->insertTitle("Code source de la page $safeUrl","a80");
                $this->insertLine(nl2br(htmlentities($this->page)),"");
            } else {
                $this->insertTitle("Erreur de connexion sur $safeUrl","a80");
                if ($this->connexion == "fread") {
                    $text = "ERREUR: Fichier introuvable";
                    $this->insertLine($text,$this->error);
                } else {
                    $text = "ERREUR: $this->error - Code HTTP";
                    $this->insertLine($text,$this->statut);
                }
            }
        }

        $this->insertTitle("Fin de l'analyse","a70");
        $this->closeTable();
    }

    /**
     * PHP 4 style constructor. Engines that no longer treat a method named
     * after the class as the constructor use __construct() directly; older
     * ones enter here and are forwarded.
     *
     * @param string $url URL of the page to analyse
     */
    function PageAnalyse($url)
    {
        $this->__construct($url);
    }


    
/*****************************************************
    ** Output buffer management                              **
    *****************************************************/

    /** Handle the error buffer */
    
function error($text)
    {
        
$this->error.=$text."<br>\n";
    }

    
/**
     * Append the opening <table> tag and a first empty two-column row
     * (50% / 50%) to the HTML buffer.
     */
    function beginTable()
    {
        $html = "<table border=\"0\" cellspacing=\"1\" "
              . "cellpadding=\"2\" width=\"100%\">\n"
              . "<tr><td width=\"50%\">"
              . "</td><td width=\"50%\"></td></tr>\n";
        $this->content = $this->content.$html;
    }

    
/**
     * Append raw text to the HTML buffer, unchanged.
     *
     * @param string $text markup or text to append
     */
    function insertText($text)
    {
        $this->content = $this->content.$text;
    }

    
/**
     * Append the closing </table> tag to the HTML buffer.
     */
    function closeTable()
    {
        $closing = "</table>\n\n";
        $this->content = $this->content.$closing;
    }

    
/**
     * Append a section title row to the HTML buffer. The row holds a named
     * anchor around the title and a "Haut" link pointing to #top.
     *
     * @param string $text title text (inserted as-is, may contain markup)
     * @param string $num  anchor name used for in-page links
     * @param mixed  $br   truthy to insert an empty spacer row before the title
     */
    function insertTitle($text,$num,$br="1")
    {
        if ($br) {
            // blank row before the title
            $this->content.="<tr><td colspan=\"2\">&nbsp;</td></tr>\n";
        }
        $this->content.="<tr><td class=\"title\" colspan=\"2\">\n";

        $this->content.="<table width=\"100%\"><tr><td class=\"title\">";
        // The anchor is now closed explicitly instead of being left open.
        $this->content.="<a name=\"$num\">\n$text&nbsp;\n</a>";
        $this->content.="</td><td align=\"right\">";
        $this->content.="<a href=\"#top\" class=\"title\">Haut</a>\n";
        $this->content.="</td></tr></table>";

        // Exactly one </td> closes the outer cell (the original emitted two).
        $this->content.="</td>\n</tr>\n";
    }

    
/**
     * Append one content row to the HTML buffer: two cells (label, value)
     * when $value is truthy, otherwise a single cell spanning both columns.
     *
     * @param string $text  label, or the whole line
     * @param string $value optional second-column content
     */
    function insertLine($text,$value="")
    {
        $row = array("<tr>\n");
        if (!$value) {
            $row[] = "<td class=\"content\" colspan=\"2\">\n";
            $row[] = "$text&nbsp;</td>\n";
        } else {
            $row[] = "<td class=\"content\">$text&nbsp;</td>\n";
            $row[] = "<td class=\"content\">$value&nbsp;</td>\n";
        }
        $row[] = "</tr>\n";
        $this->content = $this->content.implode("", $row);
    }

    
/**
     * Append one element to the XML buffer. The tag name is upper-cased and
     * the value is inserted as given (callers escape it).
     *
     * $value now has a default, because an optional parameter placed before
     * a required one is deprecated; existing three-argument calls are unaffected.
     *
     * @param string $tag      element name
     * @param string $attribut attribute string placed inside the opening tag, or ""
     * @param string $value    element content
     */
    function insertXML($tag,$attribut="",$value="")
    {
        $name = strtoupper($tag);
        if ($attribut == "") {
            $this->xml.="<".$name.">";
        } else {
            $this->xml.="<".$name." $attribut>";
        }
        $this->xml.=$value;
        $this->xml.="</".$name.">\n";
    }


    
/*****************************************************
    ** Output buffer print management                     **
    *****************************************************/

    /** print a menu */
    
function menu()
    {
        global 
$PHP_SELF;
        
$text ="&nbsp;<b>: : :</b>&nbsp; Menu &nbsp;<b>: : :</b>&nbsp;";
        
$text.="&nbsp;<a href=\"#a20\">En tête</a>&nbsp; | ";
        
$text.="&nbsp;<a href=\"#a30\">informations</a>&nbsp; | ";
        
$text.="&nbsp;<a href=\"#a40\">métas</a>&nbsp; | ";
        
$text.="&nbsp;<a href=\"#a50\">liens</a>&nbsp; | ";
        
$text.="&nbsp;<a href=\"#a60\">images</a>&nbsp; | ";
        
$text.="&nbsp;<a href=\"#xml\">format XML</a>&nbsp; | ";
        
$text.="&nbsp;<a href=\"".basename($PHP_SELF);
        
$text.="?action=source&url=$this->url\">";
        
$text.="sources</a>&nbsp;";
        
$this->insertLine($text,"");
    }

    
/**
     * Append the URL input form (text field, connection-type radios, submit
     * button) to the HTML buffer.
     */
    function printForm()
    {
        global $PHP_SELF;
        // $PHP_SELF only exists with register_globals; fall back to $_SERVER.
        $self = $PHP_SELF ? $PHP_SELF : (isset($_SERVER["PHP_SELF"]) ? $_SERVER["PHP_SELF"] : "");
        $script = htmlspecialchars(basename($self), ENT_QUOTES, "ISO-8859-1");

        $this->insertText("<form action=\"".$script);
        $this->insertText("\" method=\"post\">\n");
        $this->insertTitle("URL à analyser","a100");

        // Pre-fill with the current URL, escaped so it cannot leave the attribute.
        $value = ($this->url) ? $this->url : "http://www.atypix.com";
        $value = htmlspecialchars((string) $value, ENT_QUOTES, "ISO-8859-1");

        $text ="<input type=\"text\" name=\"url\" size=\"35\" ";
        $text.="maxlength=\"256\" value=\"".$value;
        $text.="\">&nbsp;\nConnexion par&nbsp;";
        $text.="<input type=\"radio\" name=\"connexion\" value=\"fsock\" ";
        if ($this->connexion=="fsock" || !$this->connexion) $text.="checked";
        $text.=">Socket&nbsp;\n";
        $text.="<input type=\"radio\" name=\"connexion\" value=\"fread\" ";
        if ($this->connexion=="fread") $text.="checked";
        $text.=">Fichier&nbsp;\n";
        // The original repeated the value attribute on both inputs and put the
        // raw URL in the second one; each input now has one well-formed value.
        $text.="<input type=\"submit\" value=\"Analyser\">\n";
        // NOTE(review): the hidden field was evidently meant to be
        // name="submit" value="1"; confirm nothing relies on it being absent.
        $text.="<input type=\"hidden\" name=\"submit\" value=\"1\">\n";
        $this->insertLine($text,"");
        $this->insertText("</form>\n\n");
    }

    
/**
     * Send the accumulated HTML buffer to the output.
     */
    function printPage()
    {
        echo $this->content;
    }


    
/**
     * Append the XML buffer, escaped for display, as a titled section of the
     * HTML buffer. Occurrences of "&amp;nbsp;" are removed from the XML first.
     */
    function printXml()
    {
        // The assignment operator was missing, which made this line a parse error.
        $this->xml = str_replace("&amp;nbsp;","",$this->xml);
        $this->insertTitle("Analyse de $this->url au format XML","xml");
        $this->insertLine(nl2br(htmlentities($this->xml)));
    }

    
/**
     * Send the accumulated error buffer to the output.
     */
    function printError()
    {
        echo $this->error;
    }


    
/*****************************************************
    ** URL and file management                              **
    *****************************************************/

    
/**
     * Write the XML buffer to a file, replacing any previous content.
     * Failures are recorded in the error buffer.
     *
     * @param string $url path of the file to write
     */
    function writeXml($url)
    {
        // The assignment operator was missing, which made this line a parse error.
        $file = fopen($url,"w");
        if ($file) {
            // A failed write used to pass silently.
            if (fwrite($file,$this->xml) === false) {
                $this->error("Ecriture du fichier impossible");
            }
            fclose($file);
        } else {
            $this->error("Fichier impossible à ouvrir");
        }
    }

    
/**
     * Fetch $url over a raw socket with an HTTP/1.0 GET and append the whole
     * response (headers and body) to $this->page. Uses the globals $proxy and
     * $proxy_port when $proxy is set. Failures go to the error buffer.
     *
     * Fixes over the original: restores the lost "," and "=" tokens; no
     * longer appends "/" to every URL (only an empty path becomes "/");
     * honours a port given in the URL; sends a Host header; stops reading if
     * fgets() fails.
     * NOTE(review): https URLs are still fetched in clear text on the given port.
     *
     * @param string     $url  absolute URL to fetch
     * @param int|string $port port used when the URL does not name one
     */
    function connectSocket($url,$port="80")
    {
        global $proxy, $proxy_port;

        $parts = @parse_url($url);
        if (!is_array($parts)) {
            $parts = array();
        }
        $host = isset($parts["host"]) ? $parts["host"] : "";
        $hostHeader = $host;
        if (isset($parts["port"])) {
            $port = $parts["port"];
            $hostHeader .= ":".$parts["port"];
        }
        $path = isset($parts["path"]) ? $parts["path"] : "/";
        if (isset($parts["query"])) {
            $path .= "?".$parts["query"];
        }

        if ($proxy) {
            $fp = fsockopen($proxy,$proxy_port);
            // A proxy needs the absolute URL on the request line.
            $scheme = isset($parts["scheme"]) ? $parts["scheme"] : "http";
            $target = $scheme."://".$hostHeader.$path;
        } else {
            $fp = ($host == "") ? false : fsockopen($host,(int) $port);
            $target = $path;
        }

        if (!$fp) {
            $this->error("Page non accessible par socket");
            return;
        }

        fputs($fp,"GET $target HTTP/1.0\r\nHost: $hostHeader\r\n\r\n");
        while (!feof($fp)) {
            $chunk = fgets($fp,64000);
            if ($chunk === false) {
                break; // read error or timeout: do not loop forever
            }
            $this->page .= $chunk;
        }
        fclose($fp);
    }

    
/**
     * Read $url through fopen() and store the content in $this->page. Only
     * http:// and https:// URLs are accepted; anything else, or a failed
     * open, is recorded in the error buffer.
     *
     * @param string $url absolute http(s) URL to read
     */
    function connectFile($url)
    {
        // preg_match() replaces eregi(), which was removed in PHP 7.
        if (!preg_match("#^https?://#i",$url)) {
            $this->error("Le fichier doit être sur un serveur Internet");
            return;
        }

        $fp = @fopen($url,"r");
        if (!$fp) {
            $this->error("Page non accessible par ouverture de fichier");
            return;
        }

        $page = ""; // was appended to without being initialised
        while (!feof($fp)) {
            $chunk = fread($fp,1024);
            if ($chunk === false) {
                break; // read error: do not loop forever
            }
            $page .= $chunk;
        }
        $this->page = $page;
        fclose($fp);
    }

    
/**
     * Fetch $this->url with the selected transport: fopen() when the
     * connection type is "fread", the raw socket for "fsock" and for any
     * other value.
     */
    function connect()
    {
        if ($this->connexion == "fread") {
            $this->connectFile($this->url);
            return;
        }
        $this->connectSocket($this->url);
    }


    
/*****************************************************
    ** HTTP Header management                              **
    *****************************************************/

    /** return the statut to continue or not in case of HTTP code */
    
function httpCode()
    {
        switch(
1)
        {
            case(
$this->statut==302):     //redirection
            
for ($i=1;$this->header[$i];$i++)
            {
                
$location=split(":",$this->header[$i],2);
                if(
eregi("location",$location[0]))
                {
                    
$redirect=trim($location[1]);
                }
            }
            
$url =basename($PHP_SELF);
            
$url.="?url=$redirect&from=$this->url";
            
header("Location: $url");
            return 
1;
            break;

            case(
$this->statut==401):    //Authentification requise
            
$this->statut.="&nbsp;Authentification requise";
            return 
0;
            break;

            case(
$this->statut==403):    //Acces non autorise
            
$this->statut.="&nbsp;Accès non autorisé";
            return 
0;
            break;

            case(
$this->statut==404):    //Page inexistante
            
$this->statut.="&nbsp;Page inexistante";
            return 
0;
            break;

            case(
$this->statut==503):    //serveur non valide
            
$this->statut.="&nbsp;URL non valide";
            return 
0;
            break;

            case(
$this->statut >=200 && $this->statut <400): //ok
            
$this->statut.="&nbsp;ok";
            return 
1;
            break;

            default:
            return 
0;
            break;
        }
    }

    
/**
     * For socket connections, split the raw response in $this->page into
     * header lines ($this->header, index 0 = status line) and body
     * ($this->page), and store the status code in $this->statut.
     *
     * The split is now made at the blank line that ends the HTTP headers.
     * The original cut at the first "<", which breaks on headers containing
     * "<" and on bodies that do not start with a tag. The first-"<" rule is
     * kept only as a fallback when no blank line is found.
     *
     * @return int 1 when a page was fetched, 0 when $this->page is empty
     */
    function headProcess()
    {
        if ($this->page == "") {
            return 0;
        }
        if ($this->connexion != "fsock") {
            return 1; // file reads carry no HTTP header
        }

        // Find the earliest header terminator, CRLF CRLF or LF LF.
        $cut = strpos($this->page,"\r\n\r\n");
        $skip = 4;
        $bare = strpos($this->page,"\n\n");
        if ($cut === false || ($bare !== false && $bare < $cut)) {
            $cut = $bare;
            $skip = 2;
        }
        if ($cut === false) {
            $cut = strpos($this->page,"<");
            $skip = 0;
        }

        if ($cut === false) {
            // Nothing that looks like a body: treat everything as header.
            $raw = $this->page;
            $this->page = "";
        } else {
            $raw = (string) substr($this->page,0,$cut);
            $this->page = (string) substr($this->page,$cut + $skip);
        }

        // Keep the non-empty lines, reindexed so later loops see no gaps.
        $lines = array();
        foreach (explode("\n",$raw) as $line) {
            $line = rtrim($line,"\r");
            if ($line != "") {
                $lines[] = $line;
            }
        }
        if (count($lines) == 0) {
            $lines[] = "";
        }
        $this->header = $lines;

        $statut = explode(" ",$this->header[0]);
        $this->statut = isset($statut[1]) ? $statut[1] : "";
        return 1;
    }


    
/*****************************************************
    ** SEARCH and REGEX for item (link,img,..)              **
    *****************************************************/
    /** renvoi l'url de base avec les repertoires */
    
function baseUrl($url)
    {
        
$url=trim($url);
        if(
substr($url,strlen($url)-1)!="/" && !$this->isLocal($url))
        {
            
$url_array=explode("/",$url);
            if(
ereg("\.",$url_array[count($url_array)-1])
            && 
count($url_array) > 3)
            {
                unset(
$url_array[count($url_array)-1]);
                
$url=implode("/",$url_array);
            }
            
$url.="/";
        }
        return 
$url;
    }

    
/**
     * Tell whether a URL is local, i.e. does not start (case-insensitively)
     * with http:, https:, ftp: or mailto:.
     *
     * @param string $url URL to test; surrounding whitespace is ignored
     * @return bool true when the URL is local
     */
    function isLocal($url)
    {
        // preg_match() replaces eregi(), which was removed in PHP 7.
        return !preg_match("#^(https?|ftp|mailto):/{0,2}#i",trim($url));
    }

    
/**
     * Collect every opening tag of the given type from $this->page.
     *
     * For type "a", each entry is array("url" => opening tag, "alt" => link
     * text with nested tags removed), and scanning resumes after the closing
     * </a>. For other types each entry is the opening tag string.
     *
     * Fixes over the original: restores the lost "=" and "," tokens; uses
     * preg_* in place of the removed split()/eregi_replace(); requires the
     * tag name to end there, so "a" no longer matches <abbr> or <area>;
     * always returns an array; stops on a tag with no closing ">".
     *
     * @param string $type tag name, e.g. "a", "img", "meta"
     * @return array list of tags, empty when none is found
     */
    function searchBalises($type)
    {
        $list = array();
        $page = $this->page;
        $isAnchor = ($type == "a" || $type == "A");
        $opening = "#<".preg_quote($type,"#")."(?![a-z0-9])#i";

        while (preg_match($opening,$page,$found,PREG_OFFSET_CAPTURE)) {
            $page = substr($page,$found[0][1]);
            $end = strpos($page,">");
            if ($end === false) {
                break; // truncated tag: nothing more to extract
            }
            $balise = substr($page,0,$end + 1);
            $page = (string) substr($page,$end + 1);

            if ($isAnchor) {
                // Text up to the closing </a>, with any nested tags stripped.
                $page_array = preg_split("#< */a *>#i",$page,2);
                $text = preg_replace("#( *<[^>]*> *)#","",$page_array[0]);
                $page = isset($page_array[1]) ? $page_array[1] : "";
                $list[] = array("url"=>$balise,"alt"=>$text);
            } else {
                $list[] = $balise;
            }
        }
        return $list;
    }

    
/**
     * Return the value of an attribute inside a tag string. Handles
     * double-quoted, single-quoted and unquoted values; the attribute name
     * is matched case-insensitively.
     *
     * Fixes over the original: uses preg_match() in place of the removed
     * eregi(); the name must not be the tail of a longer one ("src" no
     * longer matches "data-src"); a value of "0" is returned instead of "";
     * a missing attribute returns "" without touching undefined offsets.
     *
     * @param string $name   attribute name
     * @param string $balise tag text, e.g. '<a href="x">'
     * @return string trimmed value, or "" when absent
     */
    function getAttribut($name,$balise)
    {
        $exp1 = '(" *([^"]*) *")';      // case "value"
        $exp2 = "(' *([^']*) *')";      // case 'value'
        $exp3 = '([^ "\'=<>]*)';        // case  value
        $exp  = '#(?<![\w-])('.preg_quote($name,'#').') *= *('.$exp1.'|'.$exp2.'|'.$exp3.') *#i';

        if (!preg_match($exp,$balise,$content)) {
            return "";
        }

        // Groups: 4 = inside double quotes, 6 = inside single quotes, 7 = bare.
        if (isset($content[4]) && $content[4] !== "") {
            $result = $content[4];
        } elseif (isset($content[6]) && $content[6] !== "") {
            $result = $content[6];
        } elseif (isset($content[7])) {
            $result = $content[7];
        } else {
            $result = "";
        }
        return trim($result);
    }

    
/**
     * Turn a URL found in the page into an absolute URL, resolved against
     * the base of $this->url.
     *
     * Fixes over the original: "./x" is resolved against the current
     * directory, not the host root; leading "../" segments are resolved one
     * by one and never climb above the host; "//host/x" keeps its own host
     * and takes the page's scheme; the unused global is gone.
     *
     * @param string $url URL as written in the page
     * @return string absolute URL
     */
    function parseUrl($url)
    {
        $gurl = $this->baseUrl($this->url);
        $url = trim($url);

        if (!$this->isLocal($url)) {        // already absolute
            return $url;
        }

        $array = explode("/",$gurl);        // "scheme:", "", "host", dirs..., ""
        $root = isset($array[2]) ? $array[0]."//".$array[2]."/" : $gurl;

        if (substr($url,0,2) == "//") {     // protocol-relative //host/x
            return $array[0].$url;
        }

        if (substr($url,0,3) == "../") {    // up ../x
            $dirs = $array;
            array_pop($dirs);               // trailing "" left by the final "/"
            while (substr($url,0,3) == "../") {
                $url = substr($url,3);
                if (count($dirs) > 3) {     // keep "scheme:", "", "host"
                    array_pop($dirs);
                }
            }
            return implode("/",$dirs)."/".$url;
        }

        if (substr($url,0,2) == "./") {     // current directory ./x
            return $gurl.substr($url,2);
        }

        if (substr($url,0,1) == "/") {      // host root /x
            return $root.substr($url,1);
        }

        return $gurl.$url;                  // plain relative path
    }


    
/*****************************************************
    ** ARRAY management                                      **
    *****************************************************/

    /** return first position of array element if exist, else -1 */
    
function isIn2dArray($array,$value)
    {
        for(
$i=0;$array[$i];$i++)
        {
            if(
$value==$array[$i]["url"])
            {
                return 
$i;
            }
        }
        return -
1;
    }

    
/**
     * Record one item in $array: add a new entry array("url", "alt", "nb" => 1)
     * or, when the url is already present, increment its "nb" counter and
     * fill in a missing "alt". Nothing happens when $val1 is falsy.
     *
     * $array is now taken by reference. The original received a copy and
     * returned nothing, so its changes were lost unless the caller used
     * call-time pass-by-reference, which current PHP rejects.
     *
     * @param array  $array list to update in place
     * @param string $val1  item key (stored under "url")
     * @param string $val2  associated text (stored under "alt")
     */
    function insertArray(&$array,$val1,$val2)
    {
        if (!$val1) {
            return;
        }
        if (!is_array($array)) {
            $array = array();
        }

        $find = $this->isIn2dArray($array,$val1);
        if ($find >= 0) {
            $array[$find]["nb"] += 1;
            if (!$array[$find]["alt"] && $val2) {
                $array[$find]["alt"] = $val2;
            }
        } else {
            $array[] = array(
                "url"=>"$val1",
                "alt"=>"$val2",
                "nb"=>1);
        }
    }

    
/**
     * Count the items of a list, duplicates included: each entry adds its
     * "nb" counter when that is greater than 1, otherwise 1.
     *
     * The total now starts at 0 (an empty list used to yield null) and the
     * loop stops at the first missing index instead of reading past the end.
     *
     * @param array $array list of entries, indexed from 0
     * @return int total number of occurrences
     */
    function countArray($array)
    {
        $nb = 0;
        for ($i = 0; isset($array[$i]); $i++) {
            $occurrences = isset($array[$i]["nb"]) ? $array[$i]["nb"] : 1;
            $nb += ($occurrences > 1) ? $occurrences : 1;
        }
        return $nb;
    }

    
/**
 * Per the original note, meant to sort the array by number of duplicates.
 * The body is empty: nothing is sorted and nothing is returned.
 */
    
function orderArray($array)
    {

    }

    
/**
     * Append a summary line for a list of tags: the total number found and
     * how many of them are duplicates, or a "none found" message.
     *
     * Fixes over the original: restores the lost "=" and "-" in the
     * duplicates computation; the "none found" text is assigned rather than
     * appended to an undefined variable, and gets its opening <b>.
     *
     * @param string $type  label of the tag kind, used in the message
     * @param array  $array list built by insertArray()
     */
    function showCount($type,$array)
    {
        $distinct = is_array($array) ? count($array) : 0;
        if ($distinct > 0) {
            $nb = $this->countArray($array);
            $text = "<b>$nb $type";
            if ($nb > 1) $text .= "s";
            // duplicates = occurrences beyond the first of each distinct item
            if ($nb >= $distinct) {
                $doublons = $nb - $distinct;
            } else {
                $doublons = "0";
            }
            $text .= " dont $doublons doublons</b>";
        } else {
            $text = "<b>Aucun(e) $type trouvé dans cette page</b>";
        }
        $this->insertLine($text,"");
    }

    
/**
     * Append one HTML row and one XML element per entry of $array.
     *
     * $type selects the rendering: "meta" (name / content), "link" (scan
     * link plus clickable URL), "img" (clickable URL); anything else is
     * shown as plain text and exported as <OTHER>.
     *
     * Fixes over the original: the loop stops at the first missing index;
     * $PHP_SELF falls back to $_SERVER; a space now separates the href and
     * target attributes of the scan link; the scraped URL has its quotes and
     * angle brackets escaped before going into HTML.
     * NOTE(review): the "alt" text is still inserted as found in the page.
     *
     * @param array  $array list built by insertArray()
     * @param string $type  "meta", "link", "img" or other
     */
    function showArray($array,$type)
    {
        global $PHP_SELF;
        $self = $PHP_SELF ? $PHP_SELF : (isset($_SERVER["PHP_SELF"]) ? $_SERVER["PHP_SELF"] : "");
        $script = htmlspecialchars(basename($self), ENT_QUOTES, "ISO-8859-1");

        for ($i = 0; isset($array[$i]); $i++) {
            $count = isset($array[$i]["nb"]) ? $array[$i]["nb"] : 1;
            if ($count > 1) {
                $nb = "&nbsp;(".$count.")";
                $attribut = "number=\"".$count."\"";
            } else {
                $nb = "&nbsp;";
                $attribut = "";
            }

            $alt = $array[$i]["alt"]."&nbsp;";
            $url = $array[$i]["url"];
            $xml = htmlentities($url);
            // Neutralise only the characters that can close an attribute or
            // open a tag; "&" is left alone so entities from the page are not
            // double-encoded.
            $safe = str_replace(
                array("<", ">", "\"", "'"),
                array("&lt;", "&gt;", "&quot;", "&#039;"),
                $url);

            switch ($type) {
                case "meta":
                    $this->insertLine($safe.$nb,$alt);
                    $alt = htmlentities($array[$i]["alt"]);
                    if ($attribut == "") {
                        $attribut = "name=\"$xml\"";
                    } else {
                        $attribut = "$attribut name=\"$xml\"";
                    }
                    $this->insertXml("meta",$attribut,$alt);
                    break;

                case "link":
                    $link  = "<a href=\"".$script."?scan=";
                    $link .= urlencode($url)."\" ";
                    $link .= "target=\"_blank\">[scan] </a>";
                    $link .= "<a href=\"$safe\" target=\"_blank\">";
                    $link .= "$safe</a> $nb";
                    $this->insertLine($link,$alt);
                    $this->insertXml("link",$attribut,$xml);
                    break;

                case "img":
                    $link  = "<a href=\"$safe\" target=\"_blank\">";
                    $link .= "$safe</a> $nb";
                    $this->insertLine($link,$alt);
                    $this->insertXml("image",$attribut,$xml);
                    break;

                default:
                    $this->insertLine($safe.$nb,$alt);
                    $this->insertXml("other",$attribut,$xml);
                    break;
            }
        }
    }


    
/*****************************************************
    ** HEAD page management                               **
    *****************************************************/
    
/**
     * Append the HTTP response header to the HTML buffer: protocol, status
     * code and reason phrase from the status line, then one row per header
     * field. A field whose name contains "date" is stored in $this->date
     * and not listed. File reads get a single "no header" row.
     *
     * Fixes over the original: explode() replaces the removed split();
     * every header line is visited; the reason phrase is kept whole
     * ("Not Found", not just "Not"); missing parts no longer read undefined
     * offsets.
     */
    function getHead()
    {
        if ($this->connexion != "fsock") {
            $this->insertLine("Header","Pas de header en lecture de fichier");
            return;
        }

        $first = isset($this->header[0]) ? $this->header[0] : "";
        $statut = explode(" ",$first,3);
        $this->insertLine("Protocol",$statut[0]);
        $this->insertLine("Code etat",isset($statut[1]) ? $statut[1] : "");
        $this->insertLine("Obtention de la page",isset($statut[2]) ? $statut[2] : "");

        foreach ($this->header as $i => $line) {
            if ($i < 1 || trim($line) == "") {
                continue; // status line already shown; skip blank lines
            }
            $field = explode(":",$line,2);
            $value = isset($field[1]) ? $field[1] : "";
            if (preg_match("/date/i",$field[0])) {
                $this->date = $value;
            } else {
                $this->insertLine($field[0],$value);
            }
        }
    }


    
/************************************************************
    ** INFORMATIONS page management (indique les redirections) **
    ************************************************************/
    
function getInfosPage()
    {
        global 
$from;
        if(
eregi("<title>([^<]*)</title>",$this->page,$title))
        {
            
$this->insertLine("Titre de la page",$title[1]);
        }
        
$this->insertLine("Url",$this->url);
        if(
$from