How To Wiki
Advertisement

Perl script for processing Wikihowto pages

!Please do NOT use unless you know what you are doing!



Version 1.0 September 2007

#!/usr/bin/perl 


####################################
# Modules
#

use CMS::MediaWiki;
use LWP::Simple;
use Net::hostent;
use Socket;
use IO::Socket;
use Time::HiRes qw(time);
use Getopt::Std;



#########################################
# Test options
#

# yes or no (1/0)
$download = 0;
$downloadWanted = 0;
$writePages = 0;


#########################################
#     Time Server Settings
# some setting that 
# need to be done first
# to enable the time 
# getter in a extreamly 
# rediculus way
#

my $server = "1.pool.ntp.org";
my $serverIPv4 ="";
if (gethostbyname($server)) {
    $serverIPv4 = sprintf("%d.%d.%d.%d",unpack("C4",gethostbyname($server)));
}
my $timeout = 2;



########################################
########################################
###     Variables
###
###

$dblocation="http://www.wikia.com/dbdumps/howto/pages_current.xml";
# $root = "http://howto.wikia.com"; #no slash
$rootDir = "";
# $host2 = "http://howto.wikia.com/index.php?title=Special:Wantedpages&limit=500&offset=0";
$backupFileName='pages_current.xml'; #name of file save on you computer (via wget)


$username = 'Bot-Admin-ZyMOS';
$password = $ARGV[0]; # The password is the first command line argument



$wantedLink = "http://howto.wikia.com/index.php?title=Special:Wantedpages&limit=500&offset="; #without offset #
$numberWantedPages = 2;


$sleepInt = 4; # max time(sec) for random sleep interval between extractions, and posts (integer or float)


####################
# Page Names

# File named for lists or pages

$objectPage = $rootDir . "Help:Objects List";
$objectEmptyPage = $rootDir . "Help:Empty Objects";
$objectWantedPage = $rootDir . "Help:Wanted Objects";

$howtoPage = $rootDir . "Help:Howto List";
$howtoStubPage = $rootDir . "Help:Howto Stub List";
$howtoWantedPage = $rootDir . "Help:Wanted Howtos";

$guidePage = $rootDir . "Help:Guide List";
$guideStubPage = $rootDir . "Help:Guide Stub List";
$guideWantedPage = $rootDir . "Help:Wanted Guides";

$helpPage = $rootDir . "Help:Help Pages List";
$redirectPage = $rootDir . "Help:Redirects List";
$unknownPage = $rootDir . "Help:Unknown List";
$metaPage = $rootDir . "Help:Meta List";



# Templates to write the total amount of each page type
$tmpStubHowto = $rootDir . "Template:numOfStubHowtos";
$tmpHowto = $rootDir . "Template:numOfHowtos";
$tmpWantedHowto = $rootDir . "Template:numOfWantedHowtos";

$tmpStubGuide = $rootDir . "Template:numOfStubGuides";
$tmpGuide = $rootDir . "Template:numOfGuides";
$tmpWantedGuide = $rootDir . "Template:numOfWantedGuides";


$tmpWantedObject = $rootDir . "Template:numOfWantedObjects";
$tmpEmptyObject = $rootDir . "Template:numOfEmptyObjects";
$tmpObject = $rootDir . "Template:numOfObjects";

$tmpUnknown = $rootDir . "Template:numOfUnknowns";
$tmpHelp = $rootDir . "Template:numOfHelps";
$tmpRedir = $rootDir . "Template:numOfRedirects";

$tmpMeta = $rootDir . "Template:numOfMetas";





#############################
# Messages
 
# Messages to be printed on the page lists
$pageBodyIntro = "\nThis page contains a list of all";
$pageBodyIntro2 = "on Wikihowto. It is not intended to be used as a catalog, but more of an index. If you are searching for a specific subject use the search box or See: The full [[Help:Object Lists|]]\n\n\nThis page was created by a bot and the page is refreshed weekly. You can add a link on this page and it will be processed the next rotation, but its recomended you make a link on your user page.  \n\n\nSee Also: [[Object List]], [[Howto List]], [[Guide List]], [[Help:All_page_types]]\n----";






##################################
#  Write woli paghes subroutine
#

sub updatePage
{
  local($a, $b);		
  ($a, $b) = ($_[0], $_[1]);	
  #Post the page
  if($a eq ''){
	  print "!!!!!!!!!!!!!!!!!!!!!!!!!!";
	  print "!! name error: name empty";
	  print "!!!!!!!!!!!!!!!!!!!!!!!!!!";
  }else{
	  $rc = $mw->editPage(   
	  title   => "$a" ,
	  section => '' , 
	  text    => "$b" ,
	  summary => "Updated via Bot." ,
	  );
	# randome sleep from 0-n in sec
	# to put less load on server
  }
  my $b = rand($sleepInt);
  system(" sleep $b");
}





###########################################
# Downloading Database
#
$wantedPagesContent = "";
if($download){
  print "\n# Get the database\n\n";
  system("rm -f pages_current.xml");
  system("wget $dblocation");
}
if($downloadWanted){
  print "\n# Getting wanted pages\n\n";
	for($x=0;$x<$numberWantedPages;$x++){
        	#$content2 = get($host2);  
        	$offset = 500 * $x;
        	#print $wantedLink . $offset . " page name\n\n\n\n\n\n\n\n<><><><><><><><><><><><><><>\n\n\n\n";
		$wantedPagesContent = $wantedPagesContent . "\n\n\n\n\n\n\n\n\n=================================================\n\n\n\n\n\n\n\n\n\n" . get($wantedLink . $offset);
	}
}



#########################################
#     Time Server Settings
# some setting that 
# need to be done first
# to enable the time 
# getter in a extreamly 
# rediculus way
#

my $server = "1.pool.ntp.org";
my $serverIPv4 ="";
if (gethostbyname($server)) {
    $serverIPv4 = sprintf("%d.%d.%d.%d",unpack("C4",gethostbyname($server)));
}
my $timeout = 2;



########################################################
########################################################
#### Initialization
####
####


$pageNumber = 0;
@pageData = ();
@pageNames = ();
@pageType =();
@pageAttrib =();
$count=0;
$pageOn=0;

##############################
#  Initializing the time variables
#  
 my ($LocalTime0, $LocalTime0F, $LocalTime0H, $LocalTime0FH, $LocalTime0FB);
  my ($LocalTime1, $LocalTime2);
  my ($LocalTime, $LocalTimeF, $LocalTimeT);
  my ($NetTime, $NetTime2, $Netfraction);
  my ($netround, $netdelay, $off);
  
  my ($Byte1, $Stratum, $Poll, $Precision,
      $RootDelay, $RootDelayFB, $RootDisp, $RootDispFB, $ReferenceIdent,
      $ReferenceTime, $ReferenceTimeFB, $OriginateTime, $OriginateTimeFB,
      $ReceiveTime, $ReceiveTimeFB, $TransmitTime, $TransmitTimeFB);
  my ($dummy, $RootDelayH, $RootDelayFH, $RootDispH, $RootDispFH, $ReferenceIdentT,
      $ReferenceTimeH, $ReferenceTimeFH, $OriginateTimeH, $OriginateTimeFH,
      $ReceiveTimeH, $ReceiveTimeFH, $TransmitTimeH, $TransmitTimeFH);
  my ($LI, $VN, $Mode, $sc, $PollT, $PrecisionV, $ReferenceT, $ReferenceIPv4);
  
  my $ntp_msg;  # NTP message according to NTP/SNTP protocol specification







  #######################################################
  ########################################################
  #### Page Sorting
  ####
  ####

  
#########################
# Inital page sorting
#

print "# Page Extraction\n\n";

open(LINKS, "<$backupFileName") || die("Could not open file!");
while(<LINKS>){
  if($_ !~ /((\<namespace )|([\/\<]namespaces\>)|(^\<mediawiki xmlns)|(\<sitename\>.*\<\/sitename\>)|(\<generator\>.*\<\/generator\>)|(\<base\>.*\<\/base\>)|(\<case\>.*\<\/case\>)|([\/\<]siteinfo\>))/){
	if($_ !~ /((\<id\>)|(contributor\>)|(\<.?revision\>)|(\<timestamp\>.*\<\/timestamp\>)|(\<comment\>.*\<\/comment\>)|(\<username\>.*\<\/username\>)|(\<minor\/\>)|(\<id\>.*\<\/id\>)|(\<restrictions\>.*\<\/restrictions\>))/){

	  #  Minimize Data(removing multiply spaces and newlines)
	  s/((\&lt\;)|(\&gt\;)|(\&))/ /gm;
	  s/ +/ /gm;
	  s/\n+/\n/gm;
	 
	  # Sorting into pages
	  if(($_ =~ /^ ?\<\/page\>/ || $count >= 4) && $pageOn){
		#end of page
		$pageOn = 0;
		$count = 0;
		$pageTypeSet = 0;
		$pageAttribSet = 0;
		$pageNumber++;
#		print "##############END###################\n";
	  }
	  if($_ =~ /\<title\>.*\<\/title\>/ && $pageOn){
		#grabing the ---------title------------
		$pageName = $_;
		$pageName =~ s/\<title\>//;
		$pageName =~ s/\<\/title\>//;
		$pageName =~ s/^ //;		
		chomp($pageName);
		$pageNames[$pageNumber] = $pageName;
#		print "[$pageNames[$pageNumber]]\n";
#		print "($pageNumber)>>[Title]>> $pageName\n";
#		print "$pageNumber, ";
		if($pageName =~ /\//){
		  #ignore
		  $pageType[$pageNumber] = 'sub-pages';
		  $pageTypeSet = 1;
		}elsif($pageName =~ /^(MediaWiki\:|Main Page)/){
		  $pageType[$pageNumber] = 'other';
		  $pageTypeSet = 1;
		}elsif($pageName =~ /^Category\:/){
		  $pageType[$pageNumber] = 'category';
		  $pageTypeSet = 1;
		}elsif($pageName =~ /^(Help|HowTo Wiki|About|WikiHowTo\:|Wikihowto\:)/){
		  $pageType[$pageNumber] = 'help';
		  $pageTypeSet = 1;
  		}elsif($pageName =~ /^Image\:/){
		  $pageType[$pageNumber] = 'image';
		  $pageTypeSet = 1;
#		  print "image ";
  		}elsif($pageName =~ /^(Talk\:|User talk\:)/){
		  $pageType[$pageNumber] = 'talk';
		  $pageTypeSet = 1;
  		}elsif($pageName =~ /^User\:/){
		  $pageType[$pageNumber] = 'user';
		  $pageTypeSet = 1;
  		}elsif($pageName =~ /^Guide /){
		  $pageType[$pageNumber] = 'guide';
		  $pageTypeSet = 1;
		}elsif($pageName =~ /^(Howto|How to|HowTo) /){
		  $pageType[$pageNumber] = 'howto';
		  $pageTypeSet = 1;
		}elsif($pageName =~ /^Template\:/){
		  $pageType[$pageNumber] = 'template';
		  $pageTypeSet = 1;
		}elsif($pageName =~ /^Historical\:/){
		  $pageType[$pageNumber] = 'historical';
		  $pageTypeSet = 1;
			}elsif($pageName =~ /^Meta\:/){
		  $pageType[$pageNumber] = 'meta';
		  $pageTypeSet = 1;
			}else{
		  $pageType[$pageNumber] = 'unknown';
		  $pageTypeSet = 1;
		}
	  }elsif($pageOn){
		#page content
		# maybe just check for page type here and not record data
		$pageData[$pageNumber] = $pageData[$pageNumber] . $_;
		$count++;
		if(!$pageAttribSet){
		  # getting the ---------Attributes-----------
		  if($_ =~ /(\{\{del\}\}|\{\{0\}\})/){
			$pageAttrib[$pageNumber] = 'delete';
			$pageAttribSet = 1;
		  }elsif($_ =~ /\#redirect/i){
			$pageAttrib[$pageNumber] = 'redirect';
			$pageTypeSet = 1;
		  }elsif($_ =~ /\{\{Stub\}\}/i){
			$pageAttrib[$pageNumber] = 'howto stub';
			$pageTypeSet = 1;
		  }elsif($_ =~ /\{\{Guide Stub\}\}/i){
			$pageAttrib[$pageNumber] = 'guide stub';
			$pageTypeSet = 1;
		  }elsif($_ =~ /\{\{object Stub\}\}/i){
$pageAttrib[$pageNumber] = 'empty';
			$pageTypeSet = 1;
		  }

		}
		if($pageType[$pageNumber] eq 'unknown'){
		  $lineObjectCheck = $_;
		  $lineObjectCheck =~ s/(\'|\:|\[|\])//gm;
		  $lineObjectCheck =~ s/  / /gm;
		  if($lineObjectCheck =~ /object wikipedia/i){
			$pageType[$pageNumber] = 'object';
		  }
		}

#		print "($pageNumber)>>[Content]>> $_\n";
#		print "($pageNumber)>>Attrib>> $pageAttrib[$pageNumber]\n";	i
#		print "($pageNumber)>>NAME>>$pageNames[$pageNumber]\n";
#		print "($pageNumber)>>Type>> $pageType[$pageNumber] >>Attrib $pageAttrib[$pageNumber]\n";		
		
	  }
	}
	  if($_ =~ /^ ?\<page\>/){
		#start of new page
		$pageOn = 1;
		$count = 0;
		$pageData[$pageNumber] ='';
		$pageType[$pageNumber] = '';
		$pageNames[$pageNumber] = '';
#		print "#############Page Start######################\n";
#		print ".";
	  }
		
	  
#	  $processedPage = $processedPage . $_;
#	  if($_ =~ /(\<)|(\>)/){
#		print $_;
#	  }
	
  }
}
close LINKS;





########################
#Clear names and Vars
#

$x=0;
$objectEmptyPages = '';
$objectPages = '';
$howtoStubPages = '';
$howtoPages = '';
$guideStubPages = '';
$guidePages = '';
$helpPages = '';
$unknownPages ='';
$redirectPages = '';
$otherPages = '';

$numOfObjects = 0;
$numOfEmptyObjects = 0;
$numOfHowtos = 0;
$numOfHowtoStubs = 0;
$numOfGuides = 0;
$numOfGuideStubs = 0;
$numOfUnknownPages = 0;
$numOfHelpPages = 0;
$numOfRedirects= 0;
$numOfOtherPages= 0;








##########################################################
# Sort page types
#

print "# Sorting Page Types\n\n";

for($x=0;$x<=$pageNumber;$x++){
#  print "\n[$x]: $pageNames[$x] <<< ";
  if( $pageType[$x] eq 'user' | $pageType[$x] eq 'talk' | $pageType[$x] eq 'image' |  $pageType[$x] eq 'category' | $pageType[$x] eq 'sub-pages' | $pageType[$x] eq 'template' | $pageType[$x] eq 'historical' ){
	#ignore
	print  "x.";#"ignored";
  }elsif($pageAttrib[$x] eq 'redirect'){
	  $redirectPages = $redirectPages . "# [[$pageNames[$x]]]\n";
	  $numOfRedirects++;
	  print "r.";#"redirect.";
  }elsif($pageAttrib[$x] eq 'delete'){
	  print "x.";#"delete.";
  }elsif($pageType[$x] eq 'object'){
	  if($pageAttrib[$x] eq 'empty'){
		$objectEmptyPages = $objectEmptyPages . "# [[$pageNames[$x]]]\n";
		$numOfEmptyObjects++;
		print "Oe.";#"empty object.";
	  }else{
		$objectPages = $objectPages . "# [[$pageNames[$x]]]\n";
		$numOfObjects++;
		print "O.";# object.";	  
	  }
  }elsif($pageType[$x] eq 'howto'){
	  if($pageAttrib[$x] eq 'howto stub'){
		$howtoStubPages = $howtoStubPages . "# [[$pageNames[$x]]]\n";
		;#"howto stub.";
		$numOfHowtoStubs++;
		$howtoPages = $howtoPages . "# [[$pageNames[$x]]] (stub)\n";
		$numOfHowtos++;
	  }else{
		$howtoPages = $howtoPages . "# [[$pageNames[$x]]]\n";
		$numOfHowtos++;
	  }
	  print "H.";#"howto.";	
  }elsif($pageType[$x] eq 'guide'){
	  if($pageAttrib[$x] eq 'guide stub'){
		$guideStubPages = $guideStubPages . "# [[$pageNames[$x]]]\n";
		$numOfGuideStubs++;
#		print "guide stub.";
		$guidePages = $guidePages . "# [[$pageNames[$x]]] (stub)\n";
		$numOfGuides++;
	  }else{
		$guidePages = $guidePages . "# [[$pageNames[$x]]]\n";
		$numOfGuides++;
	  }
	  print "G.";#"guide";	
  }elsif($pageType[$x] eq 'meta'){
	  $metaPages = $metaPages . "# [[$pageNames[$x]]]\n";
	  $numOfMetas++;
	  print "m.";#"meta.";
  }elsif($pageType[$x] eq 'unknown'){
	$unknownPages = $unknownPages . "# [[$pageNames[$x]]]\n";
	$numOfUnknownPages++;
	print "?.";#"unknown";	  
  }elsif($pageType[$x] eq 'help'){
	$helpPages = $helpPages . "# [[$pageNames[$x]]]\n";
	$numOfHelpPages++;
	print "h.";#"help";
  }elsif($pageType[$x] eq 'other'){
	$otherPages = $otherPages . "# [[$pageNames[$x]]]\n";
	$numOfOtherPages++;
	print "o.";#"Other";
  }

}

#############################################
# Extracting and Sorting Wanted Pages
#


# initialising vars
$howtoWantedPageBody = '';
$guideWantedPageBody ='';
$objectWantedPageBody ='';


@matchedLine = 0;
@wantedHowtos = 0;
@wantedGuides = 0;
@wantedObjects = 0;
@wantedHowtos = 0;

@lines = split(/\n/,$wantedPagesContent);

print "\n\n\n\n\nProcessing Wanted Pages:\n\n";
foreach $line (@lines){
        if($line =~ /index\.php\?title/){
                #print "$line\n.......\n";
                @matchedLine = (@matchedLine,$line);
                @break1 = split(/<\/a>/,$line);
                #print "$break1[0]<><<>>>><>>>\n";
                @break2 = split(/>/,$break1[0]);
                $pageName = @break2[$#break2];
                #print "$break2[$#break2]<><>>>>\n";
                if($pageName =~ /^Howto /i) {
                        @wantedHowtos = (@wantedHowtos,"*[[$pageName]]\n");
			$howtoWantedPages = $howtoWantedPages . "*[[$pageName]] \n";
                        #print "$pageName))))";
			print "h.";
                }elsif($pageName =~ /^Guide /i){
                        @wantedGuides = (@wantedGuides,"*[[$pageName]]\n");
			$guideWantedPages = $howtoWantedPages . "*[[$pageName]] \n";
                        #print "$pageName))))";
			print "g.";
                }elsif($pageName !~ /[\:\(\)\?\"\@\<\>\;\&\^\#\%\\\/\.\,\-\+\=]/ && $pageName !~ /^[a-z0-9]$/i  && $pageName !~ /500$/ && $pageName ne "Special"){
                        @wantedObjects = (@wantedObjects,"*[[$pageName]]\n");
			$objectWantedPages = $objectWantedPages . "*[[$pageName]] \n";
                        #print "$pageName))))";
			print "o.";
                }
        }
}

# $howtoWantedPages 
# $objectWantedPages
# $guideWantedPages


$numOfWantedHowtos = $#wantedHowtos;
$numOfWantedObjects = $#wantedObjects;
$numOfWantedGuides = $#wantedGuides;



#print "$numOfWantedHowtos, $numOfWantedObjects, $numOfWantedGuides";




#
#print "\n# Extracting Wanted pages.\n\n";
#@scraps = split(/( |<|>)/,$content2);
#
#$x=0;
#$y=0;
###########   NEEDS WORK  cut out the first word only??????????
#foreach $scrap (@scraps){
#  if($scrap =~ /title/ && $scrap !~ /(\?|\;|\:|Special)/){
#    @scopes = split(/"/,$scrap) ;
#    $scope = $scopes[1];  
#	print "$scope\n";
#	if($scope =~ /^Howto/i || $scope =~ /^How to/i){
#        print "Hw."; #"* [[$scope]]\n";
#		$howtoWantedPages =$howtoWantedPages . "# [[$scope]] \n";
#		$numOfWantedHowtos++;
#    }elsif($scope =~ /^Guide/i){ ##### Wanted Guide Pages
#        print "Gw."; #"G [[$scope]]\n";
#		$guideWantedPages =$guideWantedPages . "# [[$scope]] \n";
#		$numOfWantedGuides++;
#    }elsif($scope !~ / /){ ##### Wanted Guide Pages
#        print "Ow."; #"G [[$scope]]\n";
#		$objectWantedPages =$objectWantedPages . "# [[$scope]] \n";
#		$numOfWantedObjects++;
#	  }
#  }
#}






#############################################
# Page Content Assembly
#
$begining = "<center>''Number of";
# $begining Objects:{{NumOfObjects}}, Last Updated:{{NumUpdateDate}}''</center>\n\n  

$objectPageBody = "$pageBodyIntro '''Object''' $pageBodyIntro2 \n\n{| border=1 cellpadding=2 width=100%\n!width=50% valign=top|Objects<br>\n!width=50% valign=top|Empty Objects<br>\n|-\n|width=50% valign=top|<br>\n$objectPages\n|width=50% valign=top|<br>\n$objectEmptyPages\n|}\n";
$objectEmptyPageBody = "$pageBodyIntro '''Object Empty''' $pageBodyIntro2 \n\n\n\n{|style=\"background-color=#efefef\"\n|\n$objectEmptyPages\n|}\n\n";
$objectWantedPageBody = "$pageBodyIntro '''Object Empty''' $pageBodyIntro2 \n\n\n\n{|style=\"background-color=#efefef\"\n|\n$objectWantedPages\n|}\n\n";


$howtoPageBody = "$pageBodyIntro '''Howto''' $pageBodyIntro2 \n\n\n\n{|style=\"background-color=#efefef\"\n|\n$howtoPages\n|}\n\n";
$howtoStubPageBody = "$pageBodyIntro '''HowtoStub''' $pageBodyIntro2 \n\n\n\n{|style=\"background-color=#efefef\"\n|\n$howtoStubPages\n|}\n\n";
$howtoWantedPageBody = "$pageBodyIntro '''HowtoStub''' $pageBodyIntro2 \n\n\n\n{|style=\"background-color=#efefef\"\n|\n$howtoWantedPages\n|}\n\n";


$guidePageBody =  "$pageBodyIntro '''Guide''' $pageBodyIntro2 \n\n\n\n{|style=\"background-color=#efefef\"\n|\n$guidePages\n|}\n\n";
$guideStubPageBody =  "$pageBodyIntro '''Guide Stub''' $pageBodyIntro2 \n\n\n\n{|style=\"background-color=#efefef\"\n|\n$guideStubPages\n|}\n\n";
$guideWantedPageBody =  "$pageBodyIntro '''Guide Stub''' $pageBodyIntro2 \n\n\n\n{|style=\"background-color=#efefef\"\n|\n$guideWantedPages\n|}\n\n";


$redirectPageBody =  "$pageBodyIntro '''Redirect''' $pageBodyIntro2 \n\n\n\n{|style=\"background-color=#efefef\"\n|\n$redirectPages\n|}\n\n";
$unknownPageBody =  "$pageBodyIntro '''Unknown Pages''' $pageBodyIntro2 \n\n\n\n{|style=\"background-color=#efefef\"\n|\n$unknownPages\n|}\n\n";
$helpPageBody =  "$pageBodyIntro '''Help''' $pageBodyIntro2 \n\n\n\n{|style=\"background-color=#efefef\"\n|\n$helpPages\n|}\n\n";

$metaPageBody =  "$pageBodyIntro '''Help''' $pageBodyIntro2 \n\n\n\n{|style=\"background-color=#efefef\"\n|\n$metaPages\n|}\n\n";

# print "$helpPageBody $unknownPageBody $howtoPageBody $guidePageBody $redirectPageBody $objectPageBody"; 


###################################################
# Checking to see if  pages were parsed right
#

die "the count was inproper, stopped " if($numOfObjects==0||$numOfHowtos==0);



###############################################3
##  Login to the Server
##
print "\n\n######################\n# Loging onto Wikihowto.\n#\n";
$mw = CMS::MediaWiki->new(
        host  => 'howto.wikia.com',
        path  => '.' ,     #  Can be empty on 3rd-level domain Wikis
        debug => 0            #  0=no debug msgs, 1=some msgs, 2=more msgs
);
$rc = $mw->login( user => "$username", pass => "$password" );










######################
# connect to timeserver
#

#print "# Connecting to time server....\n\n";
#
#$ntp_msg = get_ntp_time;
#interpret_ntp_data($ntp_msg);
#if (($LocalTime0H . $LocalTime0FH) ne ($OriginateTimeH . $OriginateTimeFH)) {
#    print "*** The received reply seems to be faulty and NOT the reply to our request packet:\n";
#    print "*** The OriginateTime stamp $OriginateTimeH.$OriginateTimeFH of the received packet does not \n";
#    print "***  show our Transmit Time $LocalTime0H.$LocalTime0FH.\n";
#    exit;
#}
#calculate_time_data;
#print "$ntp_msg, $LocalTime0H . $LocalTime0FH, $OriginateTimeH . $OriginateTimeFH\n";
#



@months = qw(Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec);
@weekDays = qw(Sun Mon Tue Wed Thu Fri Sat Sun);
($second, $minute, $hour, $dayOfMonth, $month, $yearOffset, $dayOfWeek, $dayOfYear, $daylightSavings) = gmtime();
$year = 1900 + $yearOffset;
$theGMTime = "$hour:$minute, $weekDays[$dayOfWeek] $months[$month] $dayOfMonth, $year";
$currentTime = $theGMTime;
print "$currentTime\n";







#################################################
#################################################
####  Update Pages
####
####



# Writing the page lists 
if($writePages){
  print "## Posting Page Listing and Counts.\n#\n\n";
  
  print"#Posting pages\n";
  
  print "guides...\n";
  &updatePage($guidePage,$guidePageBody);
  print "Wanted guides...\n";
  &updatePage($guideWantedPage,$guideWantedPageBody);
  print "guide stubs...\n";
  &updatePage($guideStubPage,$guideStubPageBody);

  print "howto...\n";
  &updatePage($howtoPage,$howtoPageBody);
  print "Wanted howto...\n";
  &updatePage($howtoWantedPage,$howtoWantedPageBody);
  print "howto stubs...\n";
  &updatePage($howtoStubPage,$howtoStubPageBody);

  print "objects...\n";
  &updatePage($objectPage,$objectPageBody);
  print "empty objects...\n";
  &updatePage($objectEmptyPage,$objectEmptyPageBody);
  print "wanted objects...\n";

  &updatePage($metaPage,$metaPageBody);
  print "meta page...\n";

  &updatePage($objectWantedPage,$objectWantedPageBody);
  
  print "unknowns...\n";
  &updatePage($unknownPage,$unknownPageBody);
  print "helps...\n";
  &updatePage($helpPage,$helpPageBody);
  print "redirects...\n";
  &updatePage($redirectPage,$redirectPageBody);

  print "#posting page counts\n";
  # writing the pages counts
  # to the server
  print "howto count...\n";
  &updatePage($tmpHowto,$numOfHowtos);
  print "howto wanted count...\n";
  &updatePage($tmpWantedHowto,$numOfWantedHowtos);
  print "howto stub count...\n";
  &updatePage($tmpStubHowto,$numOfHowtoStubs);
  
  
  print "guide count...\n";
  &updatePage($tmpGuide,$numOfGuides);
  print "wanted guide count...\n";
  &updatePage($tmpWantedGuide,$numOfWantedGuides);
  print "guide stub count...\n";
  &updatePage($tmpStubGuide,$numOfGuideStubs);
  
  
  print "object count...\n";
  &updatePage($tmpObject,$numOfObjects);
  print "wanted object count...\n";
  &updatePage($tmpWantedObject,$numOfWantedObjects);
  print "empty object count...\n";
  &updatePage($tmpEmptyObject,$numOfEmptyObjects);
  
  
  print "unknown count...\n";
  &updatePage($tmpUnknown,$numOfUnknownPages);
  print "help count...\n";
  &updatePage($tmpHelp,$numOfHelpPages);
  print "redirect count...\n";
  &updatePage($tmpRedir,$numOfRedirects);
  
   print "meta count...\n";
  &updatePage($tmpMeta,$numOfMetas);
 
  # Write time of update
  print "#Updating time stamp\n";
  &updatePage("Template:NumUpdateDate",$currentTime);
}



print "# ALL DONE. \n";
  
  
  
  
  
  
  






######################################################################
#######################################################################
#####################################################################
# DO NOT EDIT the code below, unless you want to
# Its for extracting time, in an excesive and unessesarally
# precice way
######################################################################
####################################################################
# Begining of rediculus method for geting the time
# 
sub bin2frac { # convert a binary string to fraction
    my @bin = split '', shift;
    my $frac = 0;
    while (@bin) {
      $frac = ($frac + pop @bin)/2;
    }
    $frac;
  } # end sub bin2frac
  sub frac2bin { # convert a fraction to binary string (B32)
    my $frac = shift;
    my $bin ="";
    while (length($bin) < 32) {
      $bin = $bin . int($frac*2);
      $frac = $frac*2 - int($frac*2);
    }
    $bin;
  } # end sub frac2bin
sub get_ntp_time {
  # open the connection to the ntp server,
  # prepare the ntp request packet
  # send and receive
  # take local timestamps before and after

    my ($remote);
    my ($rin, $rout, $eout) ="";
    my $ntp_msg;

    # open the connection to the ntp server
    $remote = IO::Socket::INET -> new(Proto => "udp", PeerAddr => $server,
                                      PeerPort => 123,
                                      Timeout => $timeout)
                                  or die "Can't connect to \"$server\"\n";
    # measure local time BEFORE timeserver query
    $LocalTime1 = time();
    # convert fm unix epoch time to NTP timestamp
    $LocalTime0 = $LocalTime1 + 2208988800;

    # prepare local timestamp for transmission in our request packet
    $LocalTime0F = $LocalTime0 - int($LocalTime0);
    $LocalTime0FB = frac2bin($LocalTime0F);
    $LocalTime0H = unpack("H8",(pack("N", int($LocalTime0))));
    $LocalTime0FH = unpack("H8",(pack("B32", $LocalTime0FB)));

    $ntp_msg = pack("B8 C3 N10 B32", '00011011', (0)x12, int($LocalTime0), $LocalTime0FB);
                   # LI=0, VN=3, Mode=3 (client), remainder msg is 12 nulls
                   # and the local TxTimestamp derived from $LocalTime1
    # send the ntp-request to the server
    $remote -> send($ntp_msg) or return undef;
    vec($rin, fileno($remote), 1) = 1;
    select($rout=$rin, undef, $eout=$rin, $timeout)
      or do {print "No answer from $server\n"; exit};

    # receive the ntp-message from the server
    $remote -> recv($ntp_msg, length($ntp_msg))
               or do {print "Receive error from $server ($!)\n"; exit};

    # measure local time AFTER timeserver query
    $LocalTime2 = time();
    $ntp_msg;
  } # end sub get_ntp_time------------------------------ 
  sub interpret_ntp_data {
  # do some interpretations of the data
    my $ntp_msg = shift;
    # unpack the received ntp-message into long integer and binary values
    ( $Byte1, $Stratum, $Poll, $Precision,
      $RootDelay, $RootDelayFB, $RootDisp, $RootDispFB, $ReferenceIdent,
      $ReferenceTime, $ReferenceTimeFB, $OriginateTime, $OriginateTimeFB,
      $ReceiveTime, $ReceiveTimeFB, $TransmitTime, $TransmitTimeFB) =
      unpack ("a C3   n B16 n B16 H8   N B32 N B32   N B32 N B32", $ntp_msg);

    # again unpack the received ntp-message into hex and ASCII values
    ( $dummy, $dummy, $dummy, $dummy,
      $RootDelayH, $RootDelayFH, $RootDispH, $RootDispFH, $ReferenceIdentT,
      $ReferenceTimeH, $ReferenceTimeFH, $OriginateTimeH, $OriginateTimeFH,
      $ReceiveTimeH, $ReceiveTimeFH, $TransmitTimeH, $TransmitTimeFH) =
      unpack ("a C3   H4 H4 H4 H4 a4   H8 H8 H8 H8   H8 H8 H8 H8", $ntp_msg);

    $LI = unpack("C", $Byte1 & "\xC0") >> 6;
    $VN = unpack("C", $Byte1 & "\x38") >> 3;
    $Mode = unpack("C", $Byte1 & "\x07");
    if ($Stratum < 2) {$sc = $Stratum;}
    else {
      if ($Stratum > 1) {
        if ($Stratum < 16) {$sc = 2;} else {$sc = 16;}
      }
    }
    $PollT = 2**($Poll);
    if ($Precision > 127) {$Precision = $Precision - 255;}
    $PrecisionV = sprintf("%1.4e",2**$Precision);
    $RootDelay += bin2frac($RootDelayFB);
    $RootDelay = sprintf("%.4f", $RootDelay);
    $RootDisp += bin2frac($RootDispFB);
    $RootDisp = sprintf("%.4f", $RootDisp);
    $ReferenceT = "";
    if ($Stratum eq 1) {$ReferenceT = "[$ReferenceIdentT]";}
    else {
      if ($Stratum eq 2) {
        if ($VN eq 3) {
          $ReferenceIPv4 = sprintf("%d.%d.%d.%d",unpack("C4",$ReferenceIdentT));
          $ReferenceT = "[32bit IPv4 address $ReferenceIPv4 of the ref src]";
        }
        else {
          if ($VN eq 4) {$ReferenceT = "[low 32bits of latest TX timestamp of reference src]";}
        }
      }
    }

    $ReferenceTime += bin2frac($ReferenceTimeFB);
    $OriginateTime += bin2frac($OriginateTimeFB);
    $ReceiveTime += bin2frac($ReceiveTimeFB);
    $TransmitTime += bin2frac($TransmitTimeFB);

  } # end sub interpret_ntp_data ----------------------------------
  sub calculate_time_data {
  # convert time stamps to unix epoch and do some calculations on the time data

    my ($sec, $min, $hr, $dy, $mo, $yr);

    $ReferenceTime -= 2208988800; # convert to unix epoch time stamp
    $OriginateTime -= 2208988800; 
    $ReceiveTime -= 2208988800; 
    $TransmitTime -= 2208988800; 

    $NetTime = scalar(gmtime $TransmitTime);
    $Netfraction = sprintf("%03.f",1000*sprintf("%.3f", $TransmitTime - int($TransmitTime)));
    ($sec, $min, $hr, $dy, $mo, $yr) = gmtime($TransmitTime);
    $NetTime2 = sprintf("%04d-%02d-%02d %02d:%02d:%02d", $yr+1900, $mo+1, $dy, $hr, $min, $sec);

    # calculate delay and difference
    $netround = sprintf("%+.4f",($LocalTime1 - $LocalTime2));
    $netdelay = sprintf("%+.4f",(($LocalTime1 - $LocalTime2)/2) - ($TransmitTime - $ReceiveTime));
    $off = sprintf("%+.4f",(($ReceiveTime - $LocalTime1) + ($TransmitTime - $LocalTime2))/2);

    $LocalTime = ($LocalTime1 + $LocalTime2) /2;
    $LocalTimeF = sprintf("%03.f",1000*sprintf("%.3f", $LocalTime - int($LocalTime)));
    ($sec, $min, $hr, $dy, $mo, $yr) = gmtime($LocalTime);
    $LocalTimeT = sprintf("%04d-%02d-%02d %02d:%02d:%02d", $yr+1900, $mo+1, $dy, $hr, $min, $sec);

  } # end sub calculate_time_data--------------------------------
  ########################End of Funtions############################


#################









Version 0.3 April, 2007

#!/usr/bin/perl 


####################################
# Modules
#

use CMS::MediaWiki;
use LWP::Simple;
use Net::hostent;
use Socket;
use IO::Socket;
use Time::HiRes qw(time);
use Getopt::Std;



#########################################
# Test options
#

# yes or no
$download = 1;
$downloadWanted = 1;
$writePages = 1;


#########################################
#     Time Server Settings
# some setting that 
# need to be done first
# to enable the time 
# getter in a extreamly 
# rediculus way
#

my $server = "1.pool.ntp.org";
my $serverIPv4 ="";
if (gethostbyname($server)) {
    $serverIPv4 = sprintf("%d.%d.%d.%d",unpack("C4",gethostbyname($server)));
}
my $timeout = 2;



########################################
########################################
###     Variables
###
###

$root = "http://howto.wikia.com"; #no slash

$backupFileName='pages_current.xml';
$username = 'xxxxxxxxxxxxxx';
$password = 'xxxxxxxxxxxxxx';
$sleepInt = 4; # max time(sec) for random sleep interval between extractions, and posts (integer or float)
$rootDir = "";

$host2 = "http://howto.wikia.com/index.php?title=Special:Wantedpages&limit=500&offset=0";


####################
# Page Names

# File named for lists or pages

$objectPage = $rootDir . "Help:Objects List";
$objectEmptyPage = $rootDir . "Help:Empty Objects";
$objectWantedPage = $rootDir . "Help:Wanted Objects";

$howtoPage = $rootDir . "Help:Howto List";
$howtoStubPage = $rootDir . "Help:Howto Stub List";
$howtoWantedPage = $rootDir . "Help:Wanted Howtos";

$guidePage = $rootDir . "Help:Guide List";
$guideStubPage = $rootDir . "Help:Guide Stub List";
$guideWantedPage = $rootDir . "Help:Wanted Guides";

$helpPage = $rootDir . "Help:Help Pages List";
$redirectPage = $rootDir . "Help:Redirects List";
$unknownPage = $rootDir . "Help:Unknown List";
$metaPage = $rootDir . "Help:Meta List";



# Templates to write the total amount of each page type
$tmpStubHowto = $rootDir . "Template:numOfStubHowtos";
$tmpHowto = $rootDir . "Template:numOfHowtos";
$tmpWantedHowto = $rootDir . "Template:numOfWantedHowtos";

$tmpStubGuide = $rootDir . "Template:numOfStubGuides";
$tmpGuide = $rootDir . "Template:numOfGuides";
$tmpWantedGuide = $rootDir . "Template:numOfWantedGuides";


$tmpWantedObject = $rootDir . "Template:numOfWantedObjects";
$tmpEmptyObject = $rootDir . "Template:numOfEmptyObjects";
$tmpObject = $rootDir . "Template:numOfObjects";

$tmpUnknown = $rootDir . "Template:numOfUnknowns";
$tmpHelp = $rootDir . "Template:numOfHelps";
$tmpRedir = $rootDir . "Template:numOfRedirects";

$tmpMeta = $rootDir . "Template:numOfMetas";





#############################
# Messages
 
# Messages to be printed on the page lists
$pageBodyIntro = "\nThis page contains a list of all";
$pageBodyIntro2 = "on Wikihowto. It is not intended to be used as a catalog, but more of an index. If you are searching for a specific subject use the search box or See: The full [[Help:Object Lists|]]\n\n\nThis page was created by a bot and the page is refreshed weekly. For more information See [[Help:Bots]]  You can add a link on this page and it it will be processed the next rotation, but its generally suggested to make a link on your user page.  \n\n\nSee Also: [[Object List]], [[Howto List]], [[Guide List]], [[Help:All_page_types]]\n----";






##################################
#  Write woli paghes subroutine
#

sub updatePage
{
  local($a, $b);		
  ($a, $b) = ($_[0], $_[1]);	
  #Post the page
  if($a eq ''){
	  print "!!!!!!!!!!!!!!!!!!!!!!!!!!";
	  print "!! name error: name empty";
	  print "!!!!!!!!!!!!!!!!!!!!!!!!!!";
  }else{
	  $rc = $mw->editPage(   
	  title   => "$a" ,
	  section => '' , 
	  text    => "$b" ,
	  summary => "Updated via Bot." ,
	  );
	# randome sleep from 0-n in sec
	# to put less load on server
  }
  my $b = rand($sleepInt);
  system(" sleep $b");
}





###########################################
# Downloading Database
#

if($download){
  print "\n# Get the database\n\n";
  system("rm -f pages_current.xml");
  system("wget http://howto.wikia.com/dbdumps/pages_current.xml");
}
if($downloadWanted){
  print "\n# Getting wanted pages\n\n";
  $content2 = get($host2);  
}



#########################################
#     Time Server Settings
# some setting that 
# need to be done first
# to enable the time 
# getter in a extreamly 
# rediculus way
#

my $server = "1.pool.ntp.org";
my $serverIPv4 ="";
if (gethostbyname($server)) {
    $serverIPv4 = sprintf("%d.%d.%d.%d",unpack("C4",gethostbyname($server)));
}
my $timeout = 2;



########################################################
########################################################
#### Initialization
####
####


$pageNumber = 0;
@pageData = ();
@pageNames = ();
@pageType =();
@pageAttrib =();
$count=0;
$pageOn=0;

##############################
#  Initializing the time variables
#  
 my ($LocalTime0, $LocalTime0F, $LocalTime0H, $LocalTime0FH, $LocalTime0FB);
  my ($LocalTime1, $LocalTime2);
  my ($LocalTime, $LocalTimeF, $LocalTimeT);
  my ($NetTime, $NetTime2, $Netfraction);
  my ($netround, $netdelay, $off);
  
  my ($Byte1, $Stratum, $Poll, $Precision,
      $RootDelay, $RootDelayFB, $RootDisp, $RootDispFB, $ReferenceIdent,
      $ReferenceTime, $ReferenceTimeFB, $OriginateTime, $OriginateTimeFB,
      $ReceiveTime, $ReceiveTimeFB, $TransmitTime, $TransmitTimeFB);
  my ($dummy, $RootDelayH, $RootDelayFH, $RootDispH, $RootDispFH, $ReferenceIdentT,
      $ReferenceTimeH, $ReferenceTimeFH, $OriginateTimeH, $OriginateTimeFH,
      $ReceiveTimeH, $ReceiveTimeFH, $TransmitTimeH, $TransmitTimeFH);
  my ($LI, $VN, $Mode, $sc, $PollT, $PrecisionV, $ReferenceT, $ReferenceIPv4);
  
  my $ntp_msg;  # NTP message according to NTP/SNTP protocol specification







  #######################################################
  ########################################################
  #### Page Sorting
  ####
  ####

  
#########################
# Inital page sorting
#

print "# Page Extraction\n\n";

open(LINKS, "<$backupFileName") || die("Could not open file!");
while(<LINKS>){
  if($_ !~ /((\<namespace )|([\/\<]namespaces\>)|(^\<mediawiki xmlns)|(\<sitename\>.*\<\/sitename\>)|(\<generator\>.*\<\/generator\>)|(\<base\>.*\<\/base\>)|(\<case\>.*\<\/case\>)|([\/\<]siteinfo\>))/){
	if($_ !~ /((\<id\>)|(contributor\>)|(\<.?revision\>)|(\<timestamp\>.*\<\/timestamp\>)|(\<comment\>.*\<\/comment\>)|(\<username\>.*\<\/username\>)|(\<minor\/\>)|(\<id\>.*\<\/id\>)|(\<restrictions\>.*\<\/restrictions\>))/){

	  #  Minimize Data(removing multiply spaces and newlines)
	  s/((\&lt\;)|(\&gt\;)|(\&))/ /gm;
	  s/ +/ /gm;
	  s/\n+/\n/gm;
	 
	  # Sorting into pages
	  if(($_ =~ /^ ?\<\/page\>/ || $count >= 4) && $pageOn){
		#end of page
		$pageOn = 0;
		$count = 0;
		$pageTypeSet = 0;
		$pageAttribSet = 0;
		$pageNumber++;
#		print "##############END###################\n";
	  }
	  if($_ =~ /\<title\>.*\<\/title\>/ && $pageOn){
		#grabing the ---------title------------
		$pageName = $_;
		$pageName =~ s/\<title\>//;
		$pageName =~ s/\<\/title\>//;
		$pageName =~ s/^ //;		
		chomp($pageName);
		$pageNames[$pageNumber] = $pageName;
#		print "[$pageNames[$pageNumber]]\n";
#		print "($pageNumber)>>[Title]>> $pageName\n";
#		print "$pageNumber, ";
		if($pageName =~ /\//){
		  #ignore
		  $pageType[$pageNumber] = 'sub-pages';
		  $pageTypeSet = 1;
		}elsif($pageName =~ /^(MediaWiki\:|Main Page)/){
		  $pageType[$pageNumber] = 'other';
		  $pageTypeSet = 1;
		}elsif($pageName =~ /^Category\:/){
		  $pageType[$pageNumber] = 'category';
		  $pageTypeSet = 1;
		}elsif($pageName =~ /^(Help|HowTo Wiki|About|WikiHowTo\:|Wikihowto\:)/){
		  $pageType[$pageNumber] = 'help';
		  $pageTypeSet = 1;
  		}elsif($pageName =~ /^Image\:/){
		  $pageType[$pageNumber] = 'image';
		  $pageTypeSet = 1;
#		  print "image ";
  		}elsif($pageName =~ /^(Talk\:|User talk\:)/){
		  $pageType[$pageNumber] = 'talk';
		  $pageTypeSet = 1;
  		}elsif($pageName =~ /^User\:/){
		  $pageType[$pageNumber] = 'user';
		  $pageTypeSet = 1;
  		}elsif($pageName =~ /^Guide /){
		  $pageType[$pageNumber] = 'guide';
		  $pageTypeSet = 1;
		}elsif($pageName =~ /^(Howto|How to|HowTo) /){
		  $pageType[$pageNumber] = 'howto';
		  $pageTypeSet = 1;
		}elsif($pageName =~ /^Template\:/){
		  $pageType[$pageNumber] = 'template';
		  $pageTypeSet = 1;
		}elsif($pageName =~ /^Historical\:/){
		  $pageType[$pageNumber] = 'historical';
		  $pageTypeSet = 1;
			}elsif($pageName =~ /^Meta\:/){
		  $pageType[$pageNumber] = 'meta';
		  $pageTypeSet = 1;
			}else{
		  $pageType[$pageNumber] = 'unknown';
		  $pageTypeSet = 1;
		}
	  }elsif($pageOn){
		#page content
		# maybe just check for page type here and not record data
		$pageData[$pageNumber] = $pageData[$pageNumber] . $_;
		$count++;
		if(!$pageAttribSet){
		  # getting the ---------Attributes-----------
		  if($_ =~ /(\{\{del\}\}|\{\{0\}\})/){
			$pageAttrib[$pageNumber] = 'delete';
			$pageAttribSet = 1;
		  }elsif($_ =~ /\#redirect/i){
			$pageAttrib[$pageNumber] = 'redirect';
			$pageTypeSet = 1;
		  }elsif($_ =~ /\{\{Stub\}\}/i){
			$pageAttrib[$pageNumber] = 'howto stub';
			$pageTypeSet = 1;
		  }elsif($_ =~ /\{\{Guide Stub\}\}/i){
			$pageAttrib[$pageNumber] = 'guide stub';
			$pageTypeSet = 1;
		  }elsif($_ =~ /\{\{object Stub\}\}/i){
$pageAttrib[$pageNumber] = 'empty';
			$pageTypeSet = 1;
		  }

		}
		if($pageType[$pageNumber] eq 'unknown'){
		  $lineObjectCheck = $_;
		  $lineObjectCheck =~ s/(\'|\:|\[|\])//gm;
		  $lineObjectCheck =~ s/  / /gm;
		  if($lineObjectCheck =~ /object wikipedia/i){
			$pageType[$pageNumber] = 'object';
		  }
		}

#		print "($pageNumber)>>[Content]>> $_\n";
#		print "($pageNumber)>>Attrib>> $pageAttrib[$pageNumber]\n";	i
#		print "($pageNumber)>>NAME>>$pageNames[$pageNumber]\n";
#		print "($pageNumber)>>Type>> $pageType[$pageNumber] >>Attrib $pageAttrib[$pageNumber]\n";		
		
	  }
	}
	  if($_ =~ /^ ?\<page\>/){
		#start of new page
		$pageOn = 1;
		$count = 0;
		$pageData[$pageNumber] ='';
		$pageType[$pageNumber] = '';
		$pageNames[$pageNumber] = '';
#		print "#############Page Start######################\n";
#		print ".";
	  }
		
	  
#	  $processedPage = $processedPage . $_;
#	  if($_ =~ /(\<)|(\>)/){
#		print $_;
#	  }
	
  }
}
close LINKS;





########################
#Clear names and Vars
#

$x=0;
$objectEmptyPages = '';
$objectPages = '';
$howtoStubPages = '';
$howtoPages = '';
$guideStubPages = '';
$guidePages = '';
$helpPages = '';
$unknownPages ='';
$redirectPages = '';
$otherPages = '';

$numOfObjects = 0;
$numOfEmptyObjects = 0;
$numOfHowtos = 0;
$numOfHowtoStubs = 0;
$numOfGuides = 0;
$numOfGuideStubs = 0;
$numOfUnknownPages = 0;
$numOfHelpPages = 0;
$numOfRedirects= 0;
$numOfOtherPages= 0;








##########################################################
# Sort page types
#

print "# Sorting Page Types\n\n";

for($x=0;$x<=$pageNumber;$x++){
#  print "\n[$x]: $pageNames[$x] <<< ";
  if( $pageType[$x] eq 'user' | $pageType[$x] eq 'talk' | $pageType[$x] eq 'image' |  $pageType[$x] eq 'category' | $pageType[$x] eq 'sub-pages' | $pageType[$x] eq 'template' | $pageType[$x] eq 'historical' ){
	#ignore
	print  "x.";#"ignored";
  }elsif($pageAttrib[$x] eq 'redirect'){
	  $redirectPages = $redirectPages . "# [[$pageNames[$x]]]\n";
	  $numOfRedirects++;
	  print "r.";#"redirect.";
  }elsif($pageAttrib[$x] eq 'delete'){
	  print "x.";#"delete.";
  }elsif($pageType[$x] eq 'object'){
	  if($pageAttrib[$x] eq 'empty'){
		$objectEmptyPages = $objectEmptyPages . "# [[$pageNames[$x]]]\n";
		$numOfEmptyObjects++;
		print "Oe.";#"empty object.";
	  }else{
		$objectPages = $objectPages . "# [[$pageNames[$x]]]\n";
		$numOfObjects++;
		print "O.";# object.";	  
	  }
  }elsif($pageType[$x] eq 'howto'){
	  if($pageAttrib[$x] eq 'howto stub'){
		$howtoStubPages = $howtoStubPages . "# [[$pageNames[$x]]]\n";
		;#"howto stub.";
		$numOfHowtoStubs++;
		$howtoPages = $howtoPages . "# [[$pageNames[$x]]] (stub)\n";
		$numOfHowtos++;
	  }else{
		$howtoPages = $howtoPages . "# [[$pageNames[$x]]]\n";
		$numOfHowtos++;
	  }
	  print "H.";#"howto.";	
  }elsif($pageType[$x] eq 'guide'){
	  if($pageAttrib[$x] eq 'guide stub'){
		$guideStubPages = $guideStubPages . "# [[$pageNames[$x]]]\n";
		$numOfGuideStubs++;
#		print "guide stub.";
		$guidePages = $guidePages . "# [[$pageNames[$x]]] (stub)\n";
		$numOfGuides++;
	  }else{
		$guidePages = $guidePages . "# [[$pageNames[$x]]]\n";
		$numOfGuides++;
	  }
	  print "G.";#"guide";	
  }elsif($pageType[$x] eq 'meta'){
	  $metaPages = $metaPages . "# [[$pageNames[$x]]]\n";
	  $numOfMetas++;
	  print "m.";#"meta.";
  }elsif($pageType[$x] eq 'unknown'){
	$unknownPages = $unknownPages . "# [[$pageNames[$x]]]\n";
	$numOfUnknownPages++;
	print "?.";#"unknown";	  
  }elsif($pageType[$x] eq 'help'){
	$helpPages = $helpPages . "# [[$pageNames[$x]]]\n";
	$numOfHelpPages++;
	print "h.";#"help";
  }elsif($pageType[$x] eq 'other'){
	$otherPages = $otherPages . "# [[$pageNames[$x]]]\n";
	$numOfOtherPages++;
	print "o.";#"Other";
  }

}

#############################################
# Extracting and Sorting Wanted Pages
#


# initialising vars
$howtoWantedPageBody = '';
$guideWantedPageBody ='';
$objectWantedPageBody ='';


print "\n# Extracting Wanted pages.\n\n";
@scraps = split(/( |<|>)/,$content2);

$x=0;
$y=0;
##########   NEEDS WORK  cut out the first word only??????????
foreach $scrap (@scraps){
  if($scrap =~ /title/ && $scrap !~ /(\?|\;|\:|Special)/){
    @scopes = split(/"/,$scrap) ;
    $scope = $scopes[1];  
	print "$scope\n";
	if($scope =~ /^Howto/i || $scope =~ /^How to/i){
        print "Hw."; #"* [[$scope]]\n";
		$howtoWantedPages =$howtoWantedPages . "# [[$scope]] \n";
		$numOfWantedHowtos++;
    }elsif($scope =~ /^Guide/i){ ##### Wanted Guide Pages
        print "Gw."; #"G [[$scope]]\n";
		$guideWantedPages =$guideWantedPages . "# [[$scope]] \n";
		$numOfWantedGuides++;
    }elsif($scope !~ / /){ ##### Wanted Guide Pages
        print "Ow."; #"G [[$scope]]\n";
		$objectWantedPages =$objectWantedPages . "# [[$scope]] \n";
		$numOfWantedObjects++;
	  }
  }
}






#############################################
# Page Content Assembly
#

$objectPageBody = "$pageBodyIntro '''Object''' $pageBodyIntro2 \n\n{| border=1 cellpadding=2 width=100%\n!width=50% valign=top|Objects<br>\n!width=50% valign=top|Empty Objects<br>\n|-\n|width=50% valign=top|<br>\n$objectPages\n|width=50% valign=top|<br>\n$objectEmptyPages\n|}\n";
$objectEmptyPageBody = "$pageBodyIntro '''Object Empty''' $pageBodyIntro2 \n\n\n\n{|style=\"background-color=#efefef\"\n|\n$objectEmptyPages\n|}\n\n";
$objectWantedPageBody = "$pageBodyIntro '''Object Empty''' $pageBodyIntro2 \n\n\n\n{|style=\"background-color=#efefef\"\n|\n$objectWantedPages\n|}\n\n";


$howtoPageBody = "$pageBodyIntro '''Howto''' $pageBodyIntro2 \n\n\n\n{|style=\"background-color=#efefef\"\n|\n$howtoPages\n|}\n\n";
$howtoStubPageBody = "$pageBodyIntro '''HowtoStub''' $pageBodyIntro2 \n\n\n\n{|style=\"background-color=#efefef\"\n|\n$howtoStubPages\n|}\n\n";
$howtoWantedPageBody = "$pageBodyIntro '''HowtoStub''' $pageBodyIntro2 \n\n\n\n{|style=\"background-color=#efefef\"\n|\n$howtoWantedPages\n|}\n\n";


$guidePageBody =  "$pageBodyIntro '''Guide''' $pageBodyIntro2 \n\n\n\n{|style=\"background-color=#efefef\"\n|\n$guidePages\n|}\n\n";
$guideStubPageBody =  "$pageBodyIntro '''Guide Stub''' $pageBodyIntro2 \n\n\n\n{|style=\"background-color=#efefef\"\n|\n$guideStubPages\n|}\n\n";
$guideWantedPageBody =  "$pageBodyIntro '''Guide Stub''' $pageBodyIntro2 \n\n\n\n{|style=\"background-color=#efefef\"\n|\n$guideWantedPages\n|}\n\n";


$redirectPageBody =  "$pageBodyIntro '''Redirect''' $pageBodyIntro2 \n\n\n\n{|style=\"background-color=#efefef\"\n|\n$redirectPages\n|}\n\n";
$unknownPageBody =  "$pageBodyIntro '''Unknown Pages''' $pageBodyIntro2 \n\n\n\n{|style=\"background-color=#efefef\"\n|\n$unknownPages\n|}\n\n";
$helpPageBody =  "$pageBodyIntro '''Help''' $pageBodyIntro2 \n\n\n\n{|style=\"background-color=#efefef\"\n|\n$helpPages\n|}\n\n";

$metaPageBody =  "$pageBodyIntro '''Help''' $pageBodyIntro2 \n\n\n\n{|style=\"background-color=#efefef\"\n|\n$metaPages\n|}\n\n";

# print "$helpPageBody $unknownPageBody $howtoPageBody $guidePageBody $redirectPageBody $objectPageBody"; 





###############################################3
##  Login to the Server
##
print "\n\n######################\n# Loging onto Wikihowto.\n#\n";
$mw = CMS::MediaWiki->new(
        host  => 'howto.wikia.com',
        path  => '.' ,     #  Can be empty on 3rd-level domain Wikis
        debug => 0            #  0=no debug msgs, 1=some msgs, 2=more msgs
);
$rc = $mw->login( user => "$username", pass => "$password" );










######################
# connect to timeserver
#

print "# Connecting to time server....\n\n";

$ntp_msg = get_ntp_time;
interpret_ntp_data($ntp_msg);
if (($LocalTime0H . $LocalTime0FH) ne ($OriginateTimeH . $OriginateTimeFH)) {
    print "*** The received reply seems to be faulty and NOT the reply to our request packet:\n";
    print "*** The OriginateTime stamp $OriginateTimeH.$OriginateTimeFH of the received packet does not \n";
    print "***  show our Transmit Time $LocalTime0H.$LocalTime0FH.\n";
    exit;
}
calculate_time_data;













#################################################
#################################################
####  Update Pages
####
####



# Writing the page lists 
if($writePages){
  print "## Posting Page Listing and Counts.\n#\n\n";
  
  print"#Posting pages\n";
  
  print "guides...\n";
  &updatePage($guidePage,$guidePageBody);
  print "Wanted guides...\n";
  &updatePage($guideWantedPage,$guideWantedPageBody);
  print "guide stubs...\n";
  &updatePage($guideStubPage,$guideStubPageBody);

  print "howto...\n";
  &updatePage($howtoPage,$howtoPageBody);
  print "Wanted howto...\n";
  &updatePage($howtoWantedPage,$howtoWantedPageBody);
  print "howto stubs...\n";
  &updatePage($howtoStubPage,$howtoStubPageBody);

  print "objects...\n";
  &updatePage($objectPage,$objectPageBody);
  print "empty objects...\n";
  &updatePage($objectEmptyPage,$objectEmptyPageBody);
  print "wanted objects...\n";

  &updatePage($metaPage,$metaPageBody);
  print "meta page...\n";

  &updatePage($objectWantedPage,$objectWantedPageBody);
  
  print "unknowns...\n";
  &updatePage($unknownPage,$unknownPageBody);
  print "helps...\n";
  &updatePage($helpPage,$helpPageBody);
  print "redirects...\n";
  &updatePage($redirectPage,$redirectPageBody);

  print "#posting page counts\n";
  # writing the pages counts
  # to the server
  print "howto count...\n";
  &updatePage($tmpHowto,$numOfHowtos);
  print "howto wanted count...\n";
  &updatePage($tmpWantedHowto,$numOfWantedHowtos);
  print "howto stub count...\n";
  &updatePage($tmpStubHowto,$numOfHowtoStubs);
  
  
  print "guide count...\n";
  &updatePage($tmpGuide,$numOfGuides);
  print "wanted guide count...\n";
  &updatePage($tmpWantedGuide,$numOfWantedGuides);
  print "guide stub count...\n";
  &updatePage($tmpStubGuide,$numOfGuideStubs);
  
  
  print "object count...\n";
  &updatePage($tmpObject,$numOfObjects);
  print "wanted object count...\n";
  &updatePage($tmpWantedObject,$numOfWantedObjects);
  print "empty object count...\n";
  &updatePage($tmpEmptyObject,$numOfEmptyObjects);
  
  
  print "unknown count...\n";
  &updatePage($tmpUnknown,$numOfUnknownPages);
  print "help count...\n";
  &updatePage($tmpHelp,$numOfHelpPages);
  print "redirect count...\n";
  &updatePage($tmpRedir,$numOfRedirects);
  
   print "meta count...\n";
  &updatePage($tmpMeta,$numOfMetas);
 
  # Write time of update
  print "#Updating time stamp\n";
  &updatePage("Template:NumUpdateDate",$NetTime);
}



print "# ALL DONE. \n";
  
  
  
  
  
  
  






######################################################################
#######################################################################
#####################################################################
# DO NOT EDIT the code below, unless you want to
# Its for extracting time, in an excesive and unessesarally
# precice way
######################################################################
####################################################################
# Begining of rediculus method for geting the time
# 
sub bin2frac { # convert a binary string to fraction
    my @bin = split '', shift;
    my $frac = 0;
    while (@bin) {
      $frac = ($frac + pop @bin)/2;
    }
    $frac;
  } # end sub bin2frac
  sub frac2bin { # convert a fraction to binary string (B32)
    my $frac = shift;
    my $bin ="";
    while (length($bin) < 32) {
      $bin = $bin . int($frac*2);
      $frac = $frac*2 - int($frac*2);
    }
    $bin;
  } # end sub frac2bin
sub get_ntp_time {
  # open the connection to the ntp server,
  # prepare the ntp request packet
  # send and receive
  # take local timestamps before and after

    my ($remote);
    my ($rin, $rout, $eout) ="";
    my $ntp_msg;

    # open the connection to the ntp server
    $remote = IO::Socket::INET -> new(Proto => "udp", PeerAddr => $server,
                                      PeerPort => 123,
                                      Timeout => $timeout)
                                  or die "Can't connect to \"$server\"\n";
    # measure local time BEFORE timeserver query
    $LocalTime1 = time();
    # convert fm unix epoch time to NTP timestamp
    $LocalTime0 = $LocalTime1 + 2208988800;

    # prepare local timestamp for transmission in our request packet
    $LocalTime0F = $LocalTime0 - int($LocalTime0);
    $LocalTime0FB = frac2bin($LocalTime0F);
    $LocalTime0H = unpack("H8",(pack("N", int($LocalTime0))));
    $LocalTime0FH = unpack("H8",(pack("B32", $LocalTime0FB)));

    $ntp_msg = pack("B8 C3 N10 B32", '00011011', (0)x12, int($LocalTime0), $LocalTime0FB);
                   # LI=0, VN=3, Mode=3 (client), remainder msg is 12 nulls
                   # and the local TxTimestamp derived from $LocalTime1
    # send the ntp-request to the server
    $remote -> send($ntp_msg) or return undef;
    vec($rin, fileno($remote), 1) = 1;
    select($rout=$rin, undef, $eout=$rin, $timeout)
      or do {print "No answer from $server\n"; exit};

    # receive the ntp-message from the server
    $remote -> recv($ntp_msg, length($ntp_msg))
               or do {print "Receive error from $server ($!)\n"; exit};

    # measure local time AFTER timeserver query
    $LocalTime2 = time();
    $ntp_msg;
  } # end sub get_ntp_time------------------------------ 
  sub interpret_ntp_data {
  # do some interpretations of the data
    my $ntp_msg = shift;
    # unpack the received ntp-message into long integer and binary values
    ( $Byte1, $Stratum, $Poll, $Precision,
      $RootDelay, $RootDelayFB, $RootDisp, $RootDispFB, $ReferenceIdent,
      $ReferenceTime, $ReferenceTimeFB, $OriginateTime, $OriginateTimeFB,
      $ReceiveTime, $ReceiveTimeFB, $TransmitTime, $TransmitTimeFB) =
      unpack ("a C3   n B16 n B16 H8   N B32 N B32   N B32 N B32", $ntp_msg);

    # again unpack the received ntp-message into hex and ASCII values
    ( $dummy, $dummy, $dummy, $dummy,
      $RootDelayH, $RootDelayFH, $RootDispH, $RootDispFH, $ReferenceIdentT,
      $ReferenceTimeH, $ReferenceTimeFH, $OriginateTimeH, $OriginateTimeFH,
      $ReceiveTimeH, $ReceiveTimeFH, $TransmitTimeH, $TransmitTimeFH) =
      unpack ("a C3   H4 H4 H4 H4 a4   H8 H8 H8 H8   H8 H8 H8 H8", $ntp_msg);

    $LI = unpack("C", $Byte1 & "\xC0") >> 6;
    $VN = unpack("C", $Byte1 & "\x38") >> 3;
    $Mode = unpack("C", $Byte1 & "\x07");
    if ($Stratum < 2) {$sc = $Stratum;}
    else {
      if ($Stratum > 1) {
        if ($Stratum < 16) {$sc = 2;} else {$sc = 16;}
      }
    }
    $PollT = 2**($Poll);
    if ($Precision > 127) {$Precision = $Precision - 255;}
    $PrecisionV = sprintf("%1.4e",2**$Precision);
    $RootDelay += bin2frac($RootDelayFB);
    $RootDelay = sprintf("%.4f", $RootDelay);
    $RootDisp += bin2frac($RootDispFB);
    $RootDisp = sprintf("%.4f", $RootDisp);
    $ReferenceT = "";
    if ($Stratum eq 1) {$ReferenceT = "[$ReferenceIdentT]";}
    else {
      if ($Stratum eq 2) {
        if ($VN eq 3) {
          $ReferenceIPv4 = sprintf("%d.%d.%d.%d",unpack("C4",$ReferenceIdentT));
          $ReferenceT = "[32bit IPv4 address $ReferenceIPv4 of the ref src]";
        }
        else {
          if ($VN eq 4) {$ReferenceT = "[low 32bits of latest TX timestamp of reference src]";}
        }
      }
    }

    $ReferenceTime += bin2frac($ReferenceTimeFB);
    $OriginateTime += bin2frac($OriginateTimeFB);
    $ReceiveTime += bin2frac($ReceiveTimeFB);
    $TransmitTime += bin2frac($TransmitTimeFB);

  } # end sub interpret_ntp_data ----------------------------------
  sub calculate_time_data {
  # convert time stamps to unix epoch and do some calculations on the time data

    my ($sec, $min, $hr, $dy, $mo, $yr);

    $ReferenceTime -= 2208988800; # convert to unix epoch time stamp
    $OriginateTime -= 2208988800; 
    $ReceiveTime -= 2208988800; 
    $TransmitTime -= 2208988800; 

    $NetTime = scalar(gmtime $TransmitTime);
    $Netfraction = sprintf("%03.f",1000*sprintf("%.3f", $TransmitTime - int($TransmitTime)));
    ($sec, $min, $hr, $dy, $mo, $yr) = gmtime($TransmitTime);
    $NetTime2 = sprintf("%04d-%02d-%02d %02d:%02d:%02d", $yr+1900, $mo+1, $dy, $hr, $min, $sec);

    # calculate delay and difference
    $netround = sprintf("%+.4f",($LocalTime1 - $LocalTime2));
    $netdelay = sprintf("%+.4f",(($LocalTime1 - $LocalTime2)/2) - ($TransmitTime - $ReceiveTime));
    $off = sprintf("%+.4f",(($ReceiveTime - $LocalTime1) + ($TransmitTime - $LocalTime2))/2);

    $LocalTime = ($LocalTime1 + $LocalTime2) /2;
    $LocalTimeF = sprintf("%03.f",1000*sprintf("%.3f", $LocalTime - int($LocalTime)));
    ($sec, $min, $hr, $dy, $mo, $yr) = gmtime($LocalTime);
    $LocalTimeT = sprintf("%04d-%02d-%02d %02d:%02d:%02d", $yr+1900, $mo+1, $dy, $hr, $min, $sec);

  } # end sub calculate_time_data--------------------------------
  ########################End of Funtions############################


#################

Advertisement