#!/opt/perl-5.8.7-thread/bin/perl
#Filename: allSearch.cgi
#Jaehyun Paek
#Objective: Find mathematician(s) in the directories of different mathematical societies
#Description: In a quest to find an effective federated searching strategy that would enable us to search through
# the directories of different mathematical societies, Mason Macklem and I discovered that every search can be done by
# passing arguments in the URL of each society's directory. Based on that discovery, Mason and I have been trying to
# implement a Perl program that parses the user's query into a URL, gets the webpage from that URL, extracts
# the relevant information from the webpage, and prints that information out to the user.
# This program is the latest implementation of the above strategy. The major difference between this version and
# the previous version is in the way it prints the result. This program prints three tables as output. The first
# table shows the query the user has entered. The second table lists the results by name, with each name having links to
# the webpages containing the information pertaining to that name from each directory that returns the given name, and
# the third table lists the results by directory, with the number of results found in each directory.
# This program has two subroutines - Query and Result
# - Query: Read the query from the user and pass it on to Result
# - Result: Pass the query into the URL of each directory, convert the returned webpage into a string,
#   then format the result
# This version searches through the following directories: Combined Member's List (CML), Canadian Mathematical
# Society (CMS), French Mathematical Society (SMF), MathNet, Zuse Institute Berlin (ZIB)

use CGI;
use LWP::Simple;
use LWP::UserAgent;
use HTML::TokeParser;
use 5.008;
use threads;
use lib '/var/http/htdocs/ddrive/fwdm/cgi-bin/';

require 'search.lib';
require 'aussie.lib';
require 'google.lib';
require 'merge.lib';
require 'process.lib';
require 'query_simple.lib';
require 'austrian.lib';
require
'portugal.lib';

# _print_remote_fragment($url)
# Private helper shared by query, description and leftmenu.
# Fetches $url with get() and prints the returned content to the currently
# selected output handle. get() yields undef when the request fails; in that
# case nothing is printed and a warning naming the URL is sent to STDERR so the
# failure is not silent.
# Returns 1 when content was printed, 0 when the fetch failed.
sub _print_remote_fragment {
    my ($url) = @_;

    my $content = get($url);
    if (!defined $content) {
        warn "allSearch: could not fetch $url\n";
        return 0;
    }

    print $content;
    return 1;
}

#Subroutine query
#This subroutine reads queries from the user. Some fields apply to most
#directories, while others are found only in a few directories.
#It does so by printing the content fetched from searchsmall.txt
#(presumably the search form markup -- confirm against that file).
#Takes no arguments; returns the result of _print_remote_fragment.
sub query {
    return _print_remote_fragment(
        "http://www.cs.dal.ca/ddrive/fwdm/cgi-bin/searchsmall.txt");
}

#Subroutine description
#Prints the content fetched from description.txt.
#Takes no arguments; returns the result of _print_remote_fragment.
sub description {
    return _print_remote_fragment(
        "http://www.cs.dal.ca/ddrive/fwdm/cgi-bin/description.txt");
}

#Subroutine leftmenu
#Prints the content fetched from logos.txt.
#Takes no arguments; returns the result of _print_remote_fragment.
sub leftmenu {
    return _print_remote_fragment(
        "http://www.cs.dal.ca/ddrive/fwdm/cgi-bin/logos.txt");
}

#Subroutine to print result sub result{ my $lName = $cgi->param('lName'); my $fName = $cgi->param('fName'); my $num_page = $cgi->param('num'); my $res_page =$cgi->param('page'); my ($query_checksum, $search_url) = &simple_query($lName, $fName); my ($num, $names, $url, $home, $email, $query_url, $google_num, $google, $scholar_num, $scholar) = &process($search_url, $query_checksum, $lName,$fName); my ($ewdm_num, $cml_num, $cms_num, $math_net_num, $smf_num, $aussie_num, $aust_num, $portg_num) = @$num; my ($ewdm_names, $cml_names, $cms_names, $math_net_names, $smf_names, $aussie_names, $aust_names, $portg_names) = @$names; my ($ewdm_url, $cml_url, $cms_url, $math_net_url, $smf_url, $aussie_url, $aust_url, $portg_url) = @$url; my ($ewdm_email, $cml_email, $cms_email, $math_net_email, $smf_email, $aussie_emial, $aust_email, $portg_email) = @$email; my ($ewdm_home, $cml_home, $cms_home, $math_net_home, $smf_home, $aussie_home, $aust_home, $portg_home) = @$home; my ($name_array, $total_num, $size_name); my ($email_hash, $home_hash); my $aussie_home = "http://www.maths.anu.edu.au/other/ncms/wdm.html"; my $cml_only_table; my $email_checksum; my $email_and_home; my $temp_email_and_home; $total_num = $ewdm_num + $cml_num + $cms_num + $math_net_num + $smf_num + $aussie_num + $aust_num + $portg_num; print " "; print "
| Search Results | "; } # if ($name_counter < @$cml_only_table){ # print "CML Only #Results | "; # } print "Results by Society | "; print "Google Results | "; print "Your Query | "; #"; # print "
| Search Again |
| $curr_name | "; } else{ my $key = $$temp_email_and_home[0]; print " | ||
| "; } } else{ print " | |||
| ";
}
$curr_name =~
/([\wàèìòùÀÈÌÒÙáéíóúýÁÉÍÓÚÝâêîôûÂÊÎÔÛãñõÃÑÕäëïöüÄËÏÖÜåÅæÆçÇøØðÐ-]+),\s*([\wàèìòùÀÈÌÒÙáéíóúýÁÉÍÓÚÝâêîôûÂÊÎÔÛãñõÃÑÕäëïöüÄËÏÖÜåÅæÆçÇøØðÐ-]+)/;
$temp = $1;
$temp1 = $2;
$q = 0;
foreach (keys (%$cms_url)){
$cmp_name = $_;
$cmp_name =~
/([\wàèìòùÀÈÌÒÙáéíóúýÁÉÍÓÚÝâêîôûÂÊÎÔÛãñõÃÑÕäëïöüÄËÏÖÜåÅæÆçÇøØðÐ-]+),\s*([\wàèìòùÀÈÌÒÙáéíóúýÁÉÍÓÚÝâêîôûÂÊÎÔÛãñõÃÑÕäëïöüÄËÏÖÜåÅæÆçÇøØðÐ-]+)/;
$cmp_temp = $1;
$cmp_temp1 = $2;
if (normalize($temp) eq normalize($cmp_temp)){
if (normalize($temp1) eq normalize($cmp_temp1)){
$cmp_temp = $$cms_url{"$cmp_name"};
print "CMS "; $q++; } } } foreach (keys (%$smf_url)){ $cmp_name = $_; $cmp_name =~ /([\wàèìòùÀÈÌÒÙáéíóúýÁÉÍÓÚÝâêîôûÂÊÎÔÛãñõÃÑÕäëïöüÄËÏÖÜåÅæÆçÇøØðÐ-]+),\s*([\wàèìòùÀÈÌÒÙáéíóúýÁÉÍÓÚÝâêîôûÂÊÎÔÛãñõÃÑÕäëïöüÄËÏÖÜåÅæÆçÇøØðÐ-]+)/; $cmp_temp = $1; $cmp_temp1 = $2; if (normalize($temp) eq normalize($cmp_temp)){ if (normalize($temp1) eq normalize($cmp_temp1)){ $cmp_temp = $$smf_url{"$cmp_name"}; print "SMF "; $q++; } } } foreach (keys (%$math_net_url)){ $cmp_name = $_; $cmp_name =~ /([\wàèìòùÀÈÌÒÙáéíóúýÁÉÍÓÚÝâêîôûÂÊÎÔÛãñõÃÑÕäëïöüÄËÏÖÜåÅæÆçÇøØðÐ-]+),\s*([\wàèìòùÀÈÌÒÙáéíóúýÁÉÍÓÚÝâêîôûÂÊÎÔÛãñõÃÑÕäëïöüÄËÏÖÜåÅæÆçÇøØðÐ-]+)/; $cmp_temp = $1; $cmp_temp1 = $2; if (normalize($temp) eq normalize($cmp_temp)){ if (normalize($temp1) eq normalize($cmp_temp1)){ $cmp_temp = $$math_net_url{"$cmp_name"}; print "Math-Net "; $q++; } } } foreach (keys (%$aust_url)){ $cmp_name = $_; $cmp_name =~ /([\wàèìòùÀÈÌÒÙáéíóúýÁÉÍÓÚÝâêîôûÂÊÎÔÛãñõÃÑÕäëïöüÄËÏÖÜåÅæÆçÇøØðÐ-]+),\s*([\wàèìòùÀÈÌÒÙáéíóúýÁÉÍÓÚÝâêîôûÂÊÎÔÛãñõÃÑÕäëïöüÄËÏÖÜåÅæÆçÇøØðÐ-]+)/; $cmp_temp = $1; $cmp_temp1 = $2; if (normalize($temp) eq normalize($cmp_temp)){ if (normalize($temp1) eq normalize($cmp_temp1)){ $cmp_temp = $$aust_url{"$cmp_name"}; print "OeMG "; $q++; } } } foreach (keys (%$portg_url)){ $cmp_name = $_; $cmp_name =~ /([\wàèìòùÀÈÌÒÙáéíóúýÁÉÍÓÚÝâêîôûÂÊÎÔÛãñõÃÑÕäëïöüÄËÏÖÜåÅæÆçÇøØðÐ-]+),\s*([\wàèìòùÀÈÌÒÙáéíóúýÁÉÍÓÚÝâêîôûÂÊÎÔÛãñõÃÑÕäëïöüÄËÏÖÜåÅæÆçÇøØðÐ-]+)/; $cmp_temp = $1; $cmp_temp1 = $2; if (normalize($temp) eq normalize($cmp_temp)){ if (normalize($temp1) eq normalize($cmp_temp1)){ $cmp_temp = $$portg_url{"$cmp_name"}; print "PDM "; $q++; } } } foreach (keys (%$ewdm_url)){ $cmp_name = $_; $cmp_name =~ /([\wàèìòùÀÈÌÒÙáéíóúýÁÉÍÓÚÝâêîôûÂÊÎÔÛãñõÃÑÕäëïöüÄËÏÖÜåÅæÆçÇøØðÐ-]+),\s*([\wàèìòùÀÈÌÒÙáéíóúýÁÉÍÓÚÝâêîôûÂÊÎÔÛãñõÃÑÕäëïöüÄËÏÖÜåÅæÆçÇøØðÐ-]+)/; $cmp_temp = $1; $cmp_temp1 = $2; if (normalize($temp) eq normalize($cmp_temp)){ if (normalize($temp1) eq normalize($cmp_temp1)){ $cmp_temp = $$ewdm_url{"$cmp_name"}; $cmp_temp1 = 
$$ewdm_home{"$cmp_name"}; print "EWDM "; $q++; } } } foreach (keys (%$aussie_url)){ $cmp_name = $_; $cmp_name =~ /([\wàèìòùÀÈÌÒÙáéíóúýÁÉÍÓÚÝâêîôûÂÊÎÔÛãñõÃÑÕäëïöüÄËÏÖÜåÅæÆçÇøØðÐ-]+),\s*([\wàèìòùÀÈÌÒÙáéíóúýÁÉÍÓÚÝâêîôûÂÊÎÔÛãñõÃÑÕäëïöüÄËÏÖÜåÅæÆçÇøØðÐ-]+)/; $cmp_temp = $1; $cmp_temp1 = $2; if (normalize($temp) eq normalize($cmp_temp)){ if (normalize($temp1) eq normalize($cmp_temp1)){ $cmp_temp = $$aussie_url{"$cmp_name"}; print "NCMS "; $q++; } } } $q_cml = 0; foreach (keys (%$cml_url)){ $cmp_name = $_; $cmp_name =~ /([\wàèìòùÀÈÌÒÙáéíóúýÁÉÍÓÚÝâêîôûÂÊÎÔÛãñõÃÑÕäëïöüÄËÏÖÜåÅæÆçÇøØðÐ-]+),\s*([\wàèìòùÀÈÌÒÙáéíóúýÁÉÍÓÚÝâêîôûÂÊÎÔÛãñõÃÑÕäëïöüÄËÏÖÜåÅæÆçÇøØðÐ-]+)/; $cmp_temp = $1; $cmp_temp1 = $2; if (normalize($temp) eq normalize($cmp_temp)){ if (normalize($temp1) eq normalize($cmp_temp1)){ if ($q_cml == 0 && $q > 0){ $cmp_temp = $$cml_url{"$cmp_name"}; print "CML"; $q_cml++; } elsif ($q > 0){ $q_cml++; } } } } if ($q_cml > 1 && $q > 0){ print "($q_cml) "; } elsif ($q_cml == 1 && $q > 0){ print " "; } $name_counter++; print " | ";
print ""; $email_and_home = 1; if (exists $$home_hash{$curr_name}){ $temp_email_and_home = $$home_hash{$curr_name}; foreach my $key(@$temp_email_and_home){ if ($key ne "NONE"){ print "$email_and_home "; $email_and_home++; } else{ print "NONE"; } } } else{ print "NONE"; } print " | "; my $google1 = &get_google($temp, $temp1); my $google2 = &get_google_scholar($temp, $temp1); print "Google Google Scholar |
"; } $name_counter = ($res_page - 1)*$num_page; if ($name_counter < @$cml_only_table){ # print "To Top
| $curr_name | "; } else{ my $key = $$temp_email_and_home[0]; print " | ||
| $curr_name | "; } } else{ print " | ||
| $curr_name | ";
}
$curr_name =~
/([\wàèìòùÀÈÌÒÙáéíóúýÁÉÍÓÚÝâêîôûÂÊÎÔÛãñõÃÑÕäëïöüÄËÏÖÜåÅæÆçÇøØðÐ-]+),\s*([\wàèìòùÀÈÌÒÙáéíóúýÁÉÍÓÚÝâêîôûÂÊÎÔÛãñõÃÑÕäëïöüÄËÏÖÜåÅæÆçÇøØðÐ-]+)/;
$temp = $1;
$temp1 = $2;
$q_cml = 0;
foreach (keys (%$cml_url)){
$cmp_name = $_;
$cmp_name =~
/([\wàèìòùÀÈÌÒÙáéíóúýÁÉÍÓÚÝâêîôûÂÊÎÔÛãñõÃÑÕäëïöüÄËÏÖÜåÅæÆçÇøØðÐ-]+),\s*([\wàèìòùÀÈÌÒÙáéíóúýÁÉÍÓÚÝâêîôûÂÊÎÔÛãñõÃÑÕäëïöüÄËÏÖÜåÅæÆçÇøØðÐ-]+)/;
$cmp_temp = $1;
$cmp_temp1 = $2;
if (normalize($temp) eq normalize($cmp_temp)){
if (normalize($temp1) eq normalize($cmp_temp1)){
if ($q_cml == 0){
$cmp_temp = $$cml_url{"$cmp_name"};
print "CML";
$q_cml++;
}
else{
$q_cml++;
}
}
}
}
if ($q_cml > 1){
print "($q_cml) "; } else{ print " "; } $name_counter++; print " | ";
print "";
$email_and_home = 1;
if (exists $$home_hash{$curr_name}){
$temp_email_and_home = $$home_hash{$curr_name};
foreach my $key(@$temp_email_and_home){
print "$email_and_home "; $email_and_home++; } } else{ print "NONE"; } print " | ";
my $google1 = &get_google($temp, $temp1);
my $google2 = &get_google_scholar($temp, $temp1);
print "Google Google Scholar |
"; } $size_num = @$name_array; my $cml_size_num = @$cml_only_table; my $total_size_num = $size_num + $cml_size_num; print "To Top
| Total: | $total_size_num names found |
| Total(CML-only): | $cml_size_num names found |
| EWDM: | $ewdm_num Results |
| CML: | $cml_num Results |
| CMS: | $cms_num Results |
| Math-Net: | $math_net_num Results |
| SMF: | $smf_num Results |
| OeMG: | $aust_num Results |
| PDM: | $portg_num Results |
| NCMS: | $aussie_num Results |
"; print "
"; print "To Top
"; print "To Top
"; my $page_num; if (($num_page > @$name_array) && ($num_page > @$cml_only_table)){ $page_num = 1; } else{ if (@$name_array > @$cml_only_table){ $page_num = @$name_array / $num_page; if ((@$name_array % $num_page) != 0){ $page_num++; } } else{ $page_num = @$cml_only_table / $num_page; if ((@$cml_only_table % $num_page) != 0){ $page_num++; } } } print "
| Standard Search | Advanced Search |
\n";
my $prev_page = $res_page-1;
my $next_page = $res_page+1;
print "
\n"; &description; print "