#!/opt/perl-5.8.7-thread/bin/perl #Filename: allSearch.cgi #Jaehyun Paek #Objective: Find mathematician(s) in the different mathematical societies' directories #Description: In a quest to find an effective federated searching strategy that would enable us to search through # directories of different mathematical societies, Mason Macklem and I have discovered that all searches can be done by # passing arguments into the URL of each society's directory. Based on that discovery, Mason and I have been trying to # implement a Perl program that would parse the user's query into a URL; get the webpage from that URL; and extract # relevant information from the webpage and print out the information to the user. # This program is the latest implementation of the above strategy. The major difference between this version and # the previous version is in the way it prints the result. This program prints three tables as output. The first # table shows the query the user has entered. The second table lists the results by name, with each name having links to # webpages containing the information pertaining to the corresponding name from each directory that returns the given name, and # the third table lists the results by directory with the number of results found in each directory # This program has two subroutines - Query and Result # - Query: Read the query from the user and pass it on to Result # - Result: pass the query into the url of each directory, convert the webpage returned into a string # then format the result # This version searches through the following directories: Combined Member's List (CML), Canadian Mathematical # Society(CMS), French Mathematical Society (SMF), MathNet, Zuse Institute Berlin (ZIB) use CGI; use LWP::Simple; use LWP::UserAgent; use HTML::TokeParser; use 5.008; use threads; use lib '/var/http/htdocs/ddrive/fwdm/cgi-bin/'; require 'search.lib'; require 'aussie.lib'; require 'google.lib'; require 'merge.lib'; require 'process.lib'; require 'query_simple.lib'; require 'austrian.lib'; require 
'portugal.lib';

#Subroutine query
#Fetches the search form (searchsmall.txt) over HTTP and prints it. Some form
#fields apply to most of the directories, while others are found only in a
#few directories.
sub query{
    $searchform = get("http://www.cs.dal.ca/ddrive/fwdm/cgi-bin/searchsmall.txt");
    print $searchform;
}

#Subroutine description
#Fetches description.txt over HTTP and prints it.
sub description{
    $descript = get("http://www.cs.dal.ca/ddrive/fwdm/cgi-bin/description.txt");
    print $descript;
}

#Subroutine leftmenu
#Fetches logos.txt over HTTP and prints it.
sub leftmenu{
    $leftmenu = get("http://www.cs.dal.ca/ddrive/fwdm/cgi-bin/logos.txt");
    print $leftmenu;
}

#Subroutine to print result
#Reads the CGI parameters lName, fName, num (results per page) and page (page
#to show), builds the search with simple_query, runs it with process and then
#prints: the merged per-name results for the requested page, the CML-only
#results, the number of results per directory, the Google / Google Scholar
#counts, the query form and the page navigation.
#  - num and page fall back to 10 and 1 when they are missing or not a
#    positive integer, so the paging arithmetic can neither divide by zero
#    nor start from a negative index.
#  - Every directory whose count is below 1 is named in the " / "-separated
#    list that is printed after the per-directory counts; this is the exact
#    complement of the "> 0" tests used for the counts themselves.
#Takes no arguments; uses the global $cgi object created by the main program.
sub result{
    my $lName = $cgi->param('lName');
    my $fName = $cgi->param('fName');
    my $num_page = $cgi->param('num');
    my $res_page = $cgi->param('page');

    #Guard the paging parameters: a missing or zero 'num' would otherwise
    #end in "Illegal division by zero" when the page count is computed, and
    #a missing 'page' would make the start index negative.
    if (!defined $num_page || $num_page !~ /\A\d+\z/ || $num_page < 1){
        $num_page = 10;
    }
    if (!defined $res_page || $res_page !~ /\A\d+\z/ || $res_page < 1){
        $res_page = 1;
    }

    my ($query_checksum, $search_url) = &simple_query($lName, $fName);
    my ($num, $names, $url, $home, $email, $query_url, $google_num, $google, $scholar_num, $scholar) = &process($search_url, $query_checksum, $lName,$fName);

    #Each reference returned by process is unpacked positionally, one slot
    #per directory, always in the same directory order.
    my ($ewdm_num, $cml_num, $cms_num, $math_net_num, $smf_num, $aussie_num, $aust_num, $portg_num) = @$num;
    my ($ewdm_names, $cml_names, $cms_names, $math_net_names, $smf_names, $aussie_names, $aust_names, $portg_names) = @$names;
    my ($ewdm_url, $cml_url, $cms_url, $math_net_url, $smf_url, $aussie_url, $aust_url, $portg_url) = @$url;
    #The sixth slot must be named $aussie_email: that is the name handed to
    #merge_web below.
    my ($ewdm_email, $cml_email, $cms_email, $math_net_email, $smf_email, $aussie_email, $aust_email, $portg_email) = @$email;
    my ($ewdm_home, $cml_home, $cms_home, $math_net_home, $smf_home, $aussie_home, $aust_home, $portg_home) = @$home;
    my ($name_array, $total_num, $size_name);
    my ($email_hash, $home_hash);
    #NOTE(review): this re-declaration masks the $aussie_home unpacked from
    #@$home above, so merge_web receives this URL string instead of the value
    #returned by process -- confirm that this is intended.
    my $aussie_home = "http://www.maths.anu.edu.au/other/ncms/wdm.html";
    my $cml_only_table;
    my $email_checksum;
    my $email_and_home;
    my $temp_email_and_home;
    $total_num = $ewdm_num + $cml_num + $cms_num + $math_net_num + $smf_num + $aussie_num + $aust_num + $portg_num;
    print " ";
    print "\n";
    if ($total_num > 0){
        #Index of the first name shown on the requested page.
        my $name_counter = ($res_page - 1) * $num_page;
        my ($curr_name, $cmp_name, $temp, $temp1, $cmp_temp, $cmp_temp1, $q, $q_cml);
        ($name_array, $cml_only_table) = &merge($ewdm_names,$cml_names, $cms_names, $math_net_names, $smf_names, $aussie_names, $aust_names, $portg_names);
        $email_hash = &merge_web($ewdm_email,$cml_email, $cms_email, $math_net_email, $smf_email, $aussie_email, $aust_email, $portg_email);
        $home_hash = &merge_web($ewdm_home,$cml_home, $cms_home, $math_net_home, $smf_home, $aussie_home, $aust_home, $portg_home);
#The statements from here on print the navigation links, the result tables,
#the query form and the page navigation.
if ($name_counter < @$name_array || $name_counter < @$cml_only_table){ print ""; } # if ($name_counter < @$cml_only_table){ # print ""; # } print ""; print ""; print ""; #"; # print "
Search ResultsCML Only #ResultsResults by SocietyGoogle ResultsYour Query
Search Again
"; print "Search Again"; if ($name_counter < @$name_array || $name_counter < @$cml_only_table){ # print "To Top print "
\n"; print "

Search Results

\n"; print ""; print ""; while (($name_counter < ($res_page * $num_page)) && ($name_counter < @$name_array)){ $curr_name = $$name_array[$name_counter]; my $multi_email_addr; if (exists $$email_hash{$curr_name}){ $temp_email_and_home = $$email_hash{$curr_name}; $email_checksum = @$temp_email_and_home; if ($email_checksum > 1){ $multi_email_addr = join ", ", @$temp_email_and_home; print ""; print ""; my $google1 = &get_google($temp, $temp1); my $google2 = &get_google_scholar($temp, $temp1); print ""; } print "
$curr_name"; } else{ my $key = $$temp_email_and_home[0]; print "
"; } } else{ print "
"; } $curr_name =~ /([\wàèìòùÀÈÌÒÙáéíóúýÁÉÍÓÚÝâêîôûÂÊÎÔÛãñõÃÑÕäëïöüÄËÏÖÜåÅæÆçÇøØðÐ-]+),\s*([\wàèìòùÀÈÌÒÙáéíóúýÁÉÍÓÚÝâêîôûÂÊÎÔÛãñõÃÑÕäëïöüÄËÏÖÜåÅæÆçÇøØðÐ-]+)/; $temp = $1; $temp1 = $2; $q = 0; foreach (keys (%$cms_url)){ $cmp_name = $_; $cmp_name =~ /([\wàèìòùÀÈÌÒÙáéíóúýÁÉÍÓÚÝâêîôûÂÊÎÔÛãñõÃÑÕäëïöüÄËÏÖÜåÅæÆçÇøØðÐ-]+),\s*([\wàèìòùÀÈÌÒÙáéíóúýÁÉÍÓÚÝâêîôûÂÊÎÔÛãñõÃÑÕäëïöüÄËÏÖÜåÅæÆçÇøØðÐ-]+)/; $cmp_temp = $1; $cmp_temp1 = $2; if (normalize($temp) eq normalize($cmp_temp)){ if (normalize($temp1) eq normalize($cmp_temp1)){ $cmp_temp = $$cms_url{"$cmp_name"}; print "CMS
"; $q++; } } } foreach (keys (%$smf_url)){ $cmp_name = $_; $cmp_name =~ /([\wàèìòùÀÈÌÒÙáéíóúýÁÉÍÓÚÝâêîôûÂÊÎÔÛãñõÃÑÕäëïöüÄËÏÖÜåÅæÆçÇøØðÐ-]+),\s*([\wàèìòùÀÈÌÒÙáéíóúýÁÉÍÓÚÝâêîôûÂÊÎÔÛãñõÃÑÕäëïöüÄËÏÖÜåÅæÆçÇøØðÐ-]+)/; $cmp_temp = $1; $cmp_temp1 = $2; if (normalize($temp) eq normalize($cmp_temp)){ if (normalize($temp1) eq normalize($cmp_temp1)){ $cmp_temp = $$smf_url{"$cmp_name"}; print "SMF
"; $q++; } } } foreach (keys (%$math_net_url)){ $cmp_name = $_; $cmp_name =~ /([\wàèìòùÀÈÌÒÙáéíóúýÁÉÍÓÚÝâêîôûÂÊÎÔÛãñõÃÑÕäëïöüÄËÏÖÜåÅæÆçÇøØðÐ-]+),\s*([\wàèìòùÀÈÌÒÙáéíóúýÁÉÍÓÚÝâêîôûÂÊÎÔÛãñõÃÑÕäëïöüÄËÏÖÜåÅæÆçÇøØðÐ-]+)/; $cmp_temp = $1; $cmp_temp1 = $2; if (normalize($temp) eq normalize($cmp_temp)){ if (normalize($temp1) eq normalize($cmp_temp1)){ $cmp_temp = $$math_net_url{"$cmp_name"}; print "Math-Net
"; $q++; } } } foreach (keys (%$aust_url)){ $cmp_name = $_; $cmp_name =~ /([\wàèìòùÀÈÌÒÙáéíóúýÁÉÍÓÚÝâêîôûÂÊÎÔÛãñõÃÑÕäëïöüÄËÏÖÜåÅæÆçÇøØðÐ-]+),\s*([\wàèìòùÀÈÌÒÙáéíóúýÁÉÍÓÚÝâêîôûÂÊÎÔÛãñõÃÑÕäëïöüÄËÏÖÜåÅæÆçÇøØðÐ-]+)/; $cmp_temp = $1; $cmp_temp1 = $2; if (normalize($temp) eq normalize($cmp_temp)){ if (normalize($temp1) eq normalize($cmp_temp1)){ $cmp_temp = $$aust_url{"$cmp_name"}; print "OeMG
"; $q++; } } } foreach (keys (%$portg_url)){ $cmp_name = $_; $cmp_name =~ /([\wàèìòùÀÈÌÒÙáéíóúýÁÉÍÓÚÝâêîôûÂÊÎÔÛãñõÃÑÕäëïöüÄËÏÖÜåÅæÆçÇøØðÐ-]+),\s*([\wàèìòùÀÈÌÒÙáéíóúýÁÉÍÓÚÝâêîôûÂÊÎÔÛãñõÃÑÕäëïöüÄËÏÖÜåÅæÆçÇøØðÐ-]+)/; $cmp_temp = $1; $cmp_temp1 = $2; if (normalize($temp) eq normalize($cmp_temp)){ if (normalize($temp1) eq normalize($cmp_temp1)){ $cmp_temp = $$portg_url{"$cmp_name"}; print "PDM
"; $q++; } } } foreach (keys (%$ewdm_url)){ $cmp_name = $_; $cmp_name =~ /([\wàèìòùÀÈÌÒÙáéíóúýÁÉÍÓÚÝâêîôûÂÊÎÔÛãñõÃÑÕäëïöüÄËÏÖÜåÅæÆçÇøØðÐ-]+),\s*([\wàèìòùÀÈÌÒÙáéíóúýÁÉÍÓÚÝâêîôûÂÊÎÔÛãñõÃÑÕäëïöüÄËÏÖÜåÅæÆçÇøØðÐ-]+)/; $cmp_temp = $1; $cmp_temp1 = $2; if (normalize($temp) eq normalize($cmp_temp)){ if (normalize($temp1) eq normalize($cmp_temp1)){ $cmp_temp = $$ewdm_url{"$cmp_name"}; $cmp_temp1 = $$ewdm_home{"$cmp_name"}; print "EWDM
"; $q++; } } } foreach (keys (%$aussie_url)){ $cmp_name = $_; $cmp_name =~ /([\wàèìòùÀÈÌÒÙáéíóúýÁÉÍÓÚÝâêîôûÂÊÎÔÛãñõÃÑÕäëïöüÄËÏÖÜåÅæÆçÇøØðÐ-]+),\s*([\wàèìòùÀÈÌÒÙáéíóúýÁÉÍÓÚÝâêîôûÂÊÎÔÛãñõÃÑÕäëïöüÄËÏÖÜåÅæÆçÇøØðÐ-]+)/; $cmp_temp = $1; $cmp_temp1 = $2; if (normalize($temp) eq normalize($cmp_temp)){ if (normalize($temp1) eq normalize($cmp_temp1)){ $cmp_temp = $$aussie_url{"$cmp_name"}; print "NCMS
"; $q++; } } } $q_cml = 0; foreach (keys (%$cml_url)){ $cmp_name = $_; $cmp_name =~ /([\wàèìòùÀÈÌÒÙáéíóúýÁÉÍÓÚÝâêîôûÂÊÎÔÛãñõÃÑÕäëïöüÄËÏÖÜåÅæÆçÇøØðÐ-]+),\s*([\wàèìòùÀÈÌÒÙáéíóúýÁÉÍÓÚÝâêîôûÂÊÎÔÛãñõÃÑÕäëïöüÄËÏÖÜåÅæÆçÇøØðÐ-]+)/; $cmp_temp = $1; $cmp_temp1 = $2; if (normalize($temp) eq normalize($cmp_temp)){ if (normalize($temp1) eq normalize($cmp_temp1)){ if ($q_cml == 0 && $q > 0){ $cmp_temp = $$cml_url{"$cmp_name"}; print "CML"; $q_cml++; } elsif ($q > 0){ $q_cml++; } } } } if ($q_cml > 1 && $q > 0){ print "($q_cml)
"; } elsif ($q_cml == 1 && $q > 0){ print "
"; } $name_counter++; print "
"; $email_and_home = 1; if (exists $$home_hash{$curr_name}){ $temp_email_and_home = $$home_hash{$curr_name}; foreach my $key(@$temp_email_and_home){ if ($key ne "NONE"){ print "$email_and_home "; $email_and_home++; } else{ print "NONE"; } } } else{ print "NONE"; } print "Google
Google Scholar

"; } $name_counter = ($res_page - 1)*$num_page; if ($name_counter < @$cml_only_table){ # print "To Top


\n"; print "

CML Only Results

\n"; print ""; print ""; while (($name_counter < $res_page * $num_page) && ($name_counter < @$cml_only_table)){ $curr_name = $$cml_only_table[$name_counter]; if (exists $$email_hash{$curr_name}){ $temp_email_and_home = $$email_hash{$curr_name}; $email_checksum = @$temp_email_and_home; if ($email_checksum > 1){ $multi_email_addr = join ", ", @$temp_email_and_home; print ""; print ""; my $google1 = &get_google($temp, $temp1); my $google2 = &get_google_scholar($temp, $temp1); print ""; } print "
$curr_name "; } else{ my $key = $$temp_email_and_home[0]; print "
$curr_name"; } } else{ print "
$curr_name"; } $curr_name =~ /([\wàèìòùÀÈÌÒÙáéíóúýÁÉÍÓÚÝâêîôûÂÊÎÔÛãñõÃÑÕäëïöüÄËÏÖÜåÅæÆçÇøØðÐ-]+),\s*([\wàèìòùÀÈÌÒÙáéíóúýÁÉÍÓÚÝâêîôûÂÊÎÔÛãñõÃÑÕäëïöüÄËÏÖÜåÅæÆçÇøØðÐ-]+)/; $temp = $1; $temp1 = $2; $q_cml = 0; foreach (keys (%$cml_url)){ $cmp_name = $_; $cmp_name =~ /([\wàèìòùÀÈÌÒÙáéíóúýÁÉÍÓÚÝâêîôûÂÊÎÔÛãñõÃÑÕäëïöüÄËÏÖÜåÅæÆçÇøØðÐ-]+),\s*([\wàèìòùÀÈÌÒÙáéíóúýÁÉÍÓÚÝâêîôûÂÊÎÔÛãñõÃÑÕäëïöüÄËÏÖÜåÅæÆçÇøØðÐ-]+)/; $cmp_temp = $1; $cmp_temp1 = $2; if (normalize($temp) eq normalize($cmp_temp)){ if (normalize($temp1) eq normalize($cmp_temp1)){ if ($q_cml == 0){ $cmp_temp = $$cml_url{"$cmp_name"}; print "CML"; $q_cml++; } else{ $q_cml++; } } } } if ($q_cml > 1){ print "($q_cml)
"; } else{ print "
"; } $name_counter++; print "
"; $email_and_home = 1; if (exists $$home_hash{$curr_name}){ $temp_email_and_home = $$home_hash{$curr_name}; foreach my $key(@$temp_email_and_home){ print "$email_and_home
"; $email_and_home++; } } else{ print "NONE"; } print "
Google
Google Scholar

"; } $size_num = @$name_array; my $cml_size_num = @$cml_only_table; my $total_size_num = $size_num + $cml_size_num; print "To Top


\n"; print "

Search Results by Society

\n"; print ""; print ""; print ""; print ""; if ($ewdm_num > 0){ print ""; } if ($cml_num > 0){ print ""; } if ($cms_num > 0){ print ""; } if ($math_net_num > 0){ print ""; } if ($smf_num > 0){ print ""; } if ($aust_num >0){ print ""; } if ($portg_num >0){ print ""; } if ($aussie_num > 0){ print ""; } print "
Total: $total_size_num names found
Total(CML-only): $cml_size_num names found
EWDM: $ewdm_num Results
CML: $cml_num Results
CMS:$cms_num Results
Math-Net: $math_net_num Results
SMF: $smf_num Results
OeMG: $aust_num Results
PDM: $portg_num Results
NCMS: $aussie_num Results

"; print ""; print ""; print "
"; if ($cms_num < 1){ print "CMS / "; } if ($cml_num < 1){ print "CML / "; } if ($smf_num < 1){ print "SMF / "; } if ($math_net_num < 1){ print "Math-Net / "; } if ($aust_num < 1){ print "OeMG / "; } if ($portg_num < 1){ print "Portuguese / "; } if ($ewdm_num < 1){ print "EWDM / "; } if ($aussie_num < 1){ print "NCMS"; } print "

"; print "To Top


\n"; print " "; if ($$google_num){ print "
Found $$google_num results from Google
"; } else{ print "
No results found in Google
"; } if ($$scholar_num){ print "
Found $$scholar_num results from Google Scholar
"; } else{ print "
No results found in Google Scholar
"; } } else{ print "Search Results by Society"; print "Google Results"; print "Your Query"; #"; # print "Search Again"; print "
No Results Found
"; print ""; print "To Top
\n"; print " "; print "

"; print "To Top


\n"; print " "; print "
Found $$google_num results from Google
"; print "
Found $$scholar_num results from Google Scholar
"; } print "

Your Query

\n"; print "
"; print ""; print ""; print "\n"; print "\n"; print "\n"; print "\n"; print ""; print "
First Name:
Last Name:
Results per Page:
"; print "
"; print ""; print "
"; print "
"; print "

"; my $page_num; if (($num_page > @$name_array) && ($num_page > @$cml_only_table)){ $page_num = 1; } else{ if (@$name_array > @$cml_only_table){ $page_num = @$name_array / $num_page; if ((@$name_array % $num_page) != 0){ $page_num++; } } else{ $page_num = @$cml_only_table / $num_page; if ((@$cml_only_table % $num_page) != 0){ $page_num++; } } } print "\n"; print "
Standard Search Advanced Search
\n"; print "\n


\n"; my $prev_page = $res_page-1; my $next_page = $res_page+1; print "

"; print "\n"; $page_num = int($page_num); print "$res_page/$page_num"; print "
"; print "\n"; my $i; if ($res_page > 1){ print "Prev "; } print ""; for ($i = 1; $i <= $page_num; $i++){ print "$i "; } if ($res_page < $page_num){ print "Next "; } print "
"; print "
"; } #Main program $cgi = CGI->new; print $cgi->header; $htmlhead = "\n \n \n D·Drive: Federated World Directory of Mathematicians\n \n \n \n \n \n \n \n"; print $htmlhead; $topmenu = get("http://www.cs.dal.ca/ddrive/fwdm/ceictop.txt"); print $topmenu; $htmlmid = "
\n

D·Drive HomeFWDM

\n
\n
\n"; print $htmlmid; &leftmenu; #$leftmenu = get("http://www.cs.dal.ca/ddrive/fwdm/leftmenu.txt"); #print $leftmenu; my $action=$cgi->param('action'); print "
\n"; if ($action eq "") { &query; } elsif ($action eq "result") { &result; } print "

\n"; &description; print "

\n"; $footer = get("http://www.cs.dal.ca/ddrive/bottom.txt"); print $footer; print "\n";
\n"; $footer = get("http://www.cs.dal.ca/ddrive/bottom.txt"); print $footer; print "\n";