#!/usr/bin/perl

# Fetch a copy of the WBC's game list page to see which events have urls
# (fetch for those that do) and to get event titles.
# Bruno Wolff III
# Last updated July 22, 2006
#
# Output: one line per event is written to wbcevent.new in the form
#     code <TAB> title <TAB> url
# Tabs inside any field are replaced by spaces so the fields stay separable.
# On failure to fetch the page, or to open/close the output file, a message
# is printed to STDERR and the script exits with a non-zero status.

use strict;
use warnings;

use LWP::UserAgent;
use HTML::TokeParser;
use charnames ':full';

# States of the table scanner below.
use constant {
    SEEK_HEADER     => 0,    # nothing seen yet; waiting for the first <th>
    SEEK_ROW        => 1,    # waiting for the next <tr>
    SEEK_CODE_CELL  => 2,    # inside a row; waiting for its first <td>
    IN_CODE_CELL    => 3,    # inside the first <td> (event code)
    AFTER_CODE_CELL => 4,    # first <td> closed; waiting for the second <td>
    IN_TITLE_CELL   => 5,    # inside the second <td> (title and optional link)
};

my $base        = 'http://www.boardgamers.org/';
my $output_file = 'wbcevent.new';

# Fetch the page before touching the output file, so a failed download
# does not truncate an existing wbcevent.new.
my $ua  = LWP::UserAgent->new;
my $res = $ua->get($base . 'gamelist.htm');
if (!$res->is_success) {
    die "Unable to fetch game list.\n";
}

open(my $event_fh, '>', $output_file)
    or die "Unable to open wbcevent.new.\n";

my $parser = HTML::TokeParser->new(\$res->content);

# Fields of the event currently being collected.
my ($code, $title, $url) = ('', '', '');
my $state = SEEK_HEADER;

# HTML::TokeParser tokens are array refs: element 0 is the token type
# ('S' start tag, 'E' end tag, 'T' text, ...), element 1 is the tag name
# (or the text for 'T' tokens), and for start tags element 2 is the
# attribute hash.
while (my $token = $parser->get_token) {
    my $row_start  = is_start_tag($token, 'tr');
    my $cell_start = is_start_tag($token, 'td');
    my $cell_end   = is_end_tag($token, 'td');
    my $is_text    = $token->[0] eq 'T';

    if ($state == SEEK_HEADER) {
        # Everything before the first header cell is ignored.
        $state = SEEK_ROW if is_start_tag($token, 'th');
    }
    elsif ($state == SEEK_ROW) {
        if ($row_start) {
            ($code, $title, $url) = ('', '', '');
            $state = SEEK_CODE_CELL;
        }
    }
    elsif ($state == SEEK_CODE_CELL) {
        # A further <tr> here just leaves us waiting for the first cell.
        $state = IN_CODE_CELL if $cell_start;
    }
    elsif ($state == IN_CODE_CELL) {
        if ($row_start) {
            # Row ended without a second cell: flush what we have.
            emit_event($event_fh, $code, $title, $url);
            ($code, $title, $url) = ('', '', '');
            $state = SEEK_CODE_CELL;
        }
        elsif ($cell_start) {
            # Second cell opened without an explicit </td> on the first.
            $state = IN_TITLE_CELL;
        }
        elsif ($cell_end) {
            $state = AFTER_CODE_CELL;
        }
        elsif ($is_text) {
            # Push the text token back so get_trimmed_text can collect
            # the whole run of text up to the next tag.
            $parser->unget_token($token);
            $code = $parser->get_trimmed_text;
            $code =~ tr/\t/ /;
        }
    }
    elsif ($state == AFTER_CODE_CELL) {
        if ($row_start) {
            emit_event($event_fh, $code, $title, $url);
            ($code, $title, $url) = ('', '', '');
            $state = SEEK_CODE_CELL;
        }
        elsif ($cell_start) {
            $state = IN_TITLE_CELL;
        }
    }
    elsif ($state == IN_TITLE_CELL) {
        if ($row_start) {
            emit_event($event_fh, $code, $title, $url);
            ($code, $title, $url) = ('', '', '');
            $state = SEEK_CODE_CELL;
        }
        elsif ($cell_start || $cell_end) {
            # The title cell is complete; any remaining cells in this row
            # are skipped because SEEK_ROW only reacts to the next <tr>.
            emit_event($event_fh, $code, $title, $url);
            $state = SEEK_ROW;
        }
        elsif ($is_text) {
            $parser->unget_token($token);
            my $text = $parser->get_trimmed_text;
            $text =~ s/\N{HORIZONTAL ELLIPSIS}/.../g;
            $text =~ tr/\t/ /;

            # A title may be split over several text runs (e.g. around
            # the link markup); join the non-empty pieces with a space.
            if ($text ne '') {
                $title = $title eq '' ? $text : $title . ' ' . $text;
            }
        }
        elsif (is_start_tag($token, 'a')) {
            my $href = $token->[2]{href};
            $url = absolute_url($base, $href) if defined $href;
        }
    }
}

# Buffered write errors only surface at close, so check it.
close($event_fh)
    or die "Unable to close wbcevent.new: $!\n";

# Return true if $token is a start tag named $tag.
sub is_start_tag {
    my ($token, $tag) = @_;
    return $token->[0] eq 'S' && $token->[1] eq $tag;
}

# Return true if $token is an end tag named $tag.
sub is_end_tag {
    my ($token, $tag) = @_;
    return $token->[0] eq 'E' && $token->[1] eq $tag;
}

# Write one "code<TAB>title<TAB>url" record to $fh, or complain on STDERR
# when the row produced no event code.
sub emit_event {
    my ($fh, $code, $title, $url) = @_;
    if ($code ne '') {
        print {$fh} "$code\t$title\t$url\n";
    }
    else {
        print STDERR "Empty code in gamelist.htm\n";
    }
    return;
}

# Turn a link target from the game list page into a full URL.
# Site-relative targets have leading slashes stripped and are appended to
# $base; targets that already carry a scheme ("http:", "mailto:", ...) are
# left alone instead of being glued onto $base. Tabs become spaces.
sub absolute_url {
    my ($base, $href) = @_;
    my $url = $href;
    if ($url !~ m{\A[A-Za-z][A-Za-z0-9+.\-]*:}) {
        $url =~ s{^/+}{};
        $url = $base . $url;
    }
    $url =~ tr/\t/ /;
    return $url;
}