perl


1円切手のメモ

Strawberry Perl for Windows ってポータブル版があるんですねぇ。便利!

nyaos http://www.nyaos.org/index.cgi?p=FrontPage.ja

===ダウンロードする部分===

use strict; use warnings; use autodie; use utf8; use Encode; use LWP::UserAgent?; binmode STDIN, ':encoding(cp932)'; binmode STDOUT, ':encoding(cp932)'; binmode STDERR, ':encoding(cp932)';

# Windows 8.1 32bit

# Mozilla/5.0 (Windows NT 6.3; rv:31.0) Gecko/20100101 Firefox/31.0

my $USERAGENT='Mozilla/5.0 (Windows NT 6.3; rv:31.0) Gecko/20100101 Firefox/31.0'; my $HTTP_PROXY="http://proxy.nex.west.ntt.co.jp:8080";

# 天窓 .comfort

# http://www.otonami.com/comfort/schedule/1408.htm

my $URL="http://www.otonami.com/comfort/schedule/1408.htm";

# $URL = 'http://www.ugtop.com/spill.shtml';

my $ua = LWP::UserAgent?->new(); ua->agent(USERAGENT); $ua->timeout( 30 ); ua->proxy('http', HTTP_PROXY);

#

# ホスト名部分を抽出

# ホスト名の後ろに、必ず / が入っていることを仮定 $URL =~ m%^.*//([^/]+)/.*%;

print "URL $URL\n"; print "ホスト部分 $1\n";

my http_res = ua->get($URL , ":content_file" => "Comfort.txt");

print "HTTP レスポンスコード $http_res\n";

===ダウンロードしたファイルをパースする部分===

# perl use strict; use warnings; use autodie; use utf8; use Encode; use Encode::Guess qw(euc-jp cp932 iso-2022-jp); use HTML::Entities ();

my $_ = find_encoding('cp932'); binmode STDIN, ':encoding(cp932)'; binmode STDOUT, ':encoding(cp932)'; binmode STDERR, ':encoding(cp932)';

my %day_of_week = (

	sun => "日", mon => "月", tue => "火", wed => "水",
	thu => "木", fri => "金", sat => "土"

); my %day_or_night = (

	day => '昼', night => '夜'

); my $do_print = 0; my (day, wday) = (); my $d_or_n = "";

while(<>){

	if (m#day_week_image/([0-3][0-9]).gif#){
		$day = $1;
	}
	if (m#day_week_image/([a-w]{3}).gif#){
		$wday = $day_of_week{$1};
	};
	if (m#contents_image/(day|night).gif#){
		$d_or_n = $day_or_night{$1};
	};
	if ( s#.* class="style20">## ){
		if (defined($day) or $d_or_n ne ""){
			printf("\n\n%d日 (%s) %s\n", $day, $wday, $d_or_n);
			($day, $wday) = ();
			$d_or_n = "";
		}
		$do_print = 1;
	}
	if ( m# valign="middle".*<hr.*/>|<div align="right"># ){
		$do_print = 0;
		next;
	}
	$_ = HTML::Entities::decode($_);
	s#<[^>]*>##g;
	s#\s\s+# #g;
	s#^\s*(.*)\s*$#\1#;
	if ( m#^$# ){ next; }
	if ( $do_print ){
		print "$_\n";
		next;
	}

}