#!/usr/bin/perl
use strict;
use warnings;
use utf8;
use Encode;
use Unicode::Japanese;
use Unicode::Normalize;
use HTML::TreeBuilder;
use MeCab;
use HTML::TagCloud;
use URI::Escape;

# Driver: print the page header, one tag-cloud section per speech file found
# in the current directory, then the footer that wires up the slider.
binmode STDOUT, ':utf8';

my $count = 0;
print header();

opendir(my $dh, '.') or die "cannot open current directory: $!";
# readdir returns entries in no particular order.  The file names are
# YYYYMMDD.html, so a plain string sort yields chronological order and the
# slider positions (tl_1 .. tl_N) then run from oldest to newest speech.
my @files = sort grep { /\A\d{8}\.html\z/ } readdir($dh);
closedir($dh);

# With no sections the slider script in the footer has nothing to show.
warn "no YYYYMMDD.html files found in the current directory\n" unless @files;

foreach my $file (@files) {
	print STDERR "processing $file...\n";
	parsefile($file, ++$count);
}

print footer($count);

# Build the tag cloud for one speech file and print it as a <div> section.
#
# Arguments:
#   $file  - name of a YYYYMMDD.html file; the leading eight digits are
#            rendered as the section heading (YYYY/MM/DD).
#   $count - sequence number of the section, used for the div id "tl_$count".
#
# The text of the element with id "content2" is tokenized with MeCab, nouns
# are counted, and the HTML::TagCloud markup is printed to STDOUT.
# Dies if the file cannot be opened or has no "content2" element.
sub parsefile
{
	my $file = shift;
	my $count = shift;

	# Three-argument open: the file name can never be interpreted as a mode.
	open(my $fh, '<', $file) or die "cannot open $file: $!";
	my $data = do {
		# Slurp mode for this read only; $/ is restored when the block ends,
		# so other readers in the program keep line-by-line behaviour.
		local $/;
		<$fh>;
	};
	close($fh);

	my $tree = HTML::TreeBuilder->new;
	$tree->parse(NFKC(Unicode::Japanese->new($data, 'auto')->getu));
	# Signal end of input so the parser flushes any pending text.
	$tree->eof;

	my $content = $tree->look_down('id', 'content2');
	unless ($content) {
		$tree->delete;
		die "$file: no element with id \"content2\" found";
	}
	my $text = $content->as_text;
	# Release the element tree explicitly; one tree is built per input file.
	$tree->delete;

	# Replace ASCII control characters and punctuation with spaces.
	$text =~ s/[\x00-\x2F\x3A-\x40\x5B-\x60\x7B-\x7F]/ /g;

	my %words;
	my $mecab = MeCab::Tagger->new;
	# Keep the encoded input in a named variable for the whole traversal.
	# NOTE(review): MeCab nodes are understood to reference the input buffer
	# rather than copy it - confirm against the binding in use.
	my $euc_text = encode('EUC-JP', $text);
	my $node = $mecab->parseToNode($euc_text);
	while ($node = $node->{next}) {
		my $midasi = decode('EUC-JP', $node->{surface});
		# Feature string is comma separated; only the first two fields
		# (part of speech and its sub-category) are needed here.
		my ($hinsi, $bunrui) = split /,/, decode('EUC-JP', $node->{feature});

		# Count nouns only, skipping dependent nouns, pronouns and suffixes.
		next unless defined $hinsi && $hinsi eq '名詞';
		next if defined $bunrui && $bunrui =~ /(?:非自立|代名詞|接尾)/;
		$words{$midasi}++;
	}

	my $cloud = HTML::TagCloud->new;
	while (my ($word, $freq) = each(%words)) {
		$cloud->add($word, '#' . uri_escape(encode('UTF-8', $word)), $freq);
	}
	my $html = $cloud->html(200);
	# Several clouds share one page, so the fixed id becomes a class.
	$html =~ s/id="htmltagcloud"/class="htmltagcloud"/;

	my $date = sprintf('%04d/%02d/%02d', ($file =~ /^(\d{4})(\d{2})(\d{2})/));
	print <<"EOD";
<div class="tl" id="tl_$count">
<h2>$date</h2>
EOD

	print $html;

	print <<"EOD";
</div>
EOD

}

# Return the static top of the page: doctype, <head> (scripts, stylesheet,
# inline CSS), the page title and the empty slider container.
# Takes no arguments.
sub header
{
	# Everything up to the per-level tag cloud rules.
	my $top = <<'EOD';
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">
<html lang="ja">
<head>
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
<meta http-equiv="Content-Style-Type" content="text/css">
<meta http-equiv="Content-Script-Type" content="text/javascript">
<title>横浜市長施政方針演説タグクラウド</title>
<script type="text/javascript" src="range.js"></script>
<script type="text/javascript" src="timer.js"></script>
<script type="text/javascript" src="slider.js"></script>
<link type="text/css" rel="StyleSheet" href="winclassic.css">
<style type="text/css"><!--
.tl { display: none }
body { font-size: 12px }
.footer { font-size: 10px }
EOD

	# Two rules per tag cloud level 0..24; level N is drawn at (12 + N)px.
	my $level_css = '';
	for my $level (0 .. 24) {
		$level_css .= sprintf(
			"span.tagcloud%d { font-size: %dpx;}\nspan.tagcloud%d a {text-decoration: none;}\n",
			$level, 12 + $level, $level,
		);
	}

	# Close the style sheet and open the body with title and slider markup.
	my $bottom = <<'EOD';
--></style>
</head>
<body>
<h1>横浜市長施政方針演説タグクラウド</h1>
<p class="description">Inspired by <a href="http://blog.bulknews.net/PMTagCloud/">Japanese Prime Minister Speeches Tag Cloud</a>.</p>
<div class="slider" id="slider-1">
<input class="slider-input" id="slider-input-1" name="slider-input-1">
</div>
EOD

	return $top . $level_css . $bottom;
}

# Return the closing part of the page: the script that configures the slider
# and toggles the visible "tl_N" section, followed by </body></html>.
#
# Arguments:
#   $count - number of sections printed; becomes the slider maximum.
sub footer
{
	my ($total) = @_;

	# Static script text is kept in non-interpolating heredocs; only the
	# setMaximum line between them depends on the argument.
	my $opening = <<'EOD';
<script type="text/javascript">
<!--
 var s = new Slider(document.getElementById("slider-1"), document.getElementById("slider-input-1"));
 s.setMinimum(1);
EOD

	my $closing = <<'EOD';
 s.setValue(s.getMaximum());
 s.onchange = function () { sv = s.getValue(); if(sv != last_tl) { document.getElementById('tl_'+sv).style.display = 'block'; document.getElementById('tl_'+last_tl).style.display = 'none'; last_tl = sv; } };
 var last_tl = s.getValue();
 document.getElementById('tl_'+last_tl).style.display = 'block';
// -->
</script>
</body>
</html>
EOD

	return $opening . " s.setMaximum($total);\n" . $closing;
}
