<!DOCTYPE html>
<html lang="en">
<!-- Default style for pretty-print of text files -->
<head>
  <!-- Declare the encoding explicitly: the listing below contains Japanese text. -->
  <meta charset="utf-8">
  <title>shonaiben_translator.rb</title>
  <style>
    /* Left margin of the listing; the generated line numbers are drawn in this space. */
    pre {margin-left: 6ex;}
    /* Every <span> inside the listing increments the line counter by one. */
    pre span {counter-increment: linenum; text-align: right; position: relative;}
    /* Generated "N:" label, positioned relative to its (empty) span. */
    pre span:before {
      position: absolute;
      right: 0px;
      background: #ddd;
      text-align: right;
      padding: 0 0.2ex;
      width: 5.5ex;
      margin-right: 0.5ex;
      content: counter(linenum) ":";
    }
  </style>
</head>
<body>
  <p>[[ <a href="?showattc+article_m+280+raw">RAW TEXT</a>(DOWNLOAD here) ]]</p>
  <!--
    Listing of the attached Ruby script, one empty <span> per source line so the
    CSS counter above can number the lines.
    * The text inside <pre> is whitespace-sensitive: keep every <span> at column 0.
    * The comments and strings of the script are Japanese, hence lang="ja".
    * A literal "<" in the script is written as &lt; so it is never parsed as markup.
  -->
<pre lang="ja">
<span></span>#!/usr/koeki/bin/ruby
<span></span># -*- coding: utf-8 -*-
<span></span>
<span></span>def levenshtein(string1, string2, memo={}) # レーベンシュタイン距離を計算するメソッド
<span></span>  return memo[[string1, string2]] if memo[[string1, string2]]
<span></span>  return string2.size if string1.empty?
<span></span>  return string1.size if string2.empty?
<span></span>  return levenshtein(string1[1..], string2[1..], memo) if string1[0] == string2[0]
<span></span>  min_dist = 1 + [levenshtein(string1[1..], string2, memo),
<span></span>                  levenshtein(string1, string2[1..], memo),
<span></span>                  levenshtein(string1[1..], string2[1..], memo)
<span></span>                 ].min
<span></span>  memo[[string1, string2]] = min_dist
<span></span>  min_dist
<span></span>end
<span></span>
<span></span>source = File.readlines("shounaiben.txt") # 庄内弁の文章をファイルから読み込み、配列に保存
<span></span>candidates = File.readlines("kyoutsuugo.txt") # 共通語の訳文をファイルから読み込み、配列に保存
<span></span>
<span></span>i = 0
<span></span>while i &lt; source.length # 庄内弁の文章ごとに同じ処理を繰り返す
<span></span>  dist2target = levenshtein(source[i].chomp, candidates[i].chomp) # 正解訳文への編集距離の計算
<span></span>  min_dist = dist2target # 最短編集距離の初期値(=正解訳文への編集距離)
<span></span>  count_same_or_lower = 0 # 編集距離が正解への距離以下である文章の総数の初期値
<span></span>  best_cand = candidates[i].chomp # 最短編集距離の訳文の初期値(=正解訳文)
<span></span>  for cand in candidates # それぞれの共通語の文章への編集距離を調べる
<span></span>    dist = levenshtein(source[i].chomp, cand.chomp)
<span></span>    if dist &lt; min_dist
<span></span>      min_dist = dist
<span></span>      best_cand = cand
<span></span>    end
<span></span>    if dist &lt;= dist2target
<span></span>      count_same_or_lower += 1
<span></span>    end
<span></span>  end
<span></span>  printf("庄内弁の文章: %s\n", source[i].chomp)
<span></span>  printf("共通語の訳文(正解): %s\n", candidates[i].chomp)
<span></span>  printf("最短編集距離の訳文: %s\n", best_cand.chomp)
<span></span>  printf("正解訳文への編集距離: %d\n", dist2target)
<span></span>  printf("最短編集距離: %d\n", min_dist)
<span></span>  printf("編集距離が正解訳文への距離以下である文章の総数: %d\n", count_same_or_lower)
<span></span>  puts
<span></span>  i += 1
<span></span>end
</pre>
</body>
</html>