Newer
Older
Ruby / index.cgi
@KONNO Ayato KONNO Ayato on 1 Oct 2022 3 KB 2022-10-01 15:05:54
<!DOCTYPE html>
<html>
<!-- Default style for pretty-print of text files -->
<head><title>shonaiben_translator.rb</title>
<style type="text/css">
pre {margin-left: 6ex;}
pre span {counter-increment: linenum; text-align: right; position: relative;}
pre span:before {
    position: absolute; right: 0px;
    background: #ddd; text-align: right; padding: 0 0.2ex;
    width: 5.5ex; margin-right: 0.5ex;
    content: counter(linenum) ":";
}
</style>
</head>
<body>
<p>[[ <a href="?showattc+article_m+280+raw">RAW TEXT</a>(DOWNLOAD here) ]]</p>
<pre lang="en">
<span></span>#!/usr/koeki/bin/ruby
<span></span># -*- coding: utf-8 -*-
<span></span>
# Computes the Levenshtein (edit) distance between two strings.
#
# The distance is the minimum number of single-character insertions,
# deletions and substitutions needed to turn string1 into string2.
# Characters are taken from String#chars, so one character counts as one
# edit regardless of how many bytes it occupies.
#
# The computation is iterative (two-row dynamic programming) rather than
# recursive, so long inputs cannot exhaust the call stack, and no substring
# copies or string-pair hash keys are created along the way.
#
# @param string1 [String] source text
# @param string2 [String] target text
# @param memo [Hash] unused; accepted only so that existing calls that pass
#   a memo table keep working
# @return [Integer] the edit distance (0 when the strings are equal)
def levenshtein(string1, string2, memo={})
  chars1 = string1.chars
  chars2 = string2.chars

  # A shared prefix or suffix never contributes to the distance; dropping
  # both keeps the quadratic table below as small as possible.
  shared_prefix = chars1.zip(chars2).take_while { |c1, c2| c1 == c2 }.size
  chars1 = chars1.drop(shared_prefix)
  chars2 = chars2.drop(shared_prefix)
  shared_suffix = chars1.reverse.zip(chars2.reverse).take_while { |c1, c2| c1 == c2 }.size
  chars1 = chars1.take(chars1.size - shared_suffix)
  chars2 = chars2.take(chars2.size - shared_suffix)

  # If one side is used up, the rest of the other side is pure insertions
  # (or deletions).
  return chars2.size if chars1.empty?
  return chars1.size if chars2.empty?

  # previous_row[j] holds the distance between the part of chars1 consumed
  # so far and the first j characters of chars2.
  previous_row = (0..chars2.size).to_a
  chars1.each_with_index do |char1, i|
    current_row = [i + 1]
    chars2.each_with_index do |char2, j|
      substitution = previous_row[j] + (char1 == char2 ? 0 : 1)
      deletion = previous_row[j + 1] + 1
      insertion = current_row[j] + 1
      current_row.push([substitution, deletion, insertion].min)
    end
    previous_row = current_row
  end
  previous_row.last
end
<span></span>
# Evaluates a nearest-sentence lookup from Shonai-dialect sentences to
# standard-Japanese sentences: for every dialect sentence, the candidate with
# the smallest edit distance is selected and reported next to the reference.
#
# Line i of shounaiben.txt is paired with line i of kyoutsuugo.txt, which
# serves as its reference translation; every line of kyoutsuugo.txt is also
# a candidate for every dialect sentence.

# Read both files explicitly as UTF-8 so that distances are counted per
# character whatever the process locale is, and strip line terminators once.
source = File.readlines("shounaiben.txt", encoding: "UTF-8").map { |line| line.chomp }
candidates = File.readlines("kyoutsuugo.txt", encoding: "UTF-8").map { |line| line.chomp }

# Every dialect sentence needs a reference on the same line number; stop with
# a clear message instead of failing later on a missing (nil) line.
if source.length > candidates.length
  abort "kyoutsuugo.txt の行数が shounaiben.txt より少ないため、正解訳文を対応付けられません"
end

source.each_with_index do |sentence, i|
  reference = candidates[i]

  # Distance from this dialect sentence to every candidate, in file order.
  distances = candidates.map { |cand| levenshtein(sentence, cand) }
  dist2target = distances[i]
  min_dist = distances.min

  # Ties are resolved in favor of the reference translation; otherwise the
  # earliest line with the minimum distance wins.
  best_cand = dist2target == min_dist ? reference : candidates[distances.index(min_dist)]

  # Number of candidates at least as close as the reference; the reference
  # itself is included in the count.
  count_same_or_lower = distances.count { |dist| dist2target >= dist }

  printf("庄内弁の文章: %s\n", sentence)
  printf("共通語の訳文(正解): %s\n", reference)
  printf("最短編集距離の訳文: %s\n", best_cand)
  printf("正解訳文への編集距離: %d\n", dist2target)
  printf("最短編集距離: %d\n", min_dist)
  printf("編集距離が正解訳文への距離以下である文章の総数: %d\n", count_same_or_lower)
  puts
end
</pre>
</body>
</html>