東風と半荘の基礎データ比較

東風戦と半荘戦の副露率がどれくらい違うか知りたかったけどぱっと見データが見当たらなかったので自分で調べた。

サンプル:2019年の四鳳東喰赤と四鳳南喰赤から、それぞれ約2000ゲームをランダムサンプリング
 
  東風 半荘 t検定(p値)
ゲーム数 1977 1998  
局数 5.651 10.655  
和了率 0.217 0.212 0.097
放銃率 0.129 0.125 0.074
リーチ率 0.165 0.177 0.000
副露率 0.377 0.337 0.000
副露回数 3.202 5.336  
副露回数/局数 0.570 0.499  
流局率 0.140 0.156 0.000

 

天鳳の公式のデータを見ていても小数点以下3桁目くらいはぶれてそう。
リーチ率、副露率、流局率は有意に違う。副露率が上がるとリーチ率は下がるし流局率も下がりそうだから納得か。まぁその理屈だと和了率も差があってよさそうだけど……。
結局、副露率は東風のほうが4ポイントくらい高かった(0.377 対 0.337)!自分の普段の副露率+4ポイントくらいを目安にしてみるか……。
東風戦は半荘戦における平場の南1と言われることもあり、半荘でも東風でも、局が進むごとに大雑把には副露率が上がる傾向があるかもしれない。そのあたりも調べたいかも。
あとは平均打点とか和了巡目とかも変わりそう。
 
追記に、特に意味もなくGoogle Colaboratoryで動くコードを置いておきます。なぜかrequestsではmjlogの中身を取ってこれなくて、seleniumを使うことになったのが大変だった。

# module
 
!pip install tqdm
!pip install japanize-matplotlib
 
import os
import pickle
import random
import re
import time
 
import japanize_matplotlib
import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from scipy import stats
import tarfile
from tqdm import tqdm
import xml.etree.ElementTree as ET
 
# selenium準備
 
%%shell
 
# Register the Debian buster, buster-updates and buster security repositories
# as apt sources; each entry is verified against its own keyring file under
# /usr/share/keyrings (created further below).
cat > /etc/apt/sources.list.d/debian.list <<'EOF'
deb [arch=amd64 signed-by=/usr/share/keyrings/debian-buster.gpg] http://deb.debian.org/debian buster main
deb [arch=amd64 signed-by=/usr/share/keyrings/debian-buster-updates.gpg] http://deb.debian.org/debian buster-updates main
deb [arch=amd64 signed-by=/usr/share/keyrings/debian-security-buster.gpg] http://deb.debian.org/debian-security buster/updates main
EOF
 
 
# Fetch the three repository signing keys from keyserver.ubuntu.com.
apt-key adv --keyserver keyserver.ubuntu.com --recv-keys DCC9EFBF77E11517
apt-key adv --keyserver keyserver.ubuntu.com --recv-keys 648ACFD622F3D138
apt-key adv --keyserver keyserver.ubuntu.com --recv-keys 112695A0E562B32A
 
# Export each key (selected by the last 8 hex digits of its id) into the
# keyring file named by the matching signed-by entry above.
apt-key export 77E11517 | gpg --dearmour -o /usr/share/keyrings/debian-buster.gpg
apt-key export 22F3D138 | gpg --dearmour -o /usr/share/keyrings/debian-buster-updates.gpg
apt-key export E562B32A | gpg --dearmour -o /usr/share/keyrings/debian-security-buster.gpg
 
# apt pinning: packages from release "eoan" get priority 500, packages from
# deb.debian.org get 300, except chromium* from deb.debian.org which get 700.
cat > /etc/apt/preferences.d/chromium.pref << 'EOF'
Package: *
Pin: release a=eoan
Pin-Priority: 500
 
Package: *
Pin: origin "deb.debian.org"
Pin-Priority: 300
 
Package: chromium*
Pin: origin "deb.debian.org"
Pin-Priority: 700
EOF
 
# Refresh the package index, then install Chromium and its WebDriver.
apt-get update
apt-get install chromium chromium-driver
 
# Python bindings used to drive the browser.
pip install -q selenium
 
from selenium import webdriver
from selenium.webdriver.common.by import By
 
# Chrome options for driving the browser from Colab.
options = webdriver.ChromeOptions()
for chrome_flag in (
    '--headless',               # run in the background; noted as required on Colab
    '--no-sandbox',             # turn the sandbox off; also noted as required
    '--disable-dev-shm-usage',  # noted as recommended
):
    options.add_argument(chrome_flag)
 
# logからidを取り出す
 
# Output files that receive the pickled id lists (one file per game type).
hantyan_id_list_path = os.path.join("/content/drive/MyDrive/mahjong/tenhou", "hantyan_id_list.txt")
tonpu_id_list_path = os.path.join("/content/drive/MyDrive/mahjong/tenhou", "tonpu_id_list.txt")

# Root folder of the 2019 log listings: one sub-folder per entry, each holding
# HTML listing files.
path = "/content/drive/MyDrive/mahjong/tenhou/log_danisen/log_2019"
len(os.listdir(path))

hantyan_id_list = []
tonpu_id_list = []

# A game id is the text after `log=` up to the NEXT double quote. The character
# class stops at the first quote, so a later quote on the same line cannot be
# swallowed into the id (the previous greedy `.*` ran to the last quote).
log_id_pattern = re.compile(r'log=([^"]*)"')

for folder in os.listdir(path):
    for html_file in os.listdir(os.path.join(path, folder)):
        html_file_path = os.path.join(path, folder, html_file)
        with open(html_file_path, encoding='utf-8') as f:
            log_html = f.read()
        # One listing entry per line; classify it by the game-type label it contains.
        for log in log_html.split("\n"):
            if "四鳳東喰赤" in log:
                tonpu_id_list.append(log_id_pattern.findall(log)[0])
            elif "四鳳南喰赤" in log:
                hantyan_id_list.append(log_id_pattern.findall(log)[0])

print("半荘", len(hantyan_id_list), "東風", len(tonpu_id_list))

# Persist both lists. Each list goes to its own file (the tonpu file previously
# received the hantyan list), and `with` closes the handles so the data is
# flushed to disk.
with open(hantyan_id_list_path, 'wb') as f:
    pickle.dump(hantyan_id_list, f)
with open(tonpu_id_list_path, 'wb') as f:
    pickle.dump(tonpu_id_list, f)
 
# 収集
 
# Load a pickled list from ./list.txt. The result is not used by the code
# visible here; it is kept so that anything relying on `list_row` still works.
# NOTE: pickle.load can execute arbitrary code - only load files you created.
with open("./list.txt", "rb") as f:
    list_row = pickle.load(f)

# Start the browser with the options prepared earlier.
driver = webdriver.Chrome('chromedriver', options=options)
# Wait up to 10 seconds when locating elements on a page.
driver.implicitly_wait(10)

# Base URL; a game id is appended to it to address one game log.
moto_url = r"https://tenhou.net/0/log/?"
sample_size = 1000
# Seed for a reproducible sample (345 is the other seed kept from the original).
# random.seed(345)
random.seed(456)

save_root = "/content/drive/MyDrive/mahjong/tenhou"
# (mode, ids to sample from, output folder). The order matters: the samples are
# drawn from the seeded generator in this order, tonpu first.
download_plan = [
    ("tonpu", tonpu_id_list, "houou_19_tonpu"),
    ("hantyan", hantyan_id_list, "houou_19_hantyan"),
]

try:
    for mode, all_ids, folder_name in download_plan:
        folder_path = os.path.join(save_root, folder_name)
        # Create the output folder on first use instead of failing on the first write.
        os.makedirs(folder_path, exist_ok=True)
        id_list = random.sample(all_ids, sample_size)
        for game_id in tqdm(id_list):
            url = moto_url + game_id
            driver.get(url)
            # The page body text is saved as the game log.
            element = driver.find_element(By.TAG_NAME, "body")
            with open(os.path.join(folder_path, game_id + ".mjlog"), mode='w', encoding='utf-8') as f:
                f.write(element.text)
            # Pause between requests.
            time.sleep(1)
finally:
    # Always shut the browser down, even if a download fails midway.
    driver.quit()
 
# mjlog整形
 
# Folders that hold the downloaded .mjlog files for each game type.
tenhou_root = "/content/drive/MyDrive/mahjong/tenhou"
folder_path_tonpu = os.path.join(tenhou_root, "houou_19_tonpu")
folder_path_hantyan = os.path.join(tenhou_root, "houou_19_hantyan")

# For each folder, print whether its directory listing has no repeated names.
for mjlog_folder in (folder_path_tonpu, folder_path_hantyan):
    print(len(set(os.listdir(mjlog_folder)))==len(os.listdir(mjlog_folder)))
 
class Data:
    """Per-game statistics collected from a folder of mjlog XML files.

    Call ``calc_game_data(folder)`` to parse every file in a folder. Results are
    appended to the ``*_array`` attributes created in ``__init__``:

    * per-seat statistics (win, deal-in, riichi, call, tsumo, ...) contribute
      four values per game, one for each seat index 0-3;
    * ``kyoku_num_array``, ``agari_turn_array`` and ``ryukyoku_rate_array``
      contribute one value per game.

    Rates are counts divided by the number of hands (kyoku) in that game.
    """

    def __init__(self):
        """Create empty result arrays; they grow as games are aggregated."""
        self.kyoku_num_array = np.array([])
        self.agari_rate_array = np.array([])
        self.hoju_rate_array = np.array([])
        self.dama_rate_array = np.array([])
        self.reach_rate_array = np.array([])
        self.furo_rate_array = np.array([])
        self.furo_num_array = np.array([])
        self.furo_num_per_kyoku_array = np.array([])
        self.tsumo_rate_array = np.array([])
        self.agari_point_array = np.array([])
        self.agari_turn_array = np.array([])
        self.ryukyoku_rate_array = np.array([])
        self.ryukyoku_tenpai_rate_array = np.array([])

    @staticmethod
    def _per_seat_zeros():
        """Return a fresh integer counter with one slot per seat (4 seats)."""
        return np.zeros(4, dtype=int)

    def set_game_data(self):
        """Reset the counters that accumulate over one game."""
        self.kyoku_num = 0
        self.agari_game_array = self._per_seat_zeros()
        self.hoju_game_array = self._per_seat_zeros()
        # dama_* is never updated while parsing, so it stays all zero.
        self.dama_game_array = self._per_seat_zeros()
        self.reach_game_array = self._per_seat_zeros()
        self.furo_boolen_game_array = self._per_seat_zeros()
        self.furo_num_game_array = self._per_seat_zeros()
        self.tsumo_game_array = self._per_seat_zeros()
        self.agari_point_game_array = self._per_seat_zeros()
        # agari_turn_num is never updated while parsing, so it stays 0.
        self.agari_turn_num = 0
        self.ryukyoku_num = 0
        # ryukyoku_tenpai_* is never updated while parsing, so it stays all zero.
        self.ryukyoku_tenpai_game_array = self._per_seat_zeros()

    def set_kyoku_data(self):
        """Reset the counters that describe a single hand (kyoku)."""
        self.agari_kyoku_array = self._per_seat_zeros()
        self.hoju_kyoku_array = self._per_seat_zeros()
        self.dama_kyoku_array = self._per_seat_zeros()
        self.reach_kyoku_array = self._per_seat_zeros()
        self.furo_boolen_kyoku_array = self._per_seat_zeros()
        self.furo_num_kyoku_array = self._per_seat_zeros()
        self.tsumo_kyoku_array = self._per_seat_zeros()
        self.agari_point_kyoku_array = self._per_seat_zeros()
        self.ryukyoku_tenpai_kyoku_array = self._per_seat_zeros()

    def cumsum_kyoku_data(self):
        """Add the current hand's counters onto the game totals."""
        self.agari_game_array += self.agari_kyoku_array
        self.hoju_game_array += self.hoju_kyoku_array
        self.dama_game_array += self.dama_kyoku_array
        self.reach_game_array += self.reach_kyoku_array
        self.furo_boolen_game_array += self.furo_boolen_kyoku_array
        self.furo_num_game_array += self.furo_num_kyoku_array
        self.tsumo_game_array += self.tsumo_kyoku_array
        self.agari_point_game_array += self.agari_point_kyoku_array
        self.ryukyoku_tenpai_game_array += self.ryukyoku_tenpai_kyoku_array

    def aggregate_data(self):
        """Append the finished game's totals and rates to the result arrays.

        Must only be called when ``self.kyoku_num`` is non-zero, because the
        rates divide by it.
        """
        hands = self.kyoku_num
        self.kyoku_num_array = np.hstack([self.kyoku_num_array, np.array(hands)])
        self.agari_rate_array = np.hstack([self.agari_rate_array, self.agari_game_array / hands])
        self.hoju_rate_array = np.hstack([self.hoju_rate_array, self.hoju_game_array / hands])
        self.dama_rate_array = np.hstack([self.dama_rate_array, self.dama_game_array / hands])
        self.reach_rate_array = np.hstack([self.reach_rate_array, self.reach_game_array / hands])
        self.furo_rate_array = np.hstack([self.furo_rate_array, self.furo_boolen_game_array / hands])
        # Raw number of calls per seat over the whole game (not a rate).
        self.furo_num_array = np.hstack([self.furo_num_array, self.furo_num_game_array])
        self.furo_num_per_kyoku_array = np.hstack([self.furo_num_per_kyoku_array,
                                                   self.furo_num_game_array / hands])
        self.tsumo_rate_array = np.hstack([self.tsumo_rate_array, self.tsumo_game_array / hands])
        # NOTE(review): this is each seat's TOTAL winning points over the game,
        # not one value per win - confirm this is what "average" consumers expect.
        self.agari_point_array = np.hstack([self.agari_point_array, self.agari_point_game_array])
        self.agari_turn_array = np.hstack([self.agari_turn_array, self.agari_turn_num])
        self.ryukyoku_rate_array = np.hstack([self.ryukyoku_rate_array, self.ryukyoku_num / hands])
        self.ryukyoku_tenpai_rate_array = np.hstack([self.ryukyoku_tenpai_rate_array,
                                                     self.ryukyoku_tenpai_game_array / hands])

    def _read_game(self, root):
        """Walk the child elements of one game and fill the game/hand counters."""
        for child in root:
            tag = child.tag
            attrib = child.attrib
            if tag == "INIT":
                # Start of a hand: bank the previous hand's counters (all zero
                # before the first hand) and start a fresh set.
                self.kyoku_num += 1
                self.cumsum_kyoku_data()
                self.set_kyoku_data()
            elif tag == "N":
                # Every <N> element is counted as one call by seat `who`.
                # NOTE(review): if the log also uses <N> for concealed kans,
                # those are counted as calls too - confirm against the format.
                seat = int(attrib["who"])
                self.furo_boolen_kyoku_array[seat] = 1
                self.furo_num_kyoku_array[seat] += 1
            elif tag == "REACH":
                # Only elements with step "2" are counted as a riichi.
                if attrib["step"] == "2":
                    self.reach_kyoku_array[int(attrib["who"])] = 1
            elif tag == "AGARI":
                winner = int(attrib["who"])
                self.agari_kyoku_array[winner] = 1
                # The second comma-separated field of `ten` is stored as the points won.
                self.agari_point_kyoku_array[winner] = int(attrib["ten"].split(",")[1])
                if attrib["who"] == attrib["fromWho"]:
                    # Winner and source seat are the same: counted as tsumo.
                    self.tsumo_kyoku_array[winner] = 1
                else:
                    self.hoju_kyoku_array[int(attrib["fromWho"])] = 1
                if "owari" in attrib:
                    # `owari` marks the last result element of the game.
                    break
            elif tag == "RYUUKYOKU":
                self.ryukyoku_num += 1
                if "owari" in attrib:
                    break

    def calc_game_data(self, folder_path):
        """Parse every file in ``folder_path`` and append its statistics.

        Each file is parsed as XML; files that contain no INIT element (zero
        hands) contribute nothing. Parse errors propagate to the caller.
        """
        for file in os.listdir(folder_path):
            # Start of a game.
            root = ET.parse(os.path.join(folder_path, file)).getroot()
            self.set_game_data()
            self.set_kyoku_data()
            self._read_game(root)
            if self.kyoku_num != 0:
                # Bank the final hand, which no later INIT has flushed yet.
                self.cumsum_kyoku_data()
                self.aggregate_data()
 
# Build one statistics object per game type, then fill each from its folder.
tonpu_data, hantyan_data = Data(), Data()
tonpu_data.calc_game_data(folder_path_tonpu)
hantyan_data.calc_game_data(folder_path_hantyan)
 
# Summary table: one row per statistic; columns are the tonpu mean, the
# hantyan mean and the p-value of a two-sample t-test between the two.


def _fmt3(value):
    """Format a number with three decimals, as shown in the table."""
    return "{:.3f}".format(value)


def _summary_column(data):
    """Return the ten table cells for one Data object, in row order."""
    # Seats that won nothing in a game are excluded from the points average.
    won_points = data.agari_point_array[data.agari_point_array != 0]
    averaged = [
        data.kyoku_num_array,
        data.agari_rate_array,
        data.hoju_rate_array,
        data.reach_rate_array,
        data.furo_rate_array,
        data.furo_num_array,
        data.furo_num_per_kyoku_array,
        won_points,
        data.ryukyoku_rate_array,
    ]
    # First cell is the number of games; the rest are means.
    return [len(data.kyoku_num_array)] + [_fmt3(np.mean(values)) for values in averaged]


def _p_value(attr_name):
    """Return the formatted t-test p-value for one attribute of both data sets."""
    # NOTE(review): the per-seat arrays hold four values per game, which
    # ttest_ind treats as independent samples.
    result = stats.ttest_ind(getattr(tonpu_data, attr_name), getattr(hantyan_data, attr_name))
    return _fmt3(result[1])


# Rows without a test keep an empty cell. The last entry previously had its
# format string broken across two lines, which is a syntax error.
p_value_column = [
    "",
    "",
    _p_value("agari_rate_array"),
    _p_value("hoju_rate_array"),
    _p_value("reach_rate_array"),
    _p_value("furo_rate_array"),
    "",
    "",
    "",
    _p_value("ryukyoku_rate_array"),
]

pd.DataFrame(
    list(zip(_summary_column(tonpu_data), _summary_column(hantyan_data), p_value_column)),
    index=["ゲーム数", "局数", "和了率", "放銃率", "リーチ率", "副露率", "副露回数", "副露回数/局数",
           "平均和了点数", "流局率"],
    columns=["東風", "半荘", "t検定"])
 
# Overlaid histograms of the win-rate arrays of the two game types.
fig = plt.figure(figsize=(12, 6))
plt.rcParams["font.size"] = 18
ax = plt.subplot(111)
# (An optional y-limit of 0-80 was left disabled: plt.ylim(0, 80).)
for rates, colour, series_label in (
    (tonpu_data.agari_rate_array, "b", "東風"),
    (hantyan_data.agari_rate_array, "g", "半荘"),
):
    ax.hist(rates, bins=50, color=colour, alpha=0.5, label=series_label)
# Show the series labels.
ax.legend()