Python 中文數字轉換

starzodiac
4 min read · Feb 9, 2021

--

關鍵字: python 大寫數字轉換、中文轉阿拉伯數字

這份程式大部分是參考網路上的做法,其中比較特別的是需要先做資料清洗(利用 re.split() 濾掉「元、整、NT$」等無意義字元),同時也做了不少防呆判斷(例如「億」、「萬」不能重複出現,像「一萬萬」會回傳 fail),給大家參考。

import re
from smallamountparser import small_amount_convert
# Lookup table from a single numeral character to its integer value.
# Values below 10 are digits; 10/100/1000 are in-section units; 10000 and
# 100000000 are the section units.
convert_table = {
    # Everyday numerals.
    '零': 0,
    '一': 1,
    '二': 2,
    '兩': 2,
    '三': 3,
    '四': 4,
    '五': 5,
    '六': 6,
    '七': 7,
    '八': 8,
    '九': 9,
    # Everyday in-section units.
    '十': 10,
    '百': 100,
    '千': 1000,
    # Circle-shaped zeros.
    '〇': 0,
    '○': 0,
    # ASCII digits.
    '0': 0,
    '1': 1,
    '2': 2,
    '3': 3,
    '4': 4,
    '5': 5,
    '6': 6,
    '7': 7,
    '8': 8,
    '9': 9,
    # Financial (anti-forgery) numerals.
    '壹': 1,
    '貳': 2,
    '參': 3,
    '肆': 4,
    '伍': 5,
    '陸': 6,
    '陆': 6,
    '柒': 7,
    '捌': 8,
    '玖': 9,
    # Financial in-section units.
    '拾': 10,
    '佰': 100,
    '仟': 1000,
    # Section units.
    '萬': 10000,
    '億': 100000000,
}
# Character class of symbols stripped from the input before parsing
# (currency words/marks, the letters N and T, '*', '※', ',' and '-').
DELIM = r'[圓元正整*※$NT,-]'


def amount_convert(input_str):
    """Convert a Chinese-numeral amount string to an Arabic-numeral string.

    Every character matched by ``DELIM`` is removed first. The remaining
    characters are looked up one by one in ``convert_table`` and combined
    as digit / unit (十 百 千) / section-unit (萬 億) tokens.

    Args:
        input_str: Raw amount text, e.g. ``'壹萬貳仟參佰元整'``.

    Returns:
        * ``input_str`` itself when it is the empty string.
        * ``None`` when the cleaned text is accepted by ``float()``; the
          numeric branch is elided in this snippet (see the note below).
        * ``"fail: " + input_str`` when a character is not in
          ``convert_table``, two digits are adjacent, a unit directly
          follows another unit (e.g. 一百百), or 萬 / 億 appears twice.
        * Otherwise the value as a decimal ``str``.
    """
    if input_str == '':
        return input_str

    # Drop every delimiter character; the empty pieces that re.split()
    # produces vanish in the join, so no explicit filtering is needed.
    amount_str = ''.join(re.split(DELIM, input_str))

    try:
        float(amount_str)
    except ValueError:
        pass  # Not a plain number: fall through to the Chinese parser.
    else:
        # NOTE(review): the original snippet only says "do small amount
        # convert..." here and shows no code, so this path returns None.
        # Presumably it should delegate to small_amount_convert, which the
        # file imports -- confirm its signature before wiring it in.
        return None

    failure = "fail: " + input_str

    digit = 0             # base digit waiting for its unit (0 = none)
    is_digit = False      # previous token was a non-zero digit
    is_magnitude = False  # previous token was a unit (十 百 千 萬 億)
    is_wan = False        # 萬 has already been consumed
    is_yi = False         # 億 has already been consumed
    tmp_total = 0         # value accumulated inside the current section
    result = 0            # value of all sections closed so far

    for digit_str in amount_str:
        cur_num = convert_table.get(digit_str)
        if cur_num is None:
            return failure

        if cur_num < 10:
            # A zero is only a placeholder between two words: skip it
            # without touching any state.
            if cur_num == 0:
                continue
            # Two digits in a row (e.g. 一二) is not a valid amount.
            if is_digit:
                return failure
            digit = cur_num
            is_digit = True
            is_magnitude = False
            continue

        is_section_unit = cur_num in (10000, 100000000)

        # A unit may only follow another unit when it is 萬 or 億
        # (十萬 is fine, 一百百 is not).
        if is_magnitude and not is_section_unit:
            return failure

        if is_section_unit:
            # 億 and 萬 may each appear at most once.
            if is_yi and cur_num == 100000000:
                return failure
            if is_wan and cur_num == 10000:
                return failure
            # digit is 0 whenever no digit is pending, so one expression
            # covers both the "digit pending" and "no digit" cases.
            result += (tmp_total + digit) * cur_num
            tmp_total = 0
            if cur_num == 100000000:
                is_yi = True
            else:
                is_wan = True
        else:
            # A bare 十/拾 (no digit in front) means 1 x 10, so that
            # 十五 -> 15 and 十萬 -> 100000 instead of losing the ten.
            # NOTE(review): a bare 百/千 still contributes 0, as before.
            if cur_num == 10 and not is_digit:
                digit = 1
            tmp_total += digit * cur_num

        is_magnitude = True
        is_digit = False
        digit = 0

    # Add the last, unclosed section plus any trailing units digit.
    result += tmp_total + digit
    return str(result)

--

--

No responses yet