def byte2str(data):
    """Convert raw bytes made of 16-bit little-endian code units to a string.

    Each 2-byte unit is mapped to the character with that code point (no
    surrogate pairing is attempted). A carriage return is converted to a
    newline and plain spaces are dropped.

    Args:
        data: bytes-like object whose length is a multiple of 2.

    Returns:
        The decoded string.

    Raises:
        struct.error: if ``len(data)`` is not a multiple of 2.
    """
    chars = []
    # '<H' pins the byte order to little-endian; a bare 'H' would follow the
    # host CPU and give different results on big-endian machines.
    for (code,) in struct.iter_unpack('<H', data):
        ch = chr(code)
        if ch == u'\r':
            chars.append(u'\n')
        elif ch != u' ':
            chars.append(ch)
    return u''.join(chars)


# Get the pinyin table
def getPyTable(data):
    """Fill the module-level ``GPy_Table`` from a raw pinyin-table section.

    After a 4-byte header, the section is read as a sequence of records:
    a 16-bit index, a 16-bit byte length, then that many bytes of text
    decoded with ``byte2str``. Each record is stored as
    ``GPy_Table[index] = text``.

    Args:
        data: bytes of the section, starting with the 4-byte header.

    Returns:
        None. If the header is not ``b"\\x9D\\x01\\x00\\x00"`` the function
        returns immediately without touching ``GPy_Table``.

    Raises:
        struct.error: if the data ends in the middle of a record header.
    """
    if data[0:4] != b"\x9D\x01\x00\x00":
        return None
    data = data[4:]
    pos = 0
    length = len(data)
    while pos < length:
        # Explicit little-endian ('<') so parsing does not depend on the
        # byte order of the host CPU.
        index, size = struct.unpack_from('<HH', data, pos)
        pos += 4
        GPy_Table[index] = byte2str(data[pos:pos + size])
        pos += size
# Get the pinyin of a phrase
def getWordPy(data):
    """Return the ``GPy_Table`` entries referenced by *data*, joined by "'".

    *data* is read as consecutive 16-bit little-endian indexes; each index
    is looked up in the module-level ``GPy_Table``.

    Args:
        data: bytes-like object holding the packed indexes.

    Returns:
        The looked-up entries joined with an apostrophe (empty string for
        empty input).

    Raises:
        KeyError: if an index is not present in ``GPy_Table``.
        struct.error: if *data* ends with an incomplete 2-byte index.
    """
    syllables = []
    for pos in range(0, len(data), 2):
        # '<H' fixes the byte order instead of relying on the host CPU.
        (index,) = struct.unpack_from('<H', data, pos)
        syllables.append(GPy_Table[index])
    return '\''.join(syllables)
# Get a phrase
def getWord(data):
    """Return the ``GPy_Table`` entries referenced by *data*, concatenated.

    *data* is read as consecutive 16-bit little-endian indexes; each index
    is looked up in the module-level ``GPy_Table`` and the results are
    joined with no separator.

    NOTE(review): despite the name, this resolves indexes through
    ``GPy_Table`` exactly like ``getWordPy`` -- confirm that is intended.

    Args:
        data: bytes-like object holding the packed indexes.

    Returns:
        The concatenated entries (empty string for empty input).

    Raises:
        KeyError: if an index is not present in ``GPy_Table``.
        struct.error: if *data* ends with an incomplete 2-byte index.
    """
    parts = []
    for pos in range(0, len(data), 2):
        # Read both bytes straight from the buffer: indexing bytes yields
        # ints on Python 3, so adding data[pos] and data[pos + 1] cannot
        # rebuild the 2-byte field. '<H' also pins the byte order.
        (index,) = struct.unpack_from('<H', data, pos)
        parts.append(GPy_Table[index])
    return u''.join(parts)