"""Export sentence and vocabulary rows from an Excel sheet to JSON.

Each exported sheet row describes one sentence:

* column 0 -> ``sentence_id``
* column 1 -> ``sentence`` (the cell split on ``|``)
* column 2 -> ``语法笔记``
* column 3 -> ``核心词表``: word records, 8 ``|``-terminated fields each
* column 4 -> ``主题归纳``: same 8-field records, where the first field may
  carry a ``...的词`` topic header that applies to that record and to the
  following records of the cell

Inside a record, the last six fields may hold several values separated by
``$``; such a field is exported as a list instead of a string.  ``word_id``
is a running counter shared by both word lists across all exported rows.
"""
import json

# Where to read from / write to, and which sheet rows to export.
WORKBOOK_PATH = "C:\\Users\\zcb\\Desktop\\data.xlsx"
SHEET_NAME = "Sheet1"
FIRST_ROW = 51  # first row index passed to ``sheet.row_values``
STOP_ROW = 59  # exclusive upper bound, i.e. range(51, 59)
OUTPUT_PATH = "final_data.json"

# Layout of one word record inside a cell.
FIELDS_PER_WORD = 8
TOPIC_MARKER = "的词"
# Output keys for fields 2..7 of a record, in order; each may be "$"-separated.
MULTI_VALUE_KEYS = ("词性", "记忆", "搭配", "同义", "反义", "同根")


def split_multi_value(raw):
    """Return *raw* stripped of spaces/newlines, or a list if it contains ``$``.

    A field without ``$`` is returned as a single stripped string.  A field
    with ``$`` is split on it and every part is stripped of spaces.
    """
    text = raw.strip(" \n")
    if "$" in text:
        return [part.strip(" ") for part in text.split("$")]
    return text


def split_word_groups(cell):
    """Split a ``|``-terminated cell into consecutive 8-field word records.

    The text after the final ``|`` is discarded (every field is expected to
    be terminated by ``|``), so an empty cell yields no records.

    Raises:
        ValueError: if the number of fields is not a multiple of 8, which
            means a record is truncated or a ``|`` is missing/extra.
    """
    fields = cell.split("|")[:-1]
    if len(fields) % FIELDS_PER_WORD:
        raise ValueError(
            "expected a multiple of %d '|'-terminated fields, got %d: %r"
            % (FIELDS_PER_WORD, len(fields), cell)
        )
    return [
        fields[start:start + FIELDS_PER_WORD]
        for start in range(0, len(fields), FIELDS_PER_WORD)
    ]


def _build_entry(word_id, word, group):
    """Build the dict shared by both word lists from one 8-field record."""
    entry = {
        "word_id": word_id,
        "word": word,
        "pron": group[1].strip(" \n"),
    }
    for key, raw in zip(MULTI_VALUE_KEYS, group[2:]):
        entry[key] = split_multi_value(raw)
    return entry


def parse_core_words(cell, start_id):
    """Parse the ``核心词表`` cell into word dicts numbered from *start_id*."""
    return [
        _build_entry(word_id, group[0].strip(" \n"), group)
        for word_id, group in enumerate(split_word_groups(cell), start_id)
    ]


def parse_topic_words(cell, start_id):
    """Parse the ``主题归纳`` cell into word dicts numbered from *start_id*.

    When the first field of a record contains ``的词``, the text before it
    (plus the marker itself) becomes the current topic and the text after it
    is the word.  Records without the marker inherit the most recent topic of
    the same cell; before any marker has been seen the topic is ``""``
    rather than an unbound name or a topic left over from another row.
    """
    topic = ""
    entries = []
    for word_id, group in enumerate(split_word_groups(cell), start_id):
        head = group[0]
        if TOPIC_MARKER in head:
            topic = head.split(TOPIC_MARKER)[0].strip(" \n") + TOPIC_MARKER
        # Without a marker split() returns the whole field, so [-1] is the word.
        word = head.split(TOPIC_MARKER)[-1].strip(": \n")
        entry = {"主题": topic}
        entry.update(_build_entry(word_id, word, group))
        entries.append(entry)
    return entries


def build_sentence(row, next_id):
    """Convert one sheet row into its sentence dict.

    Args:
        row: sequence of cell values; columns 1, 3 and 4 must be strings.
        next_id: ``word_id`` to give to the first word found in this row.

    Returns:
        ``(sentence, next_id)`` where the returned ``next_id`` is the first
        id not used by this row.
    """
    core_words = parse_core_words(row[3], next_id)
    next_id += len(core_words)
    topic_words = parse_topic_words(row[4], next_id)
    next_id += len(topic_words)

    sentence = {
        "sentence_id": row[0],
        "sentence": row[1].split("|"),
        "语法笔记": row[2],
        "核心词表": core_words,
        "主题归纳": topic_words,
    }
    return sentence, next_id


def main(
    workbook_path=WORKBOOK_PATH,
    sheet_name=SHEET_NAME,
    first_row=FIRST_ROW,
    stop_row=STOP_ROW,
    output_path=OUTPUT_PATH,
):
    """Read the configured rows from the workbook and dump them as JSON.

    Returns the list of sentence dicts that was written to *output_path*.
    """
    # Imported here so the parsing helpers stay importable without xlrd.
    # NOTE(review): xlrd 2.0+ no longer reads .xlsx files; this needs
    # xlrd < 2.0 (or a different reader) for the default workbook - confirm.
    import xlrd

    workbook = xlrd.open_workbook(workbook_path)
    sheet = workbook.sheet_by_name(sheet_name)

    final_data = []
    next_id = 1
    for row_index in range(first_row, stop_row):
        sentence, next_id = build_sentence(sheet.row_values(row_index), next_id)
        final_data.append(sentence)

    with open(output_path, "w", encoding="utf8") as f:
        json.dump(final_data, f, ensure_ascii=False)
    return final_data


if __name__ == "__main__":
    main()
View Code
data.zip: https://files.cnblogs.com/files/zach0812/data.zip