import json
import pandas as pd
import numpy as np
# Keys copied verbatim from every annotation into its table row, in column order.
ANNOTATION_FIELDS = ("id", "tag_id", "end", "start", "tag_name", "value")


def load_examples(path="Corona2.json"):
    """Return the ``"examples"`` list stored in the JSON file at *path*.

    The file is read as UTF-8 and closed as soon as it has been parsed.
    """
    with open(path, encoding="utf-8") as source:
        return json.load(source)["examples"]


def flatten_annotations(examples):
    """Return one row dict per annotation found in *examples*.

    Each element of *examples* must have an ``"annotations"`` list. Every
    annotation contributes a row holding the keys in ``ANNOTATION_FIELDS``
    plus ``"json"``, which references the original annotation dict itself.

    Raises:
        KeyError: if an example lacks ``"annotations"`` or an annotation
            lacks one of the expected keys.
    """
    rows = []
    for example in examples:
        for annotation in example["annotations"]:
            row = {field: annotation[field] for field in ANNOTATION_FIELDS}
            row["json"] = annotation
            rows.append(row)
    return rows


def build_annotation_table(rows):
    """Return *rows* as a DataFrame sorted by ``tag_name`` with a fresh index.

    The columns are fixed explicitly so that an empty *rows* list still
    produces a frame that has a ``tag_name`` column to sort on.
    """
    table = pd.DataFrame(rows, columns=[*ANNOTATION_FIELDS, "json"])
    # drop=True discards the pre-sort index instead of keeping it as a column.
    return table.sort_values("tag_name").reset_index(drop=True)


def summarize_by_tag(table):
    """Return ``(counts, mean_lengths)`` per ``tag_name`` for *table*.

    ``counts`` is a one-column frame (``id``) with the number of non-null
    annotation ids per tag. ``mean_lengths`` is a one-column frame
    (``value_len``) with the mean ``len()`` of the ``value`` entries per tag.
    Both are indexed by ``tag_name``.
    """
    counts = table.groupby("tag_name")["id"].count().to_frame()
    value_lengths = table["value"].str.len().rename("value_len")
    # Aggregate only the numeric length series: calling mean() on the whole
    # grouped frame would also try to average the text/dict columns, which
    # raises TypeError on pandas >= 2.0.
    mean_lengths = value_lengths.groupby(table["tag_name"]).mean().to_frame()
    return counts, mean_lengths


def main(source="Corona2.json"):
    """Read *source* and write ``table.csv``, ``stat count.csv`` and ``mean len.csv``.

    All three files are written to the current working directory.
    """
    table = build_annotation_table(flatten_annotations(load_examples(source)))
    table.to_csv("table.csv")
    counts, mean_lengths = summarize_by_tag(table)
    counts.to_csv("stat count.csv")
    mean_lengths.to_csv("mean len.csv")


if __name__ == "__main__":
    main()

Amazingmonkeys
- 粉丝: 398
- 资源: 93