2018 December 11 Programming

Python读取/写入：常用数据文件

1. JSON

import json

# ------ Read a JSON file ------
# The context manager closes the handle as soon as parsing is done.
# JSON text is conventionally UTF-8, so the encoding is stated explicitly
# instead of relying on the platform default.
with open('example.json', 'r', encoding='utf-8') as f:
    fdict = json.load(f)  # type: dict  (when the top-level JSON value is an object)

# ------ Serialize a dict to a str ------
str_dict = json.dumps(fdict)  # type: str

# ------ Parse a str back into a dict ------
new_dict = json.loads(str_dict)  # type: dict

# ------ Write a dict to a JSON file ------
# Leaving the `with` block flushes and closes the file, so the data is
# guaranteed to be on disk afterwards.
with open('example.json', 'w', encoding='utf-8') as f:
    json.dump(new_dict, f)

2. CSV

import numpy as np
fname = 'mnist.txt'
# Load selected columns of a comma-separated text file into a NumPy array.
data = np.loadtxt(fname,
                  delimiter=',',   # string used to separate values
                  skiprows=2,      # skip the first 2 lines
                  usecols=[0, 2],  # read the 1st and 3rd columns
                  dtype=str)       # element type of the resulting array

import csv
# ----------------- Reading CSV files -----------------
# ------ Plain reader ------
# newline='' hands newline handling over to the csv module, so quoted
# fields that contain line breaks are parsed correctly.
with open('test.csv', 'r', newline='') as fp:
    lines = csv.reader(fp)
    for line in lines:
        print(line)  # each row is a list
# ------ DictReader ------
# No fieldnames are passed, so DictReader takes the keys from the first row.
with open('names.csv', newline='') as name_file:
    for record in csv.DictReader(name_file):
        print(record['first_name'], record['last_name'])
# ----------------- Writing CSV files -----------------
# ------ Plain writer ------
# The `with` block closes the file on exit, which flushes the buffered rows
# to disk. The open() arguments and the explicit lineterminator are kept
# as they were so the bytes written stay the same.
with open('test.csv', 'w') as fp:
    writer = csv.writer(fp, delimiter=',', lineterminator='\n')
    writer.writerow(['col1', 'col2'])  # write a single row
    writer.writerows([['col1', 'col2'], ['item1', 'item2']])  # write several rows
# -----------
# Passing newline='' is always safe: the csv module performs its own
# (universal) newline handling.
with open('eggs.csv', 'w', newline='') as egg_file:
    egg_writer = csv.writer(
        egg_file,
        delimiter=' ',
        quotechar='|',
        quoting=csv.QUOTE_MINIMAL,
    )
    # Fields containing the delimiter (a space) get wrapped in '|' quotes.
    egg_writer.writerows([
        ['Spam'] * 5 + ['Baked Beans'],
        ['Spam', 'Lovely Spam', 'Wonderful Spam'],
    ])
# ------ DictWriter ------
# Rows are given as dicts; `fieldnames` fixes the column order in the file.
with open('names.csv', 'w', newline='') as name_file:
    columns = ['first_name', 'last_name']
    dict_writer = csv.DictWriter(name_file, fieldnames=columns)
    dict_writer.writeheader()  # emit the header row built from the field names
    dict_writer.writerow({'first_name': 'Baked', 'last_name': 'Beans'})

3. XLS/XLSX

import pandas as pd  # the package is `pandas`, not `panda`
# -------------------------------
fname = 'example.xlsx'
sheet_name = 'xxx'

# ExcelFile works as a context manager; leaving the block closes the workbook.
with pd.ExcelFile(fname) as data:
    print(data.sheet_names)  # access the sheet names

    # `usecols` replaces the removed `parse_cols` keyword.
    sheet = data.parse(sheet_name,
                       usecols=[0],        # keep only the first column
                       skiprows=[0],       # skip the first row
                       names=['Country'])  # sheet type: DataFrame or dict of DataFrames
# -------------------------------
# ------ Equivalent alternative ------
sheet = pd.read_excel(fname,
                      sheet_name,
                      usecols=[0],
                      skiprows=[0],
                      names=['Country'])

4. MAT

from scipy.io import loadmat
fname = 'example.mat'
# loadmat returns a dictionary with variable names as keys and the loaded
# matrices as values.
mat = loadmat(fname)

5. PKL

import pickle
# NOTE: unpickling can execute arbitrary code; only load files you trust.
# The `with` block closes the file handle once the object has been loaded.
with open('example.pkl', 'rb') as f:
    data = pickle.load(f)

6. HDF5

import h5py
fname = 'example.hdf5'
# Open the file read-only. This snippet never closes it, so the handle
# bound to `data` stays open for whatever reads follow.
data = h5py.File(name=fname, mode='r')