Read/Write Files
References
Read Excel
- https://sparkbyexamples.com/pandas/pandas-read-excel-with-examples/ (uses pandas)
- https://www.dangtrinh.com/2015/08/excel-to-list-of-dictionaries-in-python.html (uses openpyxl)
requirement.txt file
pandas==2.0.3
xlrd==2.0.1
openpyxl==3.1.2
import pandas as pd
df = pd.read_excel("MeteorData.xlsx", sheet_name="Sheet1", nrows=10)
#print(df)
# Iterate all rows using DataFrame.iterrows()
"""
for index, row in df.iterrows():
print (index, row["Year"], row["ID"], row["Name"], row["Mass"])
"""
# Iterate all rows using DataFrame.itertuples() -- faster than iterrows
"""
for row in df.itertuples(index = True):
print (getattr(row,'Index'), getattr(row, "Year"), getattr(row, "ID"), getattr(row, "Name"), getattr(row, "Mass"))
"""
# Convert the pandas dataframe to a list of dictionaries
# https://datatofish.com/convert-pandas-dataframe-to-list/
def format_cell_value(cell_value):
# TODO: The Year column is a date time field with output like this: Timestamp('2000-01-01 00:00:00'). Need to convert to date time string.
if type(cell_value) is bytes:
cell_value = cell_value.encode('utf-8').decode('ascii', 'ignore')
cell_value = cell_value.strip()
elif type(cell_value) is int:
cell_value = str(cell_value)
elif cell_value is None:
cell_value = ''
return cell_value
list_of_dictionaries = []
for index, row in df.iterrows():
row_dictionary = {}
for col in df.columns:
row_dictionary[col] = format_cell_value(row[col])
list_of_dictionaries.append(row_dictionary)
print(list_of_dictionaries)
Read a JSON File
Using simplejson package
import simplejson as json
with open('config.json', encoding="UTF-8") as f:
loadedJSON = json.load(f, encoding="UTF-8")
# Interact with the loaded JSON file as a python dictionary
# Get a count of items: len(loadedJSON)
# Get value of a item property: loadedJSON[propName], loadedJSON[0][propName]
# Iterate the items: for jsonItem in loadedJSON:
# Pretty print the loaded JSON
# print(json.dumps(loadedJSON, sort_keys=True, indent=4 * ' '))
Write to a JSON File
import json
jsonData = {"a": 100, "b": "A string value"}
with open("output.json", "w") as f:
json.dump(data, f)
import json
jsonData = {"a": 100, "b": "A string value"}
with open("output.json", "w", encoding="UTF-8") as f:
json.dump(jsonData, f, ensure_ascii=False, sort_keys=True, indent=4)
Read SQLite
environment.yaml file
name: project-name
channels:
- defaults
dependencies:
- pandas=1.5.3
- python=3.11.3
import sqlite3 as sl
import pandas as pd
con = sl.connect('test.db')
"""
with con:
con.execute('CREATE TABLE USER (id INTEGER NOT NULL PRIMARY KEY AUTOINCREMENT,name TEXT,age INTEGER);')
"""
"""
sql = 'INSERT INTO USER (id, name, age) values(?, ?, ?)'
data = [
(1, 'Alice', 21),
(2, 'Bob', 22),
(3, 'Chris', 23)
]
with con:
con.executemany(sql, data)
"""
with con:
data = con.execute("SELECT * FROM USER WHERE age <= 45")
for row in data:
print(row)
df = pd.read_sql_query("SELECT * FROM USER WHERE age <= 45", con)
print(df)
con.commit()
con.close()