diff --git a/parse.py b/parse.py new file mode 100644 index 0000000..fb69625 --- /dev/null +++ b/parse.py @@ -0,0 +1,57 @@ +import sqlite3 +from datetime import datetime, timedelta +from json import load +from os import listdir +from os.path import join + +from tqdm import tqdm + +DIRECTORY = "./data" + +con = sqlite3.connect("data.db") +cu = con.cursor() + +# Create database table +cu.execute( + "CREATE TABLE messages (id INTEGER, date TEXT, hour INT, category TEXT, type TEXT)" +) + +for file in listdir(DIRECTORY): + print(file) + category = input("Category: ") + with open(join(DIRECTORY, file), "r") as f: + json = load(f) + for message in tqdm(load(f).get("messages")): + # Parse time string into DateTime object + # Some of the messages don't have miliseconds so there needs to be two statements + try: + timestamp = datetime.strptime( + message["timestamp"], "%Y-%m-%dT%H:%M:%S.%f%z" + ) + except ValueError: + timestamp = datetime.strptime( + message["timestamp"], "%Y-%m-%dT%H:%M:%S%z" + ) + # Convert UCT to EDT + hour = timestamp.hour - 5 + + # Change date if hour rolled around + if hour < 0: + hour += 24 + timestamp -= timedelta(days=1) + + cu.execute( + "INSERT INTO messages (date, hour, category, type) VALUES (?, ?, ?, ?)", + ( + timestamp.strftime("%Y-%m-%d"), + hour, + category, + "SENT" + if message["author"]["name"] == "paulstretch" + else "RECEIVED", + ), + ) + +# Commit changes and close the database +con.commit() +con.close() \ No newline at end of file