Dependencies: W25Q80BV multi-serial-command-listener
Dependents: xj-data-log-test-and-example
Revision 11:bf816d33be80, committed 2016-04-13
- Committer:
- joeata2wh
- Date:
- Wed Apr 13 04:15:43 2016 +0000
- Parent:
- 10:0d1d96ba7279
- Commit message:
- Added Python parser to convert DLOG format captured from the readall command over the serial port into CSV format for use in Excel and R.
Changed in this revision
| pc_parser/parse_dlog.py |
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/pc_parser/parse_dlog.py Wed Apr 13 04:15:43 2016 +0000
@@ -0,0 +1,335 @@
+"""
+ Parse the DLOG format from the chip into something easier to work with on the PC.
+ Combines the CPU ID, date, and time into single records and optionally adjusts
+ the date/time by a local time offset to make them easier to read.
+
+ Optionally saves the data as CSV, TSV, or an Elasticsearch data structure to make
+ secondary analysis easier.
+
+ NOTE: Use the Excel features to wrap text on field names and freeze panes to
+ make the output much easier to use.
+
+ Assumes you have used something like the Tera Term file capture to read the
+ entire log contents from the chip using the "readall" command.
+
+ Due to the way we read the log it is fairly likely that we will re-read the same item
+ multiple times for the same time stamp, so we de-dup them. This is because I quite
+ often issue a readall command and fail to issue an erase afterwards, which causes
+ the same log entries to occur in multiple capture files. This utility keeps only the
+ most recent version found.
+
+ Sometimes it is easier and more space efficient to log a series of smaller records at
+ different time intervals, but you need a holistic view to get an idea of the complete
+ system state. The mergeLast feature allows the system to merge the most recent
+ record of different log types into the current record, providing a larger virtual
+ record in the output CSV.
+
+ We know that we will add fields and record types over time, so we accommodate this by
+ converting all records to a name-indexed version; when we save them we can look up
+ the fields even if some of the older records do not contain all the fields.
+
+ #TODO: Detect type .T fields, parse them as time stamps, and adjust to local time.
+ Otherwise they are difficult to read in Excel.
+
+ #TODO: Consider logging all time fields in the form hh:mm:ss; it will not use much more
+ space than the Unix number and is much easier to read. Have not done it yet because
+ sometimes these go back across a day boundary.
+
+"""
+import re
+import json
+import datetime
+import time
+from datetime import tzinfo, timedelta, datetime
+import csv
+import glob
+
+# match time hh:mm:ss followed by a space, a label, and a tab.
+logPref = re.compile(r"\d\d\:\d\d\:\d\d\s\w{1,10}\t")
+
+# Grouping pattern to allow splitting log out into separate parts
+parsPat = re.compile(r"(\d\d)\:(\d\d)\:(\d?\.*\d*)\s(\w{1,10})\t(.*)")
+dateGrpPat = re.compile(r"(\d\d\d\d).*(\d\d).*(\d\d)")
+
+localTimeAdj = timedelta(hours=-7.0) # FOR PACIFIC TIME: hours added to the logged (GMT) time to get local time
+
+currDate = "01/01/01"
+currYear = 1
+currMon = 1
+currDay = 1
+ActiveCPUID = None
+# Every time we see a new header type we create a new row for it
+# so we can calculate the field positions relative to field names.
+# We know that fields will be added and inserted over time, so we
+# always have to process the most recent log relative to the most
+# recent definition for that field.
+activeHeaders = {}
+recsByType = {} # contains an array of records with field names, indexed by record type
+fldTypesByType = {} # contains an array of conversion functions called when parsing input
+recFldPositions = {} # contains an array of fld names to allow fldName lookup from position
+lastRecByType = {}
+mergeLast = {}
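+# mergeLast maps a record type to a list of other record types whose most recent
+# record should be folded into each new record of that type; it is configured near
+# the bottom of this script, e.g. mergeLast["STAT"] = ["SENS","NIGHTE","DAYE"]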
+
+def parse(fiName):
+ global ActiveCPUID, currDate, currYear, currMon, currDay, localTimeAdj, mergeLast
+ print "parse ", fiName
+ f = open(fiName)
+ print "mergeLast=", mergeLast
+ for aline in f:
+ #print "aline=", aline
+ rm = re.match(logPref, aline)
+ if (rm != None):
+ #print "rm=", rm
+ po = re.split(parsPat, aline)
+ #print "po=", po
+ hour = int(po[1])
+ min = int(po[2])
+ sec = float(po[3])
+ tag = po[4].strip().upper()
+ data = po[5].strip()
+ #print "hour=", hour, " min=", min, "sec=", sec, "tag=", tag, "data=", data
+
+ if tag == "DATE":
+ tarr = re.split(dateGrpPat,data)
+ #print "tarr=", tarr
+ currYear = int(tarr[1])
+ currMon = int(tarr[2])
+ currDay = int(tarr[3])
+ print "DATE tarr=", tarr, " currYear=", currYear, "currMon=", currMon, "currDay=", currDay
+
+ elif tag == "HEAD":
+                # Save our most recent definition
+ tarr = data.split("\t",1)
+ #print("tarr=", tarr)
+ recName = tarr[0].upper()
+ if len(tarr) < 2:
+ continue
+ tarr = tarr[1].split(",")
+ ndx = 0
+ recMap = {}
+ fldTypes = {}
+ fldPositions = []
+ activeHeaders[recName] = recMap
+ fldTypesByType[recName] = fldTypes
+ recFldPositions[recName] = fldPositions
+ for fname in tarr:
+ fname = fname.strip()
+ recMap[ndx] = fname
+ # Figure out type hint if available
+ fsega = fname.split(".")
+ fldPositions.append(fname)
+ ftype = fsega[-1]
+ if ftype == "f":
+ fldTypes[ndx] = float
+ elif ftype == "l":
+ fldTypes[ndx] = long
+ elif ftype == "i":
+ fldTypes[ndx] = int
+ else:
+ ftype = str
+ # set up for next field
+ ndx = ndx + 1
+
+
+ else:
+ recName = tag
+ arec = {};
+ recArr = {}
+ if (recsByType.has_key(recName)):
+ recArr = recsByType[recName]
+ else:
+ recsByType[recName] = recArr
+ flds = data.split(",")
+
+
+ recDef = {}
+ if activeHeaders.has_key(recName):
+ recDef = activeHeaders[recName]
+
+ fldTypes = {}
+ if fldTypesByType.has_key(recName):
+ fldTypes = fldTypesByType[recName]
+
+
+ #print "recName=", recName, "recDef=", recDef, " JSON=", json.dumps(recDef)
+            if recName == "BOOT" and len(flds) > 2:
+                ActiveCPUID = flds[0]
+
+
+            # Merge the last occurrence of a defined set of record types
+            # into this record. EG: If we are logging state we probably
+            # also need the last sensor reading.
+ #print "rec_name=", recName
+ if mergeLast.has_key(recName):
+ for mergeRecName in mergeLast[recName]:
+ if lastRecByType.has_key(mergeRecName):
+ mergeRec = lastRecByType[mergeRecName]
+ #print "mergeRec=", mergeRec
+ for mergeFldName in mergeRec:
+ #print "mergeRecName=", mergeRecName, "mergeFldName=", mergeFldName
+ arec[mergeFldName] = mergeRec[mergeFldName]
+
+
+ if ActiveCPUID != None:
+ arec["cid"] = ActiveCPUID
+ # Compute a Local Adjusted Time for this log entry
+
+            fractSec = int((sec - int(sec)) * 1000000) # convert fractional seconds to microseconds
+            ltime = datetime(currYear, currMon, currDay, hour, min, int(sec), fractSec)
+ adjtime = ltime + localTimeAdj
+ asiso = adjtime.isoformat()
+ arec["time"] = asiso
+ #print "ltime=", ltime, ltime.isoformat(), " adjtime=", adjtime, " iso=", asiso
+
+
+            # Update the record with the fields.
+            # Do this after the merge so that any fields with the same
+            # name in this record take precedence over the values
+            # merged in from other record types.
+ fndx = 0
+ for afld in flds:
+ fldName = "x" + str(fndx)
+ if recDef.has_key(fndx):
+ fldName = recDef[fndx]
+
+ if fldTypes.has_key(fndx):
+ convFun = fldTypes[fndx]
+ try:
+ if convFun != None and convFun != str:
+ afld = convFun(afld)
+ except:
+                        # if the conversion fails, fall back and
+                        # keep the raw string value unchanged
+                        pass
+
+ arec[fldName] = afld
+ #print " fndx=", fndx, " fname=", fldName, "val", afld, " type=",type(afld)
+ fndx = fndx + 1
+            # keep the most recent rec for this time stamp for this record type
+ recArr[asiso] = arec
+ lastRecByType[recName] = arec
+ #print "REC AS JSON=", json.dumps(arec)
+
+
+# Merge Records with identical time stamps
+# and different types
+def mergeRecords(baseType, auxType):
+    recs = recsByType[baseType]
+    auxRecs = recsByType[auxType]
+    reckeys = recs.keys()
+    reckeys.sort()
+    for akey in reckeys:
+        brec = recs[akey]
+        if auxRecs.has_key(akey):
+            auxrec = auxRecs[akey]
+            for fname, fval in auxrec.items():
+                brec[fname] = fval
+
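+# Hypothetical usage (mergeRecords is not called in this revision): mergeRecords("STAT", "SENS")
+# would copy the fields of each SENS record into the STAT record sharing the same adjusted time stamp key.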
+
+
+def toCSVMerged(baseFiName, baseType):
+ pass
+
+
+# Generate a CSV file for every record type ordered by dtime
+# with dtime adjusted for local time.
+def saveAsCSV(baseFiName):
+ recTypes = recsByType.keys();
+ for recType in recTypes:
+ fldNamesUsed = {}
+ outFiName = baseFiName + "." + recType + ".csv"
+
+
+ fldNames = recFldPositions[recType];
+ outFldNames = []
+ for fldName in fldNames:
+ outFldNames.append(fldName)
+ fldNamesUsed[fldName] = 1
+
+        # merge in additional field names if needed for merged record types
+ if mergeLast.has_key(recType):
+ for mergeRecName in mergeLast[recType]:
+ mergeFlds = recFldPositions[mergeRecName]
+ for mergeFldName in mergeFlds:
+ if not(fldNamesUsed.has_key(mergeFldName)):
+ outFldNames.append(mergeFldName)
+ fldNamesUsed[mergeFldName] = 1
+
+
+ #print "fldnames=", fldnames
+ fout = open(outFiName, "w")
+ fout.write("time,id,")
+ fout.write(",".join(outFldNames))
+ fout.write("\n")
+        recs = recsByType[recType]
+ reckeys = recs.keys()
+ reckeys.sort()
+ for akey in reckeys:
+ arec = recs[akey]
+ #print "areckey=", arec
+ recOut = [];
+ recOut.append(arec["time"])
+ if arec.has_key("cid"):
+ recOut.append(arec["cid"])
+ else:
+ recOut.append("")
+ # merge fields will already be in the target record
+ for fldName in outFldNames:
+ #print "fldName=", fldName, " arec=", json.dumps(arec)
+ if fldName == "time":
+ continue
+ if arec.has_key(fldName):
+ fldval = arec[fldName]
+ fldvalstr = str(fldval)
+ recOut.append(fldvalstr)
+ else:
+ recOut.append("")
+ recStr = ",".join(recOut)
+ fout.write(recStr)
+ fout.write("\n")
+ fout.close()
+
+
+
+
+# TODO:
+def toMongo(baseFiName):
+ pass
+
+# TODO:
+def toElastic(baseFiName):
+ recTypes = recsByType.keys();
+ for recType in recTypes:
+ outFiName = baseFiName + "." + recType + ".csv"
+        fldnames = recFldPositions[recType]
+ fout = open(outFiName, "w")
+ fout.write(",".join(fldnames))
+ fout.write("\n")
+        recs = recsByType[recType]
+ reckeys = recs.keys()
+ reckeys.sort()
+ for akey in reckeys:
+ arec = recs[akey]
+ #print "areckey=", arec
+ fout.write(json.dumps(arec))
+ fout.write("\n")
+ fout.close()
+
+
+mergeLast["STAT"] = ["SENS","NIGHTE","DAYE"] # instructs the parser to merge the last SENS, NIGHTE and DAYE
+                  # records into the current STAT record whenever it finds one.
+
+localTimeAdj = timedelta(hours=-7.0) # FOR PACIFIC TIME: hours added to the logged (GMT) time to get local time
+parse("c:\\a2wh\\plant-unit-01\\a2wh-2016-04-11-1241.DLOG.TXT")
+
+logFiles = glob.glob("c:\\a2wh\\plant-unit-01\\a2wh*.DLOG.TXT")
+for fiName in logFiles:
+ parse(fiName)
+
+saveAsCSV("c:\\a2wh\\plant-unit-01\\testx2")
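+# saveAsCSV writes one CSV per record type found in the parsed logs, named <baseFiName>.<recType>.csv
+# (e.g. testx2.STAT.csv -- the actual record types depend on the captured data).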
+
+
+
+
+
\ No newline at end of file