Joseph Ellsworth / data_log

Dependencies:   W25Q80BV multi-serial-command-listener

Dependents:   xj-data-log-test-and-example

Committer:
joeata2wh
Date:
Wed Apr 13 04:15:43 2016 +0000
Parent:
10:0d1d96ba7279
Commit message:
Added a Python parser to convert the DLOG format captured from the readall command over the serial port into CSV format for use in Excel and R.

Changed in this revision

pc_parser/parse_dlog.py
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/pc_parser/parse_dlog.py	Wed Apr 13 04:15:43 2016 +0000
@@ -0,0 +1,335 @@
+"""
+  Parse DLOG format from chip into something easier to work with on the PC
+  Combines the CPU ID, Date, Time into single records and optionally adjusts
+  the date time for local time offset to make easier to read. 
+  
+  Optionally save as CSV, TSV, or Elastic Search data structure to make secondary
+  analysis easier.
+  
+  NOTE:  Use the excell feature to wrap text on field names and freeze payne to 
+    make much easier to use. 
+  
+  Assumes you have used something like the terraterm file capture to read the 
+  entire log contents from from the chip using the "readall" command.  
+  
+  Due to way we read the log it is fairly likely that we will re-read the same item
+  multiple times for the same time stamp so we de-dup them.   This is because I quite
+  often issue a readall command and fail to issue an erase after that occurs. This causes
+  the  same log entries to occur in multiple capture files. This utility keeps only the
+  most recent version located.
+  
+  Sometimes it is easier and more space wise to log a series of smaller records at
+  different time intervals but you need a wholisic view to get an idea of complete 
+  system state.   The mergeLast feature allows the system to merge the most recent 
+  record of different log types into the current record providing a larger virtual
+  records in the output CSV.
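+  For example, mergeLast["STAT"] = ["SENS"] would copy the fields of the most recent SENS
+  record into every STAT record written out (see the mergeLast assignment near the bottom
+  of this file).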
+  
+  We know that we will add fields and record types over time, so we accommodate this by
+  converting all records to a name-indexed version; when we save them we can look up
+  the fields even if some of the older records do not contain all the fields. 
+  
+  #TODO: Detect type .T fields, parse them as time stamps, and adjust to local time.  
+  Otherwise they are difficult to read in Excel.   
+  
+  #TODO: Consider logging all time fields in the form hh:mm:ss; it will not use much more
+  space than the unix number and is much easier to read. Have not done it yet because sometimes
+  these go back across a day boundary. 
+  
+"""
+import re
+import json
+import datetime
+import time
+from datetime import tzinfo, timedelta, datetime
+import csv
+import glob
+
+# Match time hh:mm:ss followed by a space, a label (the record tag), then a tab.
+logPref = re.compile(r"\d\d\:\d\d\:\d\d\s\w{1,10}\t")
+
+# Grouping pattern to allow splitting a log line into its separate parts
+parsPat = re.compile(r"(\d\d)\:(\d\d)\:(\d?\.*\d*)\s(\w{1,10})\t(.*)")
+dateGrpPat = re.compile(r"(\d\d\d\d).*(\d\d).*(\d\d)")
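+
+# Hypothetical examples of the input lines these patterns are meant to match ("<TAB>"
+# marks a tab character; the tags and fields in a real capture will differ):
+#   04:15:43 DATE<TAB>2016-04-13
+#   04:15:43 HEAD<TAB>SENS<TAB>temp.f,hum.f,cnt.i
+#   04:15:44 SENS<TAB>21.5,48.2,17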
+
+localTimeAdj = timedelta(hours=-7.0) # FOR PACIFIC TIME: hours added to the logged GMT time to obtain local time
+
+currDate = "01/01/01"
+currYear = 1
+currMon = 1
+currDay = 1
+ActiveCPUID = None
+# Every time we see a new header type we create a new row for it.
+# so we can calculate the field positions relative to field names.
+#  we know that fields will be added and inserted over time so we 
+#  always have to process the most recent log relative to the most
+#  recent definition for that field. 
+activeHeaders = {}
+recsByType = {}      # contains the parsed records (keyed by time stamp), indexed by record type
+fldTypesByType = {}  # contains the conversion functions applied when parsing input, indexed by record type
+recFldPositions = {} # contains the list of field names, allowing fldName lookup by position
+lastRecByType = {}
+mergeLast = {}
+
+def parse(fiName):
+    global ActiveCPUID, currDate, currYear, currMon, currDay, localTimeAdj, mergeLast
+    print "parse ", fiName
+    f = open(fiName)
+    print "mergeLast=", mergeLast
+    for aline in f:
+      #print "aline=", aline
+      rm = re.match(logPref, aline)
+      if (rm != None):  
+        #print "rm=", rm
+        po = re.split(parsPat, aline)
+        #print "po=", po
+        hour = int(po[1])
+        min  = int(po[2])
+        sec  = float(po[3])
+        tag  = po[4].strip().upper()
+        data = po[5].strip()
+        #print "hour=", hour, " min=", min, "sec=", sec, "tag=", tag, "data=", data
+
+        if tag == "DATE":
+          tarr = re.split(dateGrpPat,data)
+          #print "tarr=", tarr
+          currYear = int(tarr[1])
+          currMon  = int(tarr[2])
+          currDay  = int(tarr[3])
+          print "DATE tarr=", tarr, " currYear=", currYear, "currMon=", currMon,  "currDay=", currDay
+      
+        elif tag == "HEAD":
+          # Save our most recent definition
+          tarr = data.split("\t",1)
+          #print("tarr=", tarr)
+          recName = tarr[0].upper()
+          if len(tarr) < 2: 
+            continue
+          tarr = tarr[1].split(",")
+          ndx = 0
+          recMap = {}
+          fldTypes = {}
+          fldPositions = []
+          activeHeaders[recName] = recMap
+          fldTypesByType[recName] = fldTypes
+          recFldPositions[recName] = fldPositions
+          for fname in tarr:
+            fname = fname.strip()
+            recMap[ndx] = fname
+            # Figure out type hint if available
+            fsega = fname.split(".")
+            fldPositions.append(fname)
+            ftype = fsega[-1]
+            if ftype == "f":
+              fldTypes[ndx] = float
+            elif ftype == "l":
+              fldTypes[ndx] = long
+            elif ftype == "i":
+              fldTypes[ndx] = int
+            else:
+              fldTypes[ndx] = str
+            # set up for next field
+            ndx = ndx + 1           
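+          # e.g. (hypothetical) a header line "HEAD<TAB>SENS<TAB>temp.f,cnt.i" yields
+          #   recMap = {0: "temp.f", 1: "cnt.i"} and fldTypes = {0: float, 1: int}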
+              
+      
+        else:
+           recName = tag
+           arec = {};
+           recArr = {}
+           if (recsByType.has_key(recName)):
+             recArr = recsByType[recName]
+           else:
+             recsByType[recName] = recArr
+           flds = data.split(",")
+
+           
+           recDef = {}
+           if activeHeaders.has_key(recName):
+             recDef = activeHeaders[recName]
+           
+           fldTypes = {}
+           if fldTypesByType.has_key(recName):
+             fldTypes = fldTypesByType[recName]
+           
+           
+           #print "recName=", recName, "recDef=", recDef, " JSON=", json.dumps(recDef)
+           # treat the first field of a BOOT record as the active CPU ID
+           if recName == "BOOT" and len(flds) > 2:
+             ActiveCPUID = flds[0]
+
+                      
+           # Merge the last occurrence of a defined set of record types
+           # into this record.  EG: if we are logging state we probably
+           # also want the last sensor reading.           
+           #print "rec_name=", recName
+           if mergeLast.has_key(recName):
+             for mergeRecName in mergeLast[recName]:             
+               if lastRecByType.has_key(mergeRecName):
+                   mergeRec = lastRecByType[mergeRecName] 
+                   #print "mergeRec=", mergeRec
+                   for mergeFldName in mergeRec:
+                    #print "mergeRecName=", mergeRecName, "mergeFldName=", mergeFldName
+                    arec[mergeFldName] = mergeRec[mergeFldName]
+          
+             
+           if ActiveCPUID != None:
+             arec["cid"] = ActiveCPUID
+           # Compute a Local Adjusted Time for this log entry
+           
+           fractSec = int((sec - int(sec)) * 1000000) # convert fractional seconds to micro seconds
+           #ltime = datetime(currYear, currMon, currDay, hour, min, int(sec), fractSec)
+           ltime = datetime(currYear, currMon, currDay, hour, min, int(sec), fractSec)
+           adjtime = ltime + localTimeAdj
+           asiso = adjtime.isoformat()
+           arec["time"] = asiso
+           #print "ltime=", ltime, ltime.isoformat(), " adjtime=", adjtime, " iso=", asiso
+       
+           
+           # Update the record with the fields.
+           # Do this after the merge so that fields parsed from the
+           # current record take precedence over fields copied in
+           # by the merge.
+           fndx = 0
+           for afld in flds:
+             fldName = "x" + str(fndx)
+             if recDef.has_key(fndx):
+               fldName = recDef[fndx]
+            
+             if fldTypes.has_key(fndx):
+               convFun = fldTypes[fndx]
+               try:
+                 if convFun != None and convFun != str:
+                   afld = convFun(afld)
+               except: 
+                  # if the conversion fails, fall back to
+                  # treating the raw value as a string
+                  pass
+                  
+             arec[fldName] = afld
+             #print " fndx=", fndx, " fname=", fldName, "val", afld, " type=",type(afld)
+             fndx = fndx + 1
+           # keep only the most recent rec for this time stamp for this record type
+           recArr[asiso] = arec
+           lastRecByType[recName] = arec
+           #print "REC AS JSON=", json.dumps(arec)
+    
+
+# Merge Records with identical time stamps
+# and different types 
+def mergeRecords(baseType, auxType):
+    recs = recsByType[baseType]
+    auxRecs = recsByType[auxType]
+    reckeys = recs.keys()
+    reckeys.sort()    
+    for akey in reckeys:
+      brec = recs[akey]
+      if auxRecs.has_key(akey):
+        auxrec = auxRecs[akey]
+        for fname, fval in auxrec.items():
+          brec[fname] = fval
+                            
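+# For example (hypothetical call; nothing in this script invokes mergeRecords yet):
+#   mergeRecords("STAT", "SENS") would fold the fields of each SENS record into the
+#   STAT record that shares its exact time stamp.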
+
+           
+def toCSVMerged(baseFiName, baseType):
+  pass
+           
+           
+# Generate a CSV file for every record type ordered by dtime
+# with dtime adjusted for local time.            
+def saveAsCSV(baseFiName):
+  recTypes = recsByType.keys();
+  for recType in recTypes:
+    fldNamesUsed = {}
+    outFiName = baseFiName + "." + recType + ".csv"
+    
+    
+    fldNames = recFldPositions[recType];
+    outFldNames = []
+    for fldName in fldNames:
+      outFldNames.append(fldName)
+      fldNamesUsed[fldName] = 1      
+      
+    # merge in additional field names if needed for the record types merged in via mergeLast
+    if mergeLast.has_key(recType):
+      for mergeRecName in mergeLast[recType]:
+        mergeFlds = recFldPositions[mergeRecName]
+        for mergeFldName in mergeFlds:
+          if not(fldNamesUsed.has_key(mergeFldName)):
+            outFldNames.append(mergeFldName)
+            fldNamesUsed[mergeFldName] = 1
+      
+     
+    #print "fldnames=", fldnames
+    fout = open(outFiName, "w")
+    fout.write("time,id,")
+    fout.write(",".join(outFldNames))
+    fout.write("\n")
+    recs = recsByType[recType]
+    reckeys = recs.keys()
+    reckeys.sort()
+    for akey in reckeys:
+      arec = recs[akey]
+      #print "areckey=", arec
+      recOut = [];
+      recOut.append(arec["time"])
+      if arec.has_key("cid"):
+        recOut.append(arec["cid"])
+      else:
+        recOut.append("")
+      # merge fields will already be in the target record
+      for fldName in outFldNames:
+        #print "fldName=", fldName, " arec=", json.dumps(arec)
+        if fldName == "time":
+          continue
+        if arec.has_key(fldName):
+          fldval = arec[fldName]
+          fldvalstr = str(fldval)
+          recOut.append(fldvalstr)          
+        else:
+          recOut.append("")
+      recStr = ",".join(recOut)
+      fout.write(recStr)
+      fout.write("\n")
+    fout.close()
+  
+  
+   
+   
+# TODO:
+def toMongo(baseFiName):
+  pass
+
+# TODO: 
+def toElastic(baseFiName):
+  recTypes = recsByType.keys();
+  for recType in recTypes:
+    outFiName = baseFiName + "." + recType + ".json"
+    fldnames = recFldPositions[recType]
+    fout = open(outFiName, "w")
+    fout.write(",".join(fldnames))
+    fout.write("\n")
+    recs = recsByType[recType]
+    reckeys = recs.keys()
+    reckeys.sort()
+    for akey in reckeys:
+      arec = recs[akey]
+      #print "areckey=", arec
+      fout.write(json.dumps(arec))
+      fout.write("\n")
+    fout.close()
+ 
+ 
+mergeLast["STAT"] = ["SENS","NIGHTE","DAYE"] # instructs the parser to merge the last SENS, NIGHTE, and
+  # DAYE records into each STAT record it finds.    
+  
+localTimeAdj = timedelta(hours=-7.0) # FOR PACIFIC TIME: hours added to the logged GMT time to obtain local time
+parse("c:\\a2wh\\plant-unit-01\\a2wh-2016-04-11-1241.DLOG.TXT")
+
+logFiles = glob.glob("c:\\a2wh\\plant-unit-01\\a2wh*.DLOG.TXT")
+for fiName in logFiles:
+  parse(fiName)
+
+saveAsCSV("c:\\a2wh\\plant-unit-01\\testx2")
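+
+# Hypothetical result (the actual file names depend on the record types present in the
+# captured logs): saveAsCSV writes one file per record type, e.g. testx2.STAT.csv and
+# testx2.SENS.csv, each with a header row followed by one de-duplicated row per time stamp.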
+
+      
+        
+      
+       
\ No newline at end of file