from urllib import urlretrieve
import csv
import datetime
from io import BytesIO
import numpy as np
import json
import copy
import random
import uuid
from IPython.display import HTML
from nupic.encoders import MultiEncoder
import nupic.research.TP
%%javascript
// Register the module paths used by the charts in this notebook: `d3` and
// `hello` (the segment-stories script) are both loaded from protocol-relative
// URLs. drawEverything() later pulls them in with require(['hello', 'd3'], ...).
require.config({
paths: {
d3: '//d3js.org/d3.v3.min',
hello: "//mrcslws.com/stuff/segment-stories.2016.04.28"
}
});
def drawEverything(columnStatesChartBuilder, segmentLifetimesChartBuilder):
    """Build the HTML/JS snippet that renders both charts in the notebook.

    Args:
        columnStatesChartBuilder: object whose ``output.getvalue()`` returns
            the column-states CSV text.
        segmentLifetimesChartBuilder: object whose ``getOutput()`` returns the
            segment-lifetimes JSON text.

    Returns:
        An ``IPython.display.HTML`` object containing a uniquely-identified
        ``<div>`` and a ``<script>`` that passes both texts, as JavaScript
        string literals, to ``insertColumnStatesAndSegmentLifetimes``.
    """
    def toJsStringLiteral(text):
        # json.dumps produces a double-quoted literal with quotes, backslashes
        # and control characters escaped, which JavaScript parses back to the
        # identical text. "</" is additionally split so the data can never
        # terminate the surrounding <script> element early.
        return json.dumps(text).replace('</', '<\\/')

    # A fresh id per call lets several charts coexist in one notebook.
    elementId = str(uuid.uuid1())
    addChart = """
<div id="%s" style="-webkit-touch-callout: none; -webkit-user-select: none; -moz-user-select: none;
-ms-user-select: none; user-select: none;"></div>
<script>
require(['hello', 'd3'], function() {
var myContainer = zoomableTimeSeries(document.getElementById('%s'));
insertColumnStatesAndSegmentLifetimes(myContainer, %s, %s);
});
</script>
""" % (elementId, elementId,
       toJsStringLiteral(columnStatesChartBuilder.output.getvalue()),
       toJsStringLiteral(segmentLifetimesChartBuilder.getOutput()))
    return HTML(addChart)
def parseHotgym(hotgymRaw):
    """Parse hotgym CSV lines into a list of timestamp/consumption records.

    The first three rows are skipped as non-data rows. Every remaining
    non-empty row must contain exactly two fields: a timestamp formatted as
    ``"%m/%d/%y %H:%M"`` and a numeric consumption value.

    Args:
        hotgymRaw: iterable of CSV lines (for example an open file object).

    Returns:
        A list of dicts with keys ``'timestamp'`` (``datetime.datetime``) and
        ``'consumption'`` (``float``), in file order. Input with three or
        fewer rows yields an empty list.

    Raises:
        ValueError: if a data row does not have exactly two fields, or a field
            cannot be parsed.
    """
    csvReader = csv.reader(hotgymRaw)

    # Builtin next() with a default works on Python 2.6+ and 3, and does not
    # leak StopIteration when the input is shorter than the header.
    for _ in range(3):
        next(csvReader, None)

    parsed = []
    for row in csvReader:
        if not row:
            # csv.reader yields [] for blank lines (e.g. a trailing newline).
            continue
        timestampStr, consumptionStr = row
        parsed.append({
            'timestamp': datetime.datetime.strptime(timestampStr,
                                                    "%m/%d/%y %H:%M"),
            'consumption': float(consumptionStr),
        })
    return parsed
def encodeHotgym(hotgym):
    """Encode every hotgym record with a three-field MultiEncoder.

    The encoder combines a scalar encoding of 'consumption' with two date
    encodings of 'timestamp' (time of day and weekend).

    Args:
        hotgym: sequence of record dicts with 'timestamp' and 'consumption'
            keys, as produced by parseHotgym.

    Returns:
        A list with one encoder output per record, in input order.
    """
    consumptionField = {
        'fieldname': 'consumption',
        'name': 'consumption',
        'type': 'ScalarEncoder',
        'minval': 5.0,
        'maxval': 55.0,
        'clipInput': True,
        'n': 478,
        'w': 31,
    }
    timeOfDayField = {
        'fieldname': 'timestamp',
        'name': 'timestamp_timeOfDay',
        'timeOfDay': (21, 1),
        'type': 'DateEncoder',
    }
    weekendField = {
        'fieldname': 'timestamp',
        'name': 'timestamp_weekend',
        'type': 'DateEncoder',
        'weekend': 21,
    }
    encoder = MultiEncoder({
        'consumption': consumptionField,
        'timestamp_timeOfDay': timeOfDayField,
        'timestamp_weekend': weekendField,
    })
    return [encoder.encode(record) for record in hotgym]
# Download the hourly rec-center data to a temporary file, then parse and
# encode every record.
with open(urlretrieve("http://mrcslws.com/stuff/rec-center-hourly.csv")[0],
          'r') as hotgym:
    HOTGYM_ENCODED_ALL = encodeHotgym(parseHotgym(hotgym))

# For each encoding, the set of indices of its nonzero bits.
HOTGYM_INDICES_ALL = map(lambda sdr: set(sdr.nonzero()[0]),
                         HOTGYM_ENCODED_ALL)

# The experiments below only use the first 1000 records.
HOTGYM_ENCODED, HOTGYM_INDICES = (HOTGYM_ENCODED_ALL[:1000],
                                  HOTGYM_INDICES_ALL[:1000])
Here I've tuned the encoders to produce sparse outputs and to use every bit.
# Number of active (nonzero) bits in the first encoding
len(HOTGYM_ENCODED[0].nonzero()[0])
# Sparsity: active bits divided by total bits, for the first encoding
len(HOTGYM_ENCODED[0].nonzero()[0]) / float(len(HOTGYM_ENCODED[0]))
# Number of columns (total length of one encoding)
len(HOTGYM_ENCODED[0])
# Show how often each bit is used: element-wise sum of all encodings,
# accumulated into a 1024-element uint32 array. The print threshold is raised
# to 1024 so that NumPy shows the whole array instead of summarizing it.
np.set_printoptions(threshold=1024)
reduce(lambda accum, encoding: accum + encoding,
HOTGYM_ENCODED,
np.zeros(1024, dtype='uint32'))
class ColumnStatesPatcher(object):
    """Record, as CSV, how a TP's column predictions compare with its input.

    After ``patchTP(tp)``, every ``tp.compute(...)`` call appends one row to
    ``self.output`` with three counts, computed from the columns that were
    predicted *before* the call and the columns active in the call's input:
    unpredicted-active, predicted-inactive and predicted-active.
    """

    def __init__(self):
        # CSV text accumulates here; read it with self.output.getvalue().
        self.output = BytesIO()
        # TPs whose compute() this patcher has already wrapped.
        self._patchedTPs = []

    def patchTP(self, tp):
        """Wrap ``tp.compute`` so that each call logs one CSV row.

        Patching the same TP again is a no-op; re-wrapping would log every
        timestep twice. The header row is written only for the first TP
        patched, so the output stays a single well-formed CSV table.

        Args:
            tp: object exposing ``compute(bottomUpInput, **kwargs)``,
                ``getPredictedState()`` and ``cellsPerColumn``.
        """
        if any(patched is tp for patched in self._patchedTPs):
            return
        isFirstPatch = not self._patchedTPs
        self._patchedTPs.append(tp)

        csvOutput = csv.writer(self.output)
        if isFirstPatch:
            headerRow = [
                'n-unpredicted-active-columns',
                'n-predicted-inactive-columns',
                'n-predicted-active-columns',
            ]
            csvOutput.writerow(headerRow)

        computeMethod = tp.compute

        def myCompute(bottomUpInput, **kwargs):
            activeColumns = set(bottomUpInput.nonzero()[0])

            # Read the prediction before computing: it is the prediction that
            # was made for this input.
            npPredictedCells = tp.getPredictedState().reshape(-1).nonzero()[0]
            # Floor division maps a flat cell index to its column index; plain
            # "/" would produce floats under true division.
            predictedColumns = set(cell // tp.cellsPerColumn
                                   for cell in npPredictedCells.tolist())

            computeResult = computeMethod(bottomUpInput, **kwargs)

            row = (
                len(activeColumns - predictedColumns),
                len(predictedColumns - activeColumns),
                len(activeColumns & predictedColumns),
            )
            csvOutput.writerow(row)
            return computeResult

        tp.compute = myCompute
class SegmentLifetimeChartBuilderTP(object):
    """Track the lifetime and activity of a TP's segments for charting.

    ``patchTP(tp)`` assigns four hook functions onto the TP
    (``onCreateSegment``, ``onDestroySegment``, ``onActiveSegment`` and
    ``onCorrectMatchingSegment``) and wraps ``tp.compute`` so that this object
    counts timesteps. It assumes the TP implementation calls those hooks with
    segment objects carrying a ``segID`` attribute. ``getOutput()`` serializes
    everything recorded so far as JSON.
    """

    _EVENT_KEYS = ('correctMatches', 'correctActivations',
                   'incorrectActivations')

    def __init__(self):
        # segID => data dict for segments that currently exist
        self.segments = {}

        # (segID, data) pairs, in destruction order. A list rather than a
        # dict because a segment ID might get destroyed multiple times.
        self.destroyedSegments = []

        # Number of completed compute() calls.
        self.timestep = 0

        # Segment IDs reported active during the previous / current compute.
        self.prevActiveSegments = []
        self.activeSegments = []

        # TPs that this builder has already patched.
        self._patchedTPs = []

    def _findSegmentData(self, segID):
        """Return the data dict for segID, preferring a live segment.

        Falls back to the most recently destroyed segment with that ID.
        """
        if segID in self.segments:
            return self.segments[segID]
        segmentData = None
        for destroyedID, data in reversed(self.destroyedSegments):
            if destroyedID == segID:
                segmentData = data
                break
        assert segmentData is not None, "unknown segment %r" % (segID,)
        return segmentData

    @classmethod
    def _summarizeSegment(cls, data):
        """Return the chart entry for one segment, omitting empty event lists."""
        out = {'birthstep': data['birthstep']}
        if 'deathstep' in data:
            out['deathstep'] = data['deathstep']
        for key in cls._EVENT_KEYS:
            if data[key]:
                out[key] = data[key]
        return out

    def getOutput(self):
        """Serialize all recorded segments as a JSON string.

        Returns:
            JSON text for an object with 'nTimesteps' (the current timestep
            count) and 'segments' (one entry per destroyed or live segment,
            sorted by 'birthstep'). Each entry has 'birthstep', a 'deathstep'
            if the segment was destroyed, and whichever of 'correctMatches',
            'correctActivations' and 'incorrectActivations' are non-empty.
        """
        outputSegments = [self._summarizeSegment(data)
                          for _, data in self.destroyedSegments]
        outputSegments.extend(self._summarizeSegment(data)
                              for data in self.segments.values())
        outputSegments.sort(key=lambda entry: entry['birthstep'])

        output = {
            'nTimesteps': self.timestep,
            'segments': outputSegments
        }
        return json.dumps(output)

    def patchTP(self, tp):
        """Install the recording hooks on tp and wrap its compute().

        Patching the same TP again is a no-op: wrapping compute() twice would
        run beforeCompute() twice per step, discarding the previous step's
        active segments, and would advance the timestep twice.

        Args:
            tp: object exposing ``compute(bottomUpInput, **kwargs)``,
                ``getActiveState()`` and ``cellsPerColumn``.
        """
        if any(patched is tp for patched in self._patchedTPs):
            return
        self._patchedTPs.append(tp)

        def onCorrectMatchingSegment(segment):
            segmentData = self._findSegmentData(segment.segID)
            segmentData['correctMatches'].append(self.timestep)
        tp.onCorrectMatchingSegment = onCorrectMatchingSegment

        def onActiveSegment(segment):
            self.activeSegments.append(segment.segID)
        tp.onActiveSegment = onActiveSegment

        def onDestroySegment(segment):
            k = segment.segID
            data = self.segments.pop(k)
            data['deathstep'] = self.timestep
            self.destroyedSegments.append((k, data))
        tp.onDestroySegment = onDestroySegment

        def onCreateSegment(c, i, segment):
            k = segment.segID
            assert k not in self.segments
            self.segments[k] = {
                # Flat cell index from column index c and cell-in-column i.
                'cell': c * tp.cellsPerColumn + i,
                'birthstep': self.timestep,
                'correctMatches': [],
                'correctActivations': [],
                'incorrectActivations': []
            }
        tp.onCreateSegment = onCreateSegment

        compute = tp.compute

        def myCompute(bottomUpInput, **kwargs):
            self.beforeCompute(tp)
            # Pass the wrapped method's result through to the caller.
            computeResult = compute(bottomUpInput, **kwargs)
            self.afterCompute(tp)
            return computeResult
        tp.compute = myCompute

    def beforeCompute(self, tp):
        """Move the current active-segment list to 'previous' and start anew."""
        self.prevActiveSegments = self.activeSegments
        self.activeSegments = []

    def afterCompute(self, tp):
        """Score the previous step's active segments, then advance the timestep.

        A segment that was active in the previous step is recorded as a
        correct activation if its cell is in tp.getActiveState() now, and as
        an incorrect activation otherwise.
        """
        if self.prevActiveSegments:
            # The active state is the same for every segment; build it once.
            activeCells = set(tp.getActiveState().nonzero()[0].tolist())
            for k in self.prevActiveSegments:
                segmentData = self._findSegmentData(k)
                if segmentData['cell'] in activeCells:
                    segmentData['correctActivations'].append(self.timestep)
                else:
                    segmentData['incorrectActivations'].append(self.timestep)

        self.timestep += 1
# Module-level recorders for experiment 1. experiment1() patches both onto the
# TP it runs, and drawEverything() reads their accumulated output, so the data
# keeps growing across successive runs.
experiment1_column_states = ColumnStatesPatcher()
experiment1_segment_lifetimes = SegmentLifetimeChartBuilderTP()
def experiment1(tp=None):
    """Feed every HOTGYM_ENCODED input through a TP with learning enabled.

    Args:
        tp: an existing TP to keep training. When None, a new 1024-column,
            4-cells-per-column TP is created with a fixed seed.

    Returns:
        The TP that was run (the one passed in, or the newly created one).

    Both module-level recorders are patched onto the TP on every call, and
    progress is printed every 100 timesteps.
    """
    if tp is None:
        tpParams = dict(
            numberOfCols=1024,
            cellsPerColumn=4,
            initialPerm=0.21,
            connectedPerm=0.50,
            minThreshold=10,
            newSynapseCount=20,
            permanenceInc=0.10,
            permanenceDec=0.10,
            globalDecay=0.0,
            maxAge=0,
            activationThreshold=13,
            maxSegmentsPerCell=4,
            maxSynapsesPerSegment=255,
            maxInfBacktrack=0,
            maxLrnBacktrack=0,
            maxSeqLength=0,
            seed=42,
        )
        tp = nupic.research.TP.TP(**tpParams)

    experiment1_column_states.patchTP(tp)
    experiment1_segment_lifetimes.patchTP(tp)

    # Steps are counted from 1, so the first message appears at step 100.
    for step, encoding in enumerate(HOTGYM_ENCODED, 1):
        if step % 100 == 0:
            print("timestep %d" % step)
        tp.compute(encoding, enableLearn=True, computeInfOutput=True)

    return tp
# Phase 1: train a fresh TP on the hotgym data and chart the result.
e1_tp = experiment1()
drawEverything(experiment1_column_states, experiment1_segment_lifetimes)

# Phase 2: feed the same TP 1000 steps of random input, each with 73 of the
# 1024 columns active, then chart again.
for i in xrange(1000):
    if i % 100 == 0:
        print("timestep %d" % i)
    activeColumns = random.sample(xrange(1024), 73)
    encoding = np.zeros(1024, dtype='uint32')
    encoding[activeColumns] = 1
    e1_tp.compute(encoding, enableLearn=True, computeInfOutput=True)
drawEverything(experiment1_column_states, experiment1_segment_lifetimes)

# Phase 3: replay the hotgym data through the same TP and chart once more.
experiment1(e1_tp)
drawEverything(experiment1_column_states, experiment1_segment_lifetimes)