from urllib import urlretrieve
import csv
import datetime
from io import BytesIO
import numpy as np
import json
import copy
import random
import uuid
from IPython.display import HTML
from nupic.encoders import MultiEncoder
import nupic.research.temporal_memory
import nupic.bindings.algorithms
%%javascript
// Configure RequireJS module paths used by the chart-drawing code below:
// - d3:    the D3.js visualization library (v3), loaded from d3js.org
// - hello: the external "segment stories" chart module hosted on mrcslws.com
require.config({
paths: {
d3: '//d3js.org/d3.v3.min',
hello: "//mrcslws.com/stuff/segment-stories.2016.04.28"
}
});
def drawEverything(columnStatesChartBuilder, segmentLifetimesChartBuilder):
    """Render both charts as a single HTML widget in the notebook.

    Embeds the serialized column-states CSV and segment-lifetimes JSON into
    a <script> tag that hands them to the d3-based chart code loaded via
    RequireJS (the 'hello' module configured above).
    """
    # Unique DOM id so several chart instances can coexist in one notebook.
    elementId = str(uuid.uuid1())
    # Escape CR/LF so the payloads survive embedding in a JS string literal.
    columnStatesData = (columnStatesChartBuilder.output.getvalue()
                        .replace('\r', '\\r')
                        .replace('\n', '\\n'))
    segmentLifetimesData = (segmentLifetimesChartBuilder.getOutput()
                            .replace('\r', '\\r')
                            .replace('\n', '\\n'))
    addChart = """
<div id="%s" style="-webkit-touch-callout: none; -webkit-user-select: none; -moz-user-select: none;
-ms-user-select: none; user-select: none;"></div>
<script>
require(['hello', 'd3'], function() {
var myContainer = zoomableTimeSeries(document.getElementById('%s'));
insertColumnStatesAndSegmentLifetimes(myContainer, '%s', '%s');
});
</script>
""" % (elementId, elementId, columnStatesData, segmentLifetimesData)
    return HTML(addChart)
def parseHotgym(hotgymRaw):
    """Parse the rec-center-hourly CSV into a list of dicts.

    @param hotgymRaw: iterable of CSV lines (e.g. an open file). The first
                      three rows are header/metadata rows and are skipped.
    @return list of {'timestamp': datetime.datetime, 'consumption': float}
            dicts, one per data row.
    """
    parsed = []
    csvReader = csv.reader(hotgymRaw)
    # Skip the three header rows. Use the builtin next() rather than the
    # Python-2-only .next() method -- identical behavior under Python 2,
    # and portable to Python 3.
    for _ in range(3):
        next(csvReader)
    for row in csvReader:
        timestampStr, consumptionStr = row
        parsed.append({
            'timestamp': datetime.datetime.strptime(timestampStr,
                                                    "%m/%d/%y %H:%M"),
            'consumption': float(consumptionStr)
        })
    return parsed
def encodeHotgym(hotgym):
    """Encode each parsed hotgym row into a binary array.

    Combines a scalar encoding of consumption with time-of-day and
    weekend date encodings; parameters are tuned so the combined output
    is sparse and every bit gets used.
    """
    encoder = MultiEncoder({
        'consumption': {
            'fieldname': 'consumption',
            'name': 'consumption',
            'type': 'ScalarEncoder',
            'minval': 5.0,
            'maxval': 55.0,
            'clipInput': True,
            'n': 478,
            'w': 31,
        },
        'timestamp_timeOfDay': {
            'fieldname': 'timestamp',
            'name': 'timestamp_timeOfDay',
            'timeOfDay': (21, 1),
            'type': 'DateEncoder'
        },
        'timestamp_weekend': {
            'fieldname': 'timestamp',
            'name': 'timestamp_weekend',
            'type': 'DateEncoder',
            'weekend': 21
        },
    })
    return [encoder.encode(row) for row in hotgym]
# Download the hotgym dataset, encode every row, and keep the first 1000
# encodings for the experiments below.
with open(urlretrieve("http://mrcslws.com/stuff/rec-center-hourly.csv")[0],
          'r') as hotgym:
    HOTGYM_ENCODED_ALL = encodeHotgym(parseHotgym(hotgym))

# For each encoding, precompute the set of active (nonzero) bit indices --
# this is what the temporal memory consumes as its active columns.
HOTGYM_INDICES_ALL = map(lambda encoding : set(encoding.nonzero()[0]),
                         HOTGYM_ENCODED_ALL)

# Work with a 1000-timestep prefix to keep the experiments fast.
HOTGYM_ENCODED = HOTGYM_ENCODED_ALL[:1000]
HOTGYM_INDICES = HOTGYM_INDICES_ALL[:1000]
Here I've tuned the encoders to produce sparse outputs and to use every bit.
# Number of active bits in a single encoding.
len(HOTGYM_ENCODED[0].nonzero()[0])
# Sparsity: fraction of bits active in one encoding.
len(HOTGYM_ENCODED[0].nonzero()[0]) / float(len(HOTGYM_ENCODED[0]))
# Number of columns, i.e. total bits per encoding.
len(HOTGYM_ENCODED[0])
# Show how often each bit is used: element-wise sum of every encoding.
# (reduce is a builtin under Python 2.)
np.set_printoptions(threshold=1024)
reduce(lambda accum, encoding: accum + encoding,
       HOTGYM_ENCODED,
       np.zeros(1024, dtype='uint32'))
class ColumnStatesPatcher(object):
    """Records, per timestep, how well the TM's column predictions did.

    Wraps a TemporalMemory's compute() so that every call appends one CSV
    row counting unpredicted-active, predicted-inactive, and
    predicted-active columns. drawEverything() reads the accumulated CSV
    back out of self.output.
    """

    def __init__(self):
        # Accumulates the CSV bytes written by the patched compute().
        self.output = BytesIO()

    def patchTM(self, tm):
        """Monkey-patch tm.compute to log one CSV row per timestep."""
        csvOutput = csv.writer(self.output)
        headerRow = [
            'n-unpredicted-active-columns',
            'n-predicted-inactive-columns',
            'n-predicted-active-columns',
        ]
        csvOutput.writerow(headerRow)

        computeMethod = tm.compute

        def myCompute(activeColumns, **kwargs):
            # Capture the predictions made on the *previous* timestep,
            # before compute() replaces them.
            predictedCells = tm.predictiveCells
            # Map each cell index to its column index. Floor division is
            # identical to / for ints under Python 2, and stays an int
            # (and correct) under Python 3.
            predictedColumns = set([cell // tm.cellsPerColumn
                                    for cell in predictedCells])
            computeResult = computeMethod(activeColumns, **kwargs)
            # activeColumns is expected to be a set, so these are set ops.
            row = (
                len(activeColumns - predictedColumns),
                len(predictedColumns - activeColumns),
                len(activeColumns & predictedColumns),
            )
            csvOutput.writerow(row)
            return computeResult

        tm.compute = myCompute
class SegmentLifetimeChartBuilder(object):
    """Tracks each segment's lifetime and its (in)correct matches/activations.

    Monkey-patches a TemporalMemory so that segment creation, destruction,
    matching, and activation are logged per timestep. getOutput() serializes
    the full history to JSON for the segment-lifetimes chart.
    """

    def __init__(self):
        # (cell, segment) => per-segment stats dict (living segments)
        self.segments = {}
        # (cell, segment, stats) tuples; a list because a (cell, segment)
        # pair might get destroyed (and recreated) multiple times
        self.destroyedSegments = []
        self.timestep = 0

    @staticmethod
    def _summarize(data):
        """Build the JSON-ready summary of one segment, omitting empty lists."""
        out = {'birthstep': data['birthstep']}
        # Only destroyed segments carry a 'deathstep' key.
        if 'deathstep' in data:
            out['deathstep'] = data['deathstep']
        for key in ('correctMatches', 'incorrectMatches',
                    'correctActivations', 'incorrectActivations'):
            if len(data[key]) > 0:
                out[key] = data[key]
        return out

    def getOutput(self):
        """Serialize all segments, living and destroyed, as a JSON string."""
        outputSegments = [self._summarize(data)
                          for _, _, data in self.destroyedSegments]
        outputSegments += [self._summarize(data)
                           for data in self.segments.values()]
        outputSegments.sort(key=lambda x: x['birthstep'])
        return json.dumps({
            'nTimesteps': self.timestep,
            'segments': outputSegments
        })

    def _freshStats(self):
        """Stats record for a segment born on the current timestep."""
        return {
            'birthstep': self.timestep,
            'correctMatches': [],
            'incorrectMatches': [],
            'correctActivations': [],
            'incorrectActivations': []
        }

    def patchTM(self, tm):
        """Monkey-patch segment creation/destruction and compute() on tm."""
        createSegment = tm.connections.createSegment

        def myCreateSegment(cell):
            segment = createSegment(cell)
            k = (cell, segment)
            assert k not in self.segments
            self.segments[k] = self._freshStats()
            return segment
        tm.connections.createSegment = myCreateSegment

        destroySegment = tm.connections.destroySegment

        def myDestroySegment(segment):
            # Look up the owning cell before the connections forget it.
            cell = tm.connections.cellForSegment(segment)
            destroySegment(segment)
            data = self.segments.pop((cell, segment))
            data['deathstep'] = self.timestep
            self.destroyedSegments.append((cell, segment, data))
        tm.connections.destroySegment = myDestroySegment

        compute = tm.compute

        def myCompute(activeColumns, **kwargs):
            self.beforeCompute(tm, activeColumns)
            compute(activeColumns, **kwargs)
            self.afterCompute(tm, activeColumns)
        tm.compute = myCompute

    def beforeCompute(self, tm, activeColumns):
        # Snapshot which segments were active/matching going INTO this
        # timestep; afterCompute() classifies them once the new active
        # cells are known.
        self.active = [(tm.connections.cellForSegment(segment), segment)
                       for segment in tm.activeSegments]
        self.matching = [(tm.connections.cellForSegment(segment), segment)
                         for segment in tm.matchingSegments]

    def _lookup(self, k):
        """Find the stats dict for key k among living or destroyed segments.

        Searches destroyedSegments newest-first so a recreated
        (cell, segment) pair resolves to its most recent incarnation.
        Returns None if the segment was never recorded.
        """
        if k in self.segments:
            return self.segments[k]
        cell, segment = k
        for cell2, segment2, data in reversed(self.destroyedSegments):
            if cell2 == cell and segment2 == segment:
                return data
        return None

    def afterCompute(self, tm, activeColumns):
        # A segment predicted correctly when its cell ended up active.
        for k in self.active:
            cell, _ = k
            segmentData = self._lookup(k)
            if cell in tm.activeCells:
                segmentData['correctActivations'].append(self.timestep)
            else:
                segmentData['incorrectActivations'].append(self.timestep)
        for k in self.matching:
            cell, _ = k
            segmentData = self._lookup(k)
            if cell in tm.activeCells:
                segmentData['correctMatches'].append(self.timestep)
            else:
                segmentData['incorrectMatches'].append(self.timestep)
        self.timestep += 1
# Shared chart builders for experiment 1; every run below appends to them,
# so the charts show the full history across all three phases.
experiment1_column_states = ColumnStatesPatcher()
experiment1_segment_lifetimes = SegmentLifetimeChartBuilder()
def experiment1(tm=None):
    """Feed the hotgym encodings through a TemporalMemory.

    Builds a fresh TM with the standard parameters when none is supplied,
    patches it so the chart builders record its behavior, then runs every
    timestep of HOTGYM_INDICES through it.

    @param tm: an existing (possibly already-trained) TemporalMemory, or
               None to create a new one.
    @return the TemporalMemory, so it can be reused by later cells.
    """
    if tm is None:
        tm = nupic.research.temporal_memory.TemporalMemory(
            columnDimensions=(1024,),
            cellsPerColumn=4,
            activationThreshold=13,
            initialPermanence=0.21,
            connectedPermanence=0.50,
            minThreshold=10,
            maxNewSynapseCount=20,
            permanenceIncrement=0.10,
            permanenceDecrement=0.10,
            predictedSegmentDecrement=0.07*0.10,
            maxSegmentsPerCell=4,
            maxSynapsesPerSegment=255,
            seed=42)
    experiment1_column_states.patchTM(tm)
    experiment1_segment_lifetimes.patchTM(tm)
    # 1-based counter purely for progress reporting.
    for i, activeColumns in enumerate(HOTGYM_INDICES, 1):
        if i % 100 == 0:
            print ("timestep %d" % i)
        tm.compute(activeColumns)
    return tm
# Phase 1: train a fresh TM on the hotgym data and draw the charts.
e1_tm = experiment1()
drawEverything(experiment1_column_states, experiment1_segment_lifetimes)
# Phase 2: feed 1000 timesteps of pure noise into the same TM -- 73 random
# active columns per step, matching the hotgym encoding's sparsity.
for i in xrange(1000):
    if i % 100 == 0:
        print "timestep %d" % i
    activeColumns = set(random.sample(xrange(1024), 73))
    e1_tm.compute(activeColumns)
drawEverything(experiment1_column_states, experiment1_segment_lifetimes)
# Phase 3: replay the hotgym data to see whether the TM recovers.
experiment1(e1_tm)
drawEverything(experiment1_column_states, experiment1_segment_lifetimes)