I rewrote my randomvertexcolors addon to use NumPy and got a speedup of roughly 1.5× (the run time dropped by about 35%, from 3.1 seconds to 2.0 seconds, on a mesh with almost 1M polygons) with the following code:

```
def execute(self, context):
    """Assign one random color per polygon to the active vertex color layer.

    Switches to object mode, then writes the same random RGB triple to
    every loop of each polygon of the active object's mesh. When
    ``self.usenumpy`` is true the polygon data is pulled into NumPy arrays
    with ``foreach_get`` and the colors are written back in a single
    ``foreach_set`` call; otherwise the loops are assigned one at a time.
    When ``self.timeit`` is true the elapsed time of the coloring step is
    printed.

    Returns:
        ``{'FINISHED'}``.
    """
    bpy.ops.object.mode_set(mode='OBJECT')
    mesh = context.scene.objects.active.data
    vertex_colors = mesh.vertex_colors.active.data
    polygons = mesh.polygons
    verts = mesh.vertices
    npolygons = len(polygons)
    nverts = len(verts)
    nloops = len(vertex_colors)

    start = time()
    if self.usenumpy:
        # np.int was only an alias for the builtin int; it was deprecated
        # in NumPy 1.20 and removed in 1.24, so name a concrete dtype.
        # NOTE(review): int32 presumably matches how Blender stores these
        # properties (C int) -- confirm against the Blender version in use.
        startloop = np.empty(npolygons, dtype=np.int32)
        numloops = np.empty(npolygons, dtype=np.int32)
        polygon_indices = np.empty(npolygons, dtype=np.int32)
        polygons.foreach_get('index', polygon_indices)
        polygons.foreach_get('loop_start', startloop)
        polygons.foreach_get('loop_total', numloops)

        # One RGB triple per polygon, copied to each of that polygon's loops.
        colors = np.random.random_sample((npolygons, 3))
        # Loops not covered by any polygon would keep uninitialised values;
        # presumably every loop belongs to a polygon -- TODO confirm.
        loopcolors = np.empty((nloops, 3))
        # 'zerosize_ok' lets nditer accept empty operands; without it a
        # mesh that has no polygons raises ValueError here.
        for s, n, pi in np.nditer([startloop, numloops, polygon_indices],
                                  flags=['zerosize_ok']):
            loopcolors[s:s + n] = colors[pi]
        # foreach_set wants a flat sequence; ravel() avoids the extra copy
        # that flatten() always makes.
        vertex_colors.foreach_set("color", loopcolors.ravel())
    else:
        for poly in polygons:
            color = [random(), random(), random()]
            for loop_index in range(poly.loop_start,
                                    poly.loop_start + poly.loop_total):
                vertex_colors[loop_index].color = color

    if self.timeit:
        print("%s: %d/%d (verts/polys) in %.1f seconds" % (
            "numpy" if self.usenumpy else "plain",
            nverts, npolygons, time() - start))

    # NOTE(review): the VERTEX_PAINT -> EDIT -> VERTEX_PAINT round trip looks
    # like a way to force the viewport to pick up the new colors -- confirm.
    bpy.ops.object.mode_set(mode='VERTEX_PAINT')
    bpy.ops.object.mode_set(mode='EDIT')
    bpy.ops.object.mode_set(mode='VERTEX_PAINT')
    context.scene.update()
    return {'FINISHED'}
```

As you can see, I first retrieved all the indices from both the loops and the polygons, and then assigned the random colors using NumPy's nditer(). I am not a NumPy expert, but I suspect that even better results are possible by building index arrays instead of creating all those slice objects.