所有数据:[[2,2],[3,3],[4,4],[-2,-2],[-3,-3],[-4,-4]]
1 T1=10 T2=5 距离度量:org.apache.mahout.common.distance.ManhattanDistanceMeasure
2 [ 2, 2] 产生集群{ID:0 中心:[2,2] 归属点:[2,2]}
3 [ 3, 3] 与ID:0距离为2 <T2,强归属点 更新集群{ID:0 中心:[2,2] 归属点:[2,2],[3,3]}
4 [ 4, 4] 与ID:0距离为4 <T2,强归属点 更新集群{ID:0 中心:[2,2] 归属点:[2,2],[3,3],[4,4]}
5 [-2,-2] 与ID:0距离为8 <T1,>T2,弱归属点 更新集群{ID:0 中心:[2,2] 归属点:[2,2],[3,3],[4,4],[-2,-2]} 产生集群{ID:1 中心:[-2,-2] 归属点:[-2,-2]}
6 [-3,-3] 与ID:0距离为10 =T1(不小于T1),>T2,不归属ID:0
与ID:1距离为2 <T2,强归属点 更新集群{ID:1 中心:[-2,-2] 归属点:[-2,-2],[-3,-3]}
7 [-4,-4] 与ID:0距离为12 >T1,>T2,不归属ID:0
与ID:1距离为4 <T2,强归属点 更新集群{ID:1 中心:[-2,-2] 归属点:[-2,-2],[-3,-3],[-4,-4]}
8 重新计算集群中心点:{ID:0 中心:[1.75,1.75] 归属点:[2,2],[3,3],[4,4],[-2,-2]} {ID:1 中心:[-3,-3] 归属点:[-2,-2],[-3,-3],[-4,-4]}
9 对中心点进行集群:[[1.75,1.75],[-3,-3]]
10 [1.75,1.75] 产生集群{ID:A0 中心:[1.75,1.75] 归属点:[1.75,1.75]}
11 [-3,-3] 与ID:A0距离为9.5 <T1,>T2,弱归属点 更新集群{ID:A0 中心:[1.75,1.75] 归属点:[1.75,1.75],[-3,-3]} 产生集群{ID:A1 中心:[-3,-3] 归属点:[-3,-3]}
12 重新计算集群中心点:{ID:A0 中心:[-0.625,-0.625] 归属点:[1.75,1.75],[-3,-3]} {ID:A1 中心:[-3,-3] 归属点:[-3,-3]}
对所有数据进行集群计算,易得[2,2],[3,3],[4,4]属于A0,[-2,-2],[-3,-3],[-4,-4]属于A1
/* CanopyMapper.java*/
/**
 * Feeds one input vector into this mapper's running set of canopies.
 *
 * <p>Nothing is written to {@code context} here; the call only updates the
 * {@code canopies} collection through {@code canopyClusterer}.
 *
 * @param key     record key; not used by this method
 * @param point   wrapper holding the vector to cluster
 * @param context job context; not used by this method
 */
@Override
protected void map(WritableComparable<?> key,
                   VectorWritable point,
                   Context context) throws IOException, InterruptedException {
    // Only the vector payload takes part in clustering.
    canopyClusterer.addPointToCanopies(point.get(), canopies);
}
/**
 * Emits the center of every accumulated canopy that passes the size filter.
 *
 * <p>Each canopy has its parameters recomputed first. Canopies whose
 * observation count is not greater than {@code clusterFilter} are skipped.
 * Every emitted center is written under the same constant key
 * {@code "centroid"}.
 *
 * @param context job context that receives the centers
 */
@Override
protected void cleanup(Context context) throws IOException, InterruptedException {
    for (Canopy candidate : canopies) {
        candidate.computeParameters();
        if (candidate.getNumObservations() <= clusterFilter) {
            // Too few observations: drop this canopy from the output.
            continue;
        }
        Text sharedKey = new Text("centroid");
        VectorWritable center = new VectorWritable(candidate.getCenter());
        context.write(sharedKey, center);
    }
    super.cleanup(context);
}
/* CanopyClusterer.java*/
/**
 * Assigns a point to the given canopies, adding a new canopy when the point
 * is not strongly bound to any existing one.
 *
 * <p>The point is compared against every canopy; the scan does not stop at
 * the first match. A canopy observes the point when the measured distance to
 * its center is below {@code t1}. If no canopy lies closer than {@code t2},
 * a new canopy is constructed from the point and appended to the collection.
 *
 * @param point    the vector to place
 * @param canopies the canopies built so far; may be appended to by this call
 */
public void addPointToCanopies(Vector point, Collection<Canopy> canopies) {
    boolean withinT2OfSomeCanopy = false;
    for (Canopy existing : canopies) {
        Vector center = existing.getCenter();
        double distance = measure.distance(center.getLengthSquared(), center, point);
        if (distance < t1) {
            // Guarded so the vector is only formatted when debug output is on.
            if (log.isDebugEnabled()) {
                log.debug("Added point: {} to canopy: {}", AbstractCluster.formatVector(point, null), existing.getIdentifier());
            }
            existing.observe(point);
        }
        if (distance < t2) {
            withinT2OfSomeCanopy = true;
        }
    }
    if (withinT2OfSomeCanopy) {
        return;
    }
    if (log.isDebugEnabled()) {
        log.debug("Created new Canopy:{} at center:{}", nextCanopyId, AbstractCluster.formatVector(point, null));
    }
    canopies.add(new Canopy(point, nextCanopyId++, measure));
}
/* CanopyReducer.java */
@Override
protected void reduce(Text arg0, Iterable<VectorWritable> values,
Context context) throws IOException, InterruptedException {
for (VectorWritable value : values) {
Vector point = value.get();
canopyClusterer.addPointToCanopies(point, canopies);
}
for (Canopy canopy : canopies) {
canopy.computeParameters();
if (canopy.getNumObservations() > clusterFilter) {
ClusterWritable clusterWritable = new ClusterWritable();
clusterWritable.setValue(canopy);
context.write(new Text(canopy.getIdentifier()), clusterWritable);
}
}
} |
/* CanopyMapper.java*/
/**
 * Adds the vector carried by {@code point} to the canopies collected so far.
 *
 * <p>This method writes nothing to {@code context}; it only delegates to
 * {@code canopyClusterer}, which updates {@code canopies}.
 *
 * @param key     record key; ignored
 * @param point   wrapper around the vector to cluster
 * @param context job context; ignored by this method
 */
@Override
protected void map(
        WritableComparable<?> key,
        VectorWritable point,
        Context context) throws IOException, InterruptedException {
    canopyClusterer.addPointToCanopies(point.get(), canopies);
}
/**
 * Writes out the centers of the collected canopies that pass the size filter.
 *
 * <p>Parameters are recomputed for every canopy. A center is written, under
 * the constant key {@code "centroid"}, only when the canopy's observation
 * count is greater than {@code clusterFilter}.
 *
 * @param context job context that receives the centers
 */
@Override
protected void cleanup(Context context) throws IOException, InterruptedException {
    for (Canopy candidate : canopies) {
        candidate.computeParameters();
        if (candidate.getNumObservations() <= clusterFilter) {
            // Filtered out: not enough observations.
            continue;
        }
        Text sharedKey = new Text("centroid");
        VectorWritable center = new VectorWritable(candidate.getCenter());
        context.write(sharedKey, center);
    }
    super.cleanup(context);
}
/* CanopyClusterer.java*/
/**
 * Places a point into the supplied canopies, opening a new canopy when no
 * existing one is within {@code t2} of it.
 *
 * <p>All canopies are examined. Any canopy whose center is closer than
 * {@code t1} observes the point. A point that is closer than {@code t2} to at
 * least one canopy is considered strongly bound and does not start a new
 * canopy; otherwise a new canopy built from the point is appended.
 *
 * @param point    the vector to place
 * @param canopies the canopies built so far; may be appended to by this call
 */
public void addPointToCanopies(Vector point, Collection<Canopy> canopies) {
    boolean withinT2OfSomeCanopy = false;
    for (Canopy existing : canopies) {
        Vector center = existing.getCenter();
        double distance = measure.distance(center.getLengthSquared(), center, point);
        if (distance < t1) {
            // Guarded so the vector is only formatted when debug output is on.
            if (log.isDebugEnabled()) {
                log.debug("Added point: {} to canopy: {}", AbstractCluster.formatVector(point, null), existing.getIdentifier());
            }
            existing.observe(point);
        }
        if (distance < t2) {
            withinT2OfSomeCanopy = true;
        }
    }
    if (withinT2OfSomeCanopy) {
        return;
    }
    if (log.isDebugEnabled()) {
        log.debug("Created new Canopy:{} at center:{}", nextCanopyId, AbstractCluster.formatVector(point, null));
    }
    canopies.add(new Canopy(point, nextCanopyId++, measure));
}
/* CanopyReducer.java */
/**
 * Builds canopies from the incoming vectors and emits those that pass the
 * size filter.
 *
 * <p>Each value is added to {@code canopies} via {@code canopyClusterer}.
 * Afterwards every canopy has its parameters recomputed; canopies whose
 * observation count is greater than {@code clusterFilter} are wrapped in a
 * {@code ClusterWritable} and written under their identifier.
 *
 * @param arg0    the key the values were grouped under; not used by this method
 * @param values  the vectors to cluster
 * @param context job context that receives the resulting canopies
 */
@Override
protected void reduce(Text arg0, Iterable<VectorWritable> values, Context context)
        throws IOException, InterruptedException {
    for (VectorWritable wrapped : values) {
        canopyClusterer.addPointToCanopies(wrapped.get(), canopies);
    }
    for (Canopy candidate : canopies) {
        candidate.computeParameters();
        if (candidate.getNumObservations() <= clusterFilter) {
            // Filtered out: not enough observations.
            continue;
        }
        ClusterWritable payload = new ClusterWritable();
        payload.setValue(candidate);
        Text outputKey = new Text(candidate.getIdentifier());
        context.write(outputKey, payload);
    }
}