지금 당장은 Python으로 GridFit 버전을 구현했습니다. 다른 사람이 사용하고 싶다면, 자유롭게 - CC-Zero에 만족합니다. 알고리즘을 개선 할 수있는 방법이있을 수 있습니다. 예를 들어, 상자의 종횡비가 아닌 점 분포를 사용하여 수직 및 수평으로 양분 할시기를 선택하는 방법이 있습니다.
import numpy as np
def bisect(points, indices, bottom_left, top_right):
'''Freely redistributable Python implementation by Yan Wong of the pixel-fitting "Gridfit" algorithm as described in: Keim, D. A.
and Herrmann, A. (1998) The Gridfit algorithm: an efficient and effective approach to visualizing large amounts of spatial data.
Proceedings of the Conference on Visualization \'98, 181-188.
The implementation here differs in 2 main respects from that in the paper. Firstly areas are not always bisected in horizontal then vertical order,
instead they are bisected horizontally if the area is taller then wide, and vertically if wider than tall. Secondly, a single pass algorithm
is used which produces greater consistency, in that the order of the points in the dataset does not determine the outcome (unless points have
identical X or Y values. Details are described in comments within the code.'''
if len(indices)==0:
return
width_minus_height = np.diff(top_right - bottom_left)
if width_minus_height == 0:
#bisect on the dimension which best splits up the point to each side of the midline
evenness = np.abs(np.mean(points[indices] < (top_right+bottom_left)/2.0, axis=0)-0.5)
dim = int(evenness[0] > evenness[1])
else:
dim = int(width_minus_height > 0) #if if wider than tall, bisect on dim = 1
minpix = bottom_left[dim]
maxpix = top_right[dim]
size = maxpix-minpix
if size == 1: # we are done: set the position of the point to the middle of the pix
if len(indices) > 1: print "ERROR" #sanity check: remove for faster speed
points[indices, :] = bottom_left+0.5
return
other_dim = top_right[1-dim] - bottom_left[1-dim]
cutpoint_from = (maxpix+minpix)/2.0
cutpoint_to = None
lower_cut = int(np.floor(cutpoint_from))
upper_cut = int(np.ceil(cutpoint_from))
lower = points[indices, dim] < lower_cut
upper = points[indices, dim] >= upper_cut
lower_points = indices[lower]
upper_points = indices[upper]
if lower_cut!=upper_cut: # initial cutpoint falls between pixels. If cutpoint will not shift, we need to round it up or down to the nearest integer
mid_points = indices[np.logical_and(~lower, ~upper)]
low_cut_lower = len(lower_points) <= (lower_cut - minpix) * other_dim
low_cut_upper = len(upper_points) + len(mid_points) <= (maxpix-lower_cut) * other_dim
up_cut_lower = len(lower_points) + len(mid_points) <= (upper_cut-minpix) * other_dim
up_cut_upper = len(upper_points) <= (maxpix-upper_cut) * other_dim
low_cut_OK = (low_cut_lower and low_cut_upper)
up_cut_OK = (up_cut_lower and up_cut_upper)
if low_cut_OK and not up_cut_OK:
cutpoint_from = lower_cut
upper_points = np.append(upper_points, mid_points)
elif up_cut_OK and not low_cut_OK:
cutpoint_from = upper_cut
lower_points = np.append(lower_points, mid_points)
else:
lowmean = np.mean(points[indices, dim]) < cutpoint_from
if low_cut_OK and up_cut_OK:
if (lowmean):
cutpoint_from = lower_cut
upper_points = np.append(upper_points, mid_points)
else:
cutpoint_from = upper_cut
lower_points = np.append(lower_points, mid_points)
else:
#if neither low_cut_OK or up_cut_OK, we will end up shifting the cutpoint to an integer value anyway => no need to round up or down
lower_points = indices[points[indices, dim] < cutpoint_from]
upper_points = indices[points[indices, dim] >= cutpoint_from]
if (lowmean):
cutpoint_to = lower_cut
else:
cutpoint_to = upper_cut
else:
if len(lower_points) > (cutpoint_from-minpix) * other_dim or len(upper_points) > (maxpix-cutpoint_from) * other_dim:
top = maxpix - len(upper_points) * 1.0/other_dim
bot = minpix + len(lower_points) * 1.0/other_dim
if len(lower_points) > len(upper_points):
cutpoint_to = int(np.floor(bot)) #shift so that the area with most points shifted as little as poss
#cutpoint_to = int(np.floor(top)) #alternative shift giving area with most points max to play with: seems to give worse results
elif len(lower_points) < len(upper_points):
cutpoint_to = int(np.ceil(top)) #shift so that the area with most points shifted as little as poss
#cutpoint_to = int(np.ceil(bot)) #alternative shift giving area with most points max to play with: seems to give worse results
if cutpoint_to is None:
cutpoint_to = cutpoint_from
else:
# As identified in the Gridfit paper, we may still not be able to fit points into the space, if they fall on the dividing line, e.g.
# imagine 9 pixels (3x3), with 5 points on one side of the (integer) cut line and 4 on the other. For consistency, and to avoid 2 passes
# we simply pick a different initial cutoff line, so that one or more points are shifted between the initial lower and upper regions
#
# At the same time we can deal with cases when we have 2 identical values, by adding or subtracting a small increment to the first in the list
cutpoint_to = np.clip(cutpoint_to, minpix+1, maxpix-1) #this means we can get away with fewer recursions
if len(lower_points) > (cutpoint_to - minpix) * other_dim:
sorted_indices = indices[np.argsort(points[indices, dim])]
while True:
cutoff_index = np.searchsorted(points[sorted_indices, dim], cutpoint_from, 'right')
if cutoff_index <= (cutpoint_to - minpix) * other_dim:
lower_points = sorted_indices[:cutoff_index]
upper_points = sorted_indices[cutoff_index:]
break;
below = sorted_indices[cutoff_index + [-1,-2] ]
if (np.diff(points[below, dim])==0): #rare: only if points have exactly the same value. If so, shift the upper one up a bit
points[below[0], dim] += min(0.001, np.diff(points[sorted_indices[slice(cutoff_index-1, cutoff_index+1)], dim]))
cutpoint_from = np.mean(points[below, dim]) #place new cutpoint between the two points below the current cutpoint
if len(upper_points) > (maxpix - cutpoint_to) * other_dim:
sorted_indices = indices[np.argsort(points[indices, dim])]
while True:
cutoff_index = np.searchsorted(points[sorted_indices, dim], cutpoint_from, 'left')
if len(sorted_indices)-cutoff_index <= (maxpix - cutpoint_to) * other_dim:
lower_points = sorted_indices[:cutoff_index]
upper_points = sorted_indices[cutoff_index:]
break;
above = sorted_indices[cutoff_index + [0,1] ]
if (np.diff(points[above, dim])==0): #rare: only if points have exactly the same value. If so, shift the lower one down a bit
points[above[0], dim] -= min(0.001, np.diff(points[sorted_indices[slice(cutoff_index-1, cutoff_index+1)], dim]))
cutpoint_from = np.mean(points[above, dim]) #place new cutpoint above the two points below the current cutpoint
#transform so that lower set of points runs from minpix .. cutpoint_to instead of minpix ... cutpoint_from
points[lower_points, dim] = (points[lower_points, dim] - minpix) * (cutpoint_to - minpix)/(cutpoint_from - minpix) + minpix
#scale so that upper set of points runs from cutpoint_to .. maxpix instead of cutpoint_from ... maxpix
points[upper_points, dim] = (points[upper_points, dim] - cutpoint_from) * (maxpix - cutpoint_to)/(maxpix - cutpoint_from) + cutpoint_to
select_dim = np.array([1-dim, dim])
bisect(points, lower_points, bottom_left, top_right * (1-select_dim) + cutpoint_to * select_dim)
bisect(points, upper_points, bottom_left * (1-select_dim) + cutpoint_to * select_dim, top_right)
#visualise an example
from Tkinter import *
n_pix, scale = 500, 15
np.random.seed(12345)
#test on 2 normally distributed point clouds
all_points = np.vstack((np.random.randn(n_pix//2, 2) * 3 + 30, np.random.randn(n_pix//2, 2) * 6 + 2))
#all_points = np.rint(all_points*50).astype(np.int)/50.0 #test if the algorithm works with rounded
bl, tr = np.floor(np.min(all_points, 0)), np.ceil(np.max(all_points, 0))
print "{} points to distribute among {} = {} pixels".format(all_points.shape[0], "x".join(np.char.mod("%i", tr-bl)), np.prod(tr-bl))
if np.prod(tr-bl) > n_pix:
pts = all_points.copy()
bisect(all_points, np.arange(all_points.shape[0]), bl, tr)
print np.hstack((pts,all_points))
print "Mean distance between original and new point = {}".format(np.mean(np.sqrt(np.sum((pts - all_points)**2, 1))))
master = Tk()
hw = (tr-bl)* scale +1
win = Canvas(master, width=hw[1], height=hw[0])
win.pack()
all_points = (all_points-bl) * scale
pts = (pts-bl) * scale
for i in range(pts.shape[0]):
win.create_line(int(pts[i,1]), int(pts[i,0]), int(all_points[i,1]), int(all_points[i,0]))
for i in range(all_points.shape[0]):
win.create_oval(int(pts[i,1])-2, int(pts[i,0])-2, int(pts[i,1])+2, int(pts[i,0])+2, fill="blue")
for i in range(all_points.shape[0]):
win.create_oval(int(all_points[i,1])-3, int(all_points[i,0])-3, int(all_points[i,1])+3, int(all_points[i,0])+3, fill="red")
mainloop()
그리고 당신이있는 경우, 가장 가까운 격자 점이다,라고 200 점 말 (100, 100), 당신이 정말로 각 더 부정확 한 결과를, 그들의 진정한 위치에서 점점 더 그들을 밖으로 확산주고 유지 하시겠습니까 후속 지점? 다른 색깔의 점 또는 다른 모양의 마커 또는 주석 또는 무언가를 사용하여 가까운 일치를 처리하는 것이 더 나을 수도 있습니다. 그렇지 않으면 특정 값 근처에있는 200 번째 포인트가 실제로 실제 위치에서 16 개 그리드 포인트 떨어져 있습니다 ... – twalberg
사실,이 경우에는 포인트의 xy 위치가 어쩌면 이상하지 않습니다. 적색/녹색 점의 비율보다 중요합니다. 그래도 원본에 가까울수록 좋습니다. 또한 변위의 측정이 너무 큰 경우 중단 할 수도 있습니다. – user2667066
P. 이것이 나의 목표는 아니지만 맵의 이미지 위에 다수의 지리 참조 연산 된 정사각형 이미지를 최적으로 배치하는 것으로 상상할 수도 있습니다. 이미지가 특정 지역에 붐비고 다른 사람들에게는 전혀 없다는 문제가 있습니다. 이 경우 내가 위의 "픽셀"이라고 부르는 것은 사실상 40x40 픽셀의 미리보기 이미지 일 수 있으며, 수천 개의 미리보기 이미지를 사용하여 10 메가 픽셀지도 이미지에 배치 할 수 있습니다. 목표는 그림을 지리적 위치로 연결하는 선의 길이를 최소화하는 것입니다. 물론 똑같은 것은 아니지만 합리적으로 비슷한 문제입니다. – user2667066