--- seg.c 2006-09-10 18:36:51.000000000 +0100 +++ seg.c_new 2009-04-22 17:03:13.000000000 +0100 @@ -69,7 +69,6 @@ bool seg_right(SEG * a, SEG * b); bool seg_over_right(SEG * a, SEG * b); SEG *seg_union(SEG * a, SEG * b); -SEG *seg_inter(SEG * a, SEG * b); void rt_seg_size(SEG * a, float *sz); float *seg_size(SEG * a); @@ -269,14 +268,22 @@ gseg_penalty(GISTENTRY *origentry, GISTENTRY *newentry, float *result) { SEG *ud; + SEG *origrange; + SEG *newrange; float tmp1, tmp2; - ud = seg_union((SEG *) DatumGetPointer(origentry->key), - (SEG *) DatumGetPointer(newentry->key)); + origrange = (SEG *) DatumGetPointer(origentry->key); + newrange = (SEG *) DatumGetPointer(newentry->key); + ud = seg_union(origrange, newrange); rt_seg_size(ud, &tmp1); - rt_seg_size((SEG *) DatumGetPointer(origentry->key), &tmp2); - *result = tmp1 - tmp2; + rt_seg_size(origrange, &tmp2); + if (tmp1 == tmp2) { + rt_seg_size(newrange, &tmp1); + *result = (tmp2 - tmp1) / tmp2; + } else { + *result = tmp1 - tmp2 + 1.0; + } #ifdef GIST_DEBUG fprintf(stderr, "penalty\n"); @@ -297,27 +304,16 @@ GIST_SPLITVEC *v) { OffsetNumber i, - j; SEG *datum_alpha, - *datum_beta; SEG *datum_l, *datum_r; - SEG *union_d, - *union_dl, - *union_dr; - SEG *inter_d; - bool firsttime; - float size_alpha, - size_beta, - size_union, - size_inter; - float size_waste, - waste; - float size_l, - size_r; + bool firsttime; + float lowest, + highest, + midpoint, + split, + midpointsum; int nbytes; - OffsetNumber seed_1 = 1, - seed_2 = 2; OffsetNumber *left, *right; OffsetNumber maxoff; @@ -326,107 +322,64 @@ fprintf(stderr, "picksplit\n"); #endif - maxoff = entryvec->n - 2; + maxoff = entryvec->n - 1; nbytes = (maxoff + 2) * sizeof(OffsetNumber); v->spl_left = (OffsetNumber *) palloc(nbytes); v->spl_right = (OffsetNumber *) palloc(nbytes); + midpointsum = 0.0; firsttime = true; - waste = 0.0; + lowest = 0.0; + highest = 0.0; - for (i = FirstOffsetNumber; i < maxoff; i = OffsetNumberNext(i)) + for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i)) { datum_alpha = (SEG *) DatumGetPointer(entryvec->vector[i].key); - for (j = OffsetNumberNext(i); j <= maxoff; j = OffsetNumberNext(j)) - { - datum_beta = (SEG *) DatumGetPointer(entryvec->vector[j].key); - - /* compute the wasted space by unioning these guys */ - /* size_waste = size_union - size_inter; */ - union_d = seg_union(datum_alpha, datum_beta); - rt_seg_size(union_d, &size_union); - inter_d = seg_inter(datum_alpha, datum_beta); - rt_seg_size(inter_d, &size_inter); - size_waste = size_union - size_inter; - - /* - * are these a more promising split that what we've already seen? - */ - if (size_waste > waste || firsttime) - { - waste = size_waste; - seed_1 = i; - seed_2 = j; - firsttime = false; - } - } + midpoint = (datum_alpha->lower + datum_alpha->upper) / 2; + midpointsum += midpoint; + if (firsttime || (midpoint < lowest)) + { + lowest = midpoint; + } + if (firsttime || (midpoint > highest)) + { + highest = midpoint; + } + firsttime = false; } + split = midpointsum / maxoff; left = v->spl_left; v->spl_nleft = 0; right = v->spl_right; v->spl_nright = 0; - datum_alpha = (SEG *) DatumGetPointer(entryvec->vector[seed_1].key); - datum_l = seg_union(datum_alpha, datum_alpha); - rt_seg_size(datum_l, &size_l); - datum_beta = (SEG *) DatumGetPointer(entryvec->vector[seed_2].key); - datum_r = seg_union(datum_beta, datum_beta); - rt_seg_size(datum_r, &size_r); + datum_l = (SEG *) palloc(sizeof(*datum_l)); + datum_l->lower = lowest; + datum_l->upper = lowest; + datum_r = (SEG *) palloc(sizeof(*datum_r)); + datum_r->lower = highest; + datum_r->upper = highest; /* - * Now split up the regions between the two seeds. An important property - * of this split algorithm is that the split vector v has the indices of - * items to be split in order in its left and right vectors. We exploit - * this property by doing a merge in the code that actually splits the - * page. - * - * For efficiency, we also place the new index tuple in this loop. This is - * handled at the very end, when we have placed all the existing tuples - * and i == maxoff + 1. + * Now split up the regions between the two seeds. */ - maxoff = OffsetNumberNext(maxoff); for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i)) { - /* - * If we've already decided where to place this item, just put it on - * the right list. Otherwise, we need to figure out which page needs - * the least enlargement in order to store the item. - */ - - if (i == seed_1) - { - *left++ = i; - v->spl_nleft++; - continue; - } - else if (i == seed_2) - { - *right++ = i; - v->spl_nright++; - continue; - } - - /* okay, which page needs least enlargement? */ datum_alpha = (SEG *) DatumGetPointer(entryvec->vector[i].key); - union_dl = seg_union(datum_l, datum_alpha); - union_dr = seg_union(datum_r, datum_alpha); - rt_seg_size(union_dl, &size_alpha); - rt_seg_size(union_dr, &size_beta); + midpoint = (datum_alpha->lower + datum_alpha->upper) / 2; /* pick which page to add it to */ - if (size_alpha - size_l < size_beta - size_r) + if (mipoint <= split) { - datum_l = union_dl; - size_l = size_alpha; + datum_l = seg_union(datum_l, datum_alpha); *left++ = i; v->spl_nleft++; } else { - datum_r = union_dr; - size_r = size_alpha; + datum_r = seg_union(datum_r, datum_alpha); *right++ = i; v->spl_nright++; } @@ -665,45 +618,6 @@ return (n); } - -SEG * -seg_inter(SEG * a, SEG * b) -{ - SEG *n; - - n = (SEG *) palloc(sizeof(*n)); - - /* take min of upper endpoints */ - if (a->upper < b->upper) - { - n->upper = a->upper; - n->u_sigd = a->u_sigd; - n->u_ext = a->u_ext; - } - else - { - n->upper = b->upper; - n->u_sigd = b->u_sigd; - n->u_ext = b->u_ext; - } - - /* take max of lower endpoints */ - if (a->lower > b->lower) - { - n->lower = a->lower; - n->l_sigd = a->l_sigd; - n->l_ext = a->l_ext; - } - else - { - n->lower = b->lower; - n->l_sigd = b->l_sigd; - n->l_ext = b->l_ext; - } - - return (n); -} - void rt_seg_size(SEG * a, float *size) {