우선 데이터베이스 정렬이란 무엇입니까? 나는 그것이 SQL_Latin1_General_CP1_CS_AS
또는 SQL_Latin1_General_CP1_CI_AS
이라고 가정 할 것입니다. 그렇다면 다음이 작동해야합니다 (아직 완전히 테스트되지는 않았 음).
글자가 인 것처럼 보입니다. 유니 코드 분류기는 주요 사업입니다. 스펙보다 더 단순한 세금 코드를 보았습니다. ;-) 항상 lookup table (s)과 최소한 3 레벨의 정렬 (문자와 수축을 수정)을 포함하는 것으로 보입니다.
나는하여 Latin 1, Latin Extended-A에 다음과 같은 제한 및 Latin Extended-B 테이블/정렬했습니다. 알고리즘은 그 세트를 상당히 잘 처리해야하지만 필자는이를 충분히 테스트하지도 않았고 (속도와 복잡성을 줄이기 위해) 문자 수정을 적절하게 설명하지 못했습니다.
in action at jsbin.com을 참조하십시오.
기능 :
function bIgnoreForPrimarySort (iCharCode)
{
/*--- A bunch of characters get ignored for the primary sort weight.
The most important ones are the hyphen and apostrophe characters.
A bunch of control characters and a couple of odds and ends, make up
the rest.
*/
if (iCharCode < 9) return true;
if (iCharCode >= 14 && iCharCode <= 31) return true;
if (iCharCode >= 127 && iCharCode <= 159) return true;
if (iCharCode == 39 || iCharCode == 45 || iCharCode == 173) return true;
return false;
}
function SortByRoughSQL_Latin1_General_CP1_CS_AS (sA, sB)
{
/*--- This Sorts Latin1 and extended Latin1 unicode with an approximation
of SQL's SQL_Latin1_General_CP1_CS_AS collation.
Certain modifying characters or contractions my be off (not tested), we trade-off
perfect accuracy for speed and relative simplicity.
True unicode sorting is devilishly complex and we're not getting paid enough to
fully implement it in Javascript. ;-)
It looks like a definative sort would require painstaking exegesis of documents
such as: http://unicode.org/reports/tr10/
*/
//--- This is the master lookup table for Latin1 code-points. Here through the extended set \u02AF
//--- Make this static?
var aSortOrder = [
-1, 151, 152, 153, 154, 155, 156, 157, 158, 2, 3, 4, 5, 6, 159, 160, 161, 162, 163, 164,
165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 0, 7, 8, 9, 10, 11, 12, 210,
13, 14, 15, 41, 16, 211, 17, 18, 65, 69, 71, 74, 76, 77, 80, 81, 82, 83, 19, 20,
42, 43, 44, 21, 22, 214, 257, 266, 284, 308, 347, 352, 376, 387, 419, 427, 438, 459, 466, 486,
529, 534, 538, 559, 576, 595, 636, 641, 647, 650, 661, 23, 24, 25, 26, 27, 28, 213, 255, 265,
283, 307, 346, 350, 374, 385, 418, 426, 436, 458, 464, 485, 528, 533, 536, 558, 575, 594, 635, 640,
646, 648, 660, 29, 30, 31, 32, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189,
190, 191, 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209,
1, 33, 53, 54, 55, 56, 34, 57, 35, 58, 215, 46, 59, 212, 60, 36, 61, 45, 72, 75,
37, 62, 63, 64, 38, 70, 487, 47, 66, 67, 68, 39, 219, 217, 221, 231, 223, 233, 250, 276,
312, 310, 316, 318, 392, 390, 395, 397, 295, 472, 491, 489, 493, 503, 495, 48, 511, 599, 597, 601,
603, 652, 590, 573, 218, 216, 220, 230, 222, 232, 249, 275, 311, 309, 315, 317, 391, 389, 394, 396,
294, 471, 490, 488, 492, 502, 494, 49, 510, 598, 596, 600, 602, 651, 589, 655, 229, 228, 227, 226,
235, 234, 268, 267, 272, 271, 270, 269, 274, 273, 286, 285, 290, 287, 324, 323, 322, 321, 314, 313,
326, 325, 320, 319, 358, 357, 362, 361, 356, 355, 364, 363, 378, 377, 380, 379, 405, 404, 403, 402,
401, 400, 407, 406, 393, 388, 417, 416, 421, 420, 432, 431, 428, 440, 439, 447, 446, 444, 443, 442,
441, 450, 449, 468, 467, 474, 473, 470, 469, 477, 484, 483, 501, 500, 499, 498, 507, 506, 527, 526,
540, 539, 544, 543, 542, 541, 561, 560, 563, 562, 567, 566, 565, 564, 580, 579, 578, 577, 593, 592,
611, 610, 609, 608, 607, 606, 613, 612, 617, 616, 615, 614, 643, 642, 654, 653, 656, 663, 662, 665,
664, 667, 666, 574, 258, 260, 262, 261, 264, 263, 281, 278, 277, 304, 292, 289, 288, 297, 335, 337,
332, 348, 349, 369, 371, 382, 415, 409, 434, 433, 448, 451, 462, 476, 479, 509, 521, 520, 524, 523,
531, 530, 552, 572, 571, 569, 570, 583, 582, 581, 585, 632, 631, 634, 638, 658, 657, 669, 668, 673,
677, 676, 678, 73, 79, 78, 680, 644, 50, 51, 52, 40, 303, 302, 301, 457, 456, 455, 482, 481,
480, 225, 224, 399, 398, 497, 496, 605, 604, 626, 625, 620, 619, 624, 623, 622, 621, 334, 241, 240,
237, 236, 254, 253, 366, 365, 360, 359, 430, 429, 505, 504, 515, 514, 675, 674, 422, 300, 299, 298,
354, 353, 84, 85, 86, 87, 239, 238, 252, 251, 513, 512, 243, 242, 245, 244, 328, 327, 330, 329,
411, 410, 413, 412, 517, 516, 519, 518, 547, 546, 549, 548, 628, 627, 630, 629, 88, 89, 90, 91,
92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111,
112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131,
132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 246, 247, 248, 259, 279, 280, 293, 291,
339, 336, 338, 331, 340, 341, 342, 423, 367, 373, 351, 370, 372, 383, 381, 384, 408, 414, 386, 445,
453, 452, 454, 461, 463, 460, 475, 478, 465, 508, 522, 525, 532, 550, 553, 554, 555, 545, 556, 557,
537, 551, 568, 333, 424, 343, 344, 586, 584, 618, 633, 637, 639, 645, 659, 649, 670, 671, 672, 679,
681, 682, 683, 282, 686, 256, 345, 368, 375, 425, 435, 437, 535, 684, 685, 305, 296, 306, 591, 587,
588, 144, 145, 146, 147, 148, 149, 150
];
var iLenA = sA.length, iLenB = sB.length;
var jA = 0, jB = 0;
var sIgnoreBuff_A = [], sIgnoreBuff_B = [];
function iSortIgnoreBuff()
{
var iIgLenA = sIgnoreBuff_A.length, iIgLenB = sIgnoreBuff_B.length;
var kA = 0, kB = 0;
while (kA < iIgLenA && kB < iIgLenB)
{
var igA = sIgnoreBuff_A [kA++], igB = sIgnoreBuff_B [kB++];
if (aSortOrder[igA] > aSortOrder[igB]) return 1;
if (aSortOrder[igA] < aSortOrder[igB]) return -1;
}
//--- All else equal, longest string loses
if (iIgLenA > iIgLenB) return 1;
if (iIgLenA < iIgLenB) return -1;
return 0;
}
while (jA < iLenA && jB < iLenB)
{
var cA = sA.charCodeAt (jA++);
var cB = sB.charCodeAt (jB++);
if (cA == cB)
{
continue;
}
while (bIgnoreForPrimarySort (cA))
{
sIgnoreBuff_A.push (cA);
if (jA < iLenA)
cA = sA.charCodeAt (jA++);
else
break;
}
while (bIgnoreForPrimarySort (cB))
{
sIgnoreBuff_B.push (cB);
if (jB < iLenB)
cB = sB.charCodeAt (jB++);
else
break;
}
/*--- Have we reached the end of one or both strings, ending on an ignore char?
The strings were equal, up to that point.
If one of the strings is NOT an ignore char, while the other is, it wins.
*/
if (bIgnoreForPrimarySort (cA))
{
if (! bIgnoreForPrimarySort (cB)) return -1;
}
else if (bIgnoreForPrimarySort (cB))
{
return 1;
}
else
{
if (aSortOrder[cA] > aSortOrder[cB])
return 1;
if (aSortOrder[cA] < aSortOrder[cB])
return -1;
//--- We are equal, so far, on the main chars. Where there ignore chars?
var iBuffSort = iSortIgnoreBuff();
if (iBuffSort) return iBuffSort;
//--- Still here? Reset the ignore arrays.
sIgnoreBuff_A = [];
sIgnoreBuff_B = [];
}
} //-- while (jA < iLenA && jB < iLenB)
/*--- We have gone through all of at least one string and they are still both
equal barring ignore chars or unequal lengths.
*/
var iBuffSort = iSortIgnoreBuff();
if (iBuffSort) return iBuffSort;
//--- All else equal, longest string loses
if (iLenA > iLenB) return 1;
if (iLenA < iLenB) return -1;
return 0;
} //-- function SortByRoughSQL_Latin1_General_CP1_CS_AS
테스트 :
var aPhrases = [
'Grails Found',
'--Exhibit Visitors',
'-Exhibit Visitors',
'Exhibit Visitors',
'Calls Received',
'Ëxhibit Visitors',
'Brochures distributed',
'exhibit visitors',
'bags of Garbage',
'^&$Grails Found',
'?test'
];
aPhrases.sort (SortByRoughSQL_Latin1_General_CP1_CS_AS);
console.log (aPhrases.join ('\n'));
결과 :
?test
^&$Grails Found
bags of Garbage
Brochures distributed
Calls Received
exhibit visitors
Exhibit Visitors
-Exhibit Visitors
--Exhibit Visitors
Ëxhibit Visitors
Grails Found
그래서, 그 질문에서, 당신은에 자바 스크립트를 원하는 현재'A' 유니 코드 이전에'종류의 유니 코드 -A'? –
@Brock - 올바른, 더 구체적으로, 나는 서버 측에서 일치하는 자바 스크립트 정렬 알고리즘을 원합니다. ("-"문자보다 더 많이 고려해야한다고 생각합니다) – DanP