델파이 : Punicode 디코드

Punycode로 인코딩 된 정보를 찾았습니다 : Cyrillic domain name,하지만 어떻게 다시 해독합니까 (키릴 문자로 punycode)합니까?델파이 : Punicode 디코드

2011-12-30 dedoki

링크 된 답변의 링크를 따라주세요 !! –

및? Henri Gourvest는 그 단위로 pynicode로 인코딩하는 함수를 작성했습니다. 그 단위로 pynicode를 디코드하는 기능이 필요합니다! – dedoki

해당 단위의 인코딩 및 디코딩을 참조하십시오. –

링크 된 코드가 작동하지 않습니다. PunycodeDecode 함수가 작동하지 않습니다. 라인 (416) 읽

move(output[i], output[i + 1], (outidx - i) * SizeOf(output^));

이 대신 읽어야 C.에서 잘못 번역 한 것입니다 :

program Punycode; 

{$APPTYPE CONSOLE} 

uses 
    SysUtils, 
    dorPunyCode in 'dorPunyCode.pas'; 

function PEncode(const str: UnicodeString): AnsiString; 
var 
    len: Cardinal; 
begin 
    if str = '' then 
    begin 
    Result := ''; 
    exit; 
    end; 
    if (PunycodeEncode(Length(str), PPunyCode(str), len) = pcSuccess) and (Length(str) + 1 <> len) then 
    begin 
    SetLength(Result, len); 
    PunycodeEncode(Length(str), PPunyCode(str), len, PByte(Result)); 
    end else 
    Result := str; 
end; 

function PDecode(const str: AnsiString): UnicodeString; 
var 
    outputlen: Cardinal; 
begin 
    if str = '' then 
    begin 
    Result := ''; 
    exit; 
    end; 
    outputlen := 0; 
    if (PunycodeDecode(Length(str), PByte(str), outputlen) = pcSuccess) and (Length(str) + 1 <> outputlen) then 
    begin 
    SetLength(Result, outputlen); 
    PunycodeDecode(Length(str), PByte(str), outputlen, PPunycode(Result)); 
    end else 
    Result := str; 
end; 

procedure Test(const Input: UnicodeString); 
begin 
    if PDecode(PEncode(Input))<>Input then 
    raise EAssertionFailed.CreateFmt('Round-trip failed: %s', [Input]); 
end; 

begin 
    Test('http://президент.рф/'); 
    Test('David Heffernan'); 
    Test(''); 
    Test('A'); 
end.

move(output[i], output[i + 1], (outidx - i) * SizeOf(output^[0]));

을 이렇게 변경 한 후, 나는 성공적으로 다음 프로그램을 테스트

그러나 Punycode에 대해서는 정확히 알지 못합니다. 특히 'xn--' 접두어가 링크 된 질문에 어떤 점이 추가되었는지 전혀 알지 못합니다. 따라서 위에 표시된 PEncode 및 PDecode 루틴은 사용자가 필요로하는 것과 정확히 일치하지 않을 수 있습니다.

dorPunyCode 유닛의 델파이 코드에 대해 매우 모호합니다. 다른 문제가 숨어있는 것 같습니다. 귀하의 입장에서 나는 으로 컴파일 한 다음 $L으로 링크하면 punycode.c을 보유하게됩니다. 나는 이것을 dorPunyCode 단위보다 훨씬 더 신뢰할 수 있다고 생각합니다.

출처

2011-12-30 17:35:12

당신은 punycode.c를 어디에서 얻을 수 있는지, 그리고 "$ L"로 연결하는 방법에 대해 더 많이 말할 수 있습니다. 이 만남을 처음 접했습니다. – dedoki

델파이 상단에 링크가 있습니다. punycode에 대해 많이 알고 있습니까? –

[GNI IDN 라이브러리] (http://www.gnu.org/software/libidn/)에는 지침과 예제가 있습니다. [This] (http://git.savannah.gnu.org/cgit/libidn.git/tree/examples/example.c)는 libIDN 및 [here] (http : //git.savannah.gnu)를 컴파일하는 방법을 보여줍니다. .org/cgit/libidn.git/tree/examples/example2.c)는 punycode를 사용하는 방법의 예입니다. – shamp00

이 코드에는 몇 가지 버그가 있습니다. 여기

는 그것을위한 테스트입니다 :

program PunyCodeTest; 

uses 
    Vcl.Dialogs, 
    SysUtils, 
    PunyCode in '..\SRC\PunyCode.pas'; 

type 
    TCodecTestRec = record 
    Decoded: AnsiString; 
    Encoded: AnsiString; 
    end; 

    TDomainTestRec = record 
    Decoded: WideString; 
    Encoded: AnsiString; 
    end; 

const 
    CodecTestCases: array [0..19] of TCodecTestRec = (
    // My samples 
    // ---------- 
    // 蒙古火锅-test 
    (Decoded: 'u+8499 u+53E4 u+706B u+9505 u+002D u+0074 u+0065 u+0073 u+0074'; 
    Encoded: '-test-xt8h571o0z7ad54a'), 

    // RFC 3492 - 7.1 Sample strings 
    // ----------------------------- 
    // (A) Arabic (Egyptian): 
    (Decoded: 'u+0644 u+064A u+0647 u+0645 u+0627 u+0628 u+062A u+0643 u+0644 '+ 
     'u+0645 u+0648 u+0634 u+0639 u+0631 u+0628 u+064A u+061F'; 
    Encoded: 'egbpdaj6bu4bxfgehfvwxn'), 

    //(B) Chinese (simplified): 
    (Decoded: 'u+4ED6 u+4EEC u+4E3A u+4EC0 u+4E48 u+4E0D u+8BF4 u+4E2D u+6587'; 
    Encoded: 'ihqwcrb4cv8a8dqg056pqjye'), 

    // (C) Chinese (traditional): 
    (Decoded: 'u+4ED6 u+5011 u+7232 u+4EC0 u+9EBD u+4E0D u+8AAA u+4E2D u+6587'; 
    Encoded: 'ihqwctvzc91f659drss3x8bo0yb'), 

    // (D) Czech: Pro<ccaron>prost<ecaron>nemluv<iacute><ccaron>esky 
    (Decoded: 'U+0050 u+0072 u+006F u+010D u+0070 u+0072 u+006F u+0073 u+0074 '+ 
     'u+011B u+006E u+0065 u+006D u+006C u+0075 u+0076 u+00ED u+010D u+0065 '+ 
     'u+0073 u+006B u+0079'; 
    Encoded: 'Proprostnemluvesky-uyb24dma41a'), 

    // (E) Hebrew: 
    (Decoded: 'u+05DC u+05DE u+05D4 u+05D4 u+05DD u+05E4 u+05E9 u+05D5 u+05D8 '+ 
     'u+05DC u+05D0 u+05DE u+05D3 u+05D1 u+05E8 u+05D9 u+05DD u+05E2 u+05D1 '+ 
     'u+05E8 u+05D9 u+05EA'; 
    Encoded: '4dbcagdahymbxekheh6e0a7fei0b'), 

    // (F) Hindi (Devanagari): 
    (Decoded: 'u+092F u+0939 u+0932 u+094B u+0917 u+0939 u+093F u+0928 u+094D '+ 
     'u+0926 u+0940 u+0915 u+094D u+092F u+094B u+0902 u+0928 u+0939 u+0940 '+ 
     'u+0902 u+092C u+094B u+0932 u+0938 u+0915 u+0924 u+0947 u+0939 u+0948 '+ 
     'u+0902'; 
    Encoded: 'i1baa7eci9glrd9b2ae1bj0hfcgg6iyaf8o0a1dig0cd'), 

    // (G) Japanese (kanji and hiragana): 
    (Decoded: 'u+306A u+305C u+307F u+3093 u+306A u+65E5 u+672C u+8A9E u+3092 '+ 
     'u+8A71 u+3057 u+3066 u+304F u+308C u+306A u+3044 u+306E u+304B'; 
    Encoded: 'n8jok5ay5dzabd5bym9f0cm5685rrjetr6pdxa'), 

    // (H) Korean (Hangul syllables): 
    (Decoded: 'u+C138 u+ACC4 u+C758 u+BAA8 u+B4E0 u+C0AC u+B78C u+B4E4 u+C774 '+ 
     'u+D55C u+AD6D u+C5B4 u+B97C u+C774 u+D574 u+D55C u+B2E4 u+BA74 u+C5BC '+ 
     'u+B9C8 u+B098 u+C88B u+C744 u+AE4C'; 
    Encoded: '989aomsvi5e83db1d2a355cv1e0vak1dwrv93d5xbh15a0dt30a5jpsd879ccm6fea98c'), 

    // (I) Russian (Cyrillic): 
    (Decoded: 'U+043F u+043E u+0447 u+0435 u+043C u+0443 u+0436 u+0435 u+043E '+ 
     'u+043D u+0438 u+043D u+0435 u+0433 u+043E u+0432 u+043E u+0440 u+044F '+ 
     'u+0442 u+043F u+043E u+0440 u+0443 u+0441 u+0441 u+043A u+0438'; 
    Encoded: 'b1abfaaepdrnnbgefbaDotcwatmq2g4l'), 

    // (J) Spanish: Porqu<eacute>nopuedensimplementehablarenEspa<ntilde>ol 
    (Decoded: 'U+0050 u+006F u+0072 u+0071 u+0075 u+00E9 u+006E u+006F u+0070 '+ 
     'u+0075 u+0065 u+0064 u+0065 u+006E u+0073 u+0069 u+006D u+0070 u+006C '+ 
     'u+0065 u+006D u+0065 u+006E u+0074 u+0065 u+0068 u+0061 u+0062 u+006C '+ 
     'u+0061 u+0072 u+0065 u+006E U+0045 u+0073 u+0070 u+0061 u+00F1 u+006F '+ 
     'u+006C'; 
    Encoded: 'PorqunopuedensimplementehablarenEspaol-fmd56a'), 

    // (K) Vietnamese: 
    // T<adotbelow>isaoh<odotbelow>kh<ocirc>ngth<ecirchookabove>ch\ 
    // <ihookabove>n<oacute>iti<ecircacute>ngVi<ecircdotbelow>t 
    (Decoded: 'U+0054 u+1EA1 u+0069 u+0073 u+0061 u+006F u+0068 u+1ECD u+006B '+ 
     'u+0068 u+00F4 u+006E u+0067 u+0074 u+0068 u+1EC3 u+0063 u+0068 u+1EC9 '+ 
     'u+006E u+00F3 u+0069 u+0074 u+0069 u+1EBF u+006E u+0067 U+0056 u+0069 '+ 
     'u+1EC7 u+0074'; 
    Encoded: 'TisaohkhngthchnitingVit-kjcr8268qyxafd2f1b9g'), 

    // (L) 3<nen>B<gumi><kinpachi><sensei> 
    (Decoded: 'u+0033 u+5E74 U+0042 u+7D44 u+91D1 u+516B u+5148 u+751F'; 
    Encoded: '3B-ww4c5e180e575a65lsy2b'), 

    // (M) <amuro><namie>-with-SUPER-MONKEYS 
    (Decoded: 'u+5B89 u+5BA4 u+5948 u+7F8E u+6075 u+002D u+0077 u+0069 u+0074 '+ 
     'u+0068 u+002D U+0053 U+0055 U+0050 U+0045 U+0052 u+002D U+004D U+004F '+ 
     'U+004E U+004B U+0045 U+0059 U+0053'; 
    Encoded: '-with-SUPER-MONKEYS-pc58ag80a8qai00g7n9n'), 

    // (N) Hello-Another-Way-<sorezore><no><basho> 
    (Decoded: 'U+0048 u+0065 u+006C u+006C u+006F u+002D U+0041 u+006E u+006F '+ 
     'u+0074 u+0068 u+0065 u+0072 u+002D U+0057 u+0061 u+0079 u+002D u+305D '+ 
     'u+308C u+305E u+308C u+306E u+5834 u+6240'; 
    Encoded: 'Hello-Another-Way--fc4qua05auwb3674vfr0b'), 

    // (O) <hitotsu><yane><no><shita>2 
    (Decoded: 'u+3072 u+3068 u+3064 u+5C4B u+6839 u+306E u+4E0B u+0032'; 
    Encoded: '2-u9tlzr9756bt3uc0v'), 

    // (P) Maji<de>Koi<suru>5<byou><mae> 
    (Decoded: 'U+004D u+0061 u+006A u+0069 u+3067 U+004B u+006F u+0069 u+3059 '+ 
     'u+308B u+0035 u+79D2 u+524D'; 
    Encoded: 'MajiKoi5-783gue6qz075azm5e'), 

    // (Q) <pafii>de<runba> 
    (Decoded: 'u+30D1 u+30D5 u+30A3 u+30FC u+0064 u+0065 u+30EB u+30F3 u+30D0'; 
    Encoded: 'de-jg4avhby1noc0d'), 

    // (R) <sono><supiido><de> 
    (Decoded: 'u+305D u+306E u+30B9 u+30D4 u+30FC u+30C9 u+3067'; 
    Encoded: 'd9juau41awczczp'), 

    // (S) -> $1.00 <- 
    (Decoded: 'u+002D u+003E u+0020 u+0024 u+0031 u+002E u+0030 u+0030 u+0020 '+ 
     'u+003C u+002D'; 
    Encoded: '-> $1.00 <--') 
); 

    DomainTestCases: array [0..2] of TDomainTestRec = (
    (Decoded: '蒙古火锅-test.cn'; Encoded: 'xn---test-xt8h571o0z7ad54a.cn'), 
    (Decoded: 'президент.рф'; Encoded: 'xn--d1abbgf6aiiy.xn--p1ai'), 
    (Decoded: 'почта.мд'; Encoded: 'xn--80a1acny.xn--d1ap') 
); 

const 
    UnicodeMaxLength = 256; 
    AceMaxLength = 256; 

function ExtractCodePoint(S: AnsiString; var Off: Integer; out caseflag: Byte; out codept: TPunyCode): Boolean; 

    procedure InvalidInput; 
    begin 
    raise EAssertionFailed.Create('Invalid input'); 
    end; 

begin 
    Result := False; 
    if Off > Length(S) then Exit; 
    if Off+5 > Length(S) then InvalidInput; 

    if S[Off] = 'u' then caseflag := 0 
    else if S[Off] = 'U' then caseflag := 1 
    else InvalidInput; 
    Inc(Off); 

    if S[Off] <> '+' then InvalidInput; 
    Inc(Off); 

    codept := StrToInt('$'+Copy(S, Off, 4)); 
    Inc(Off, 4); 

    if (Off <= Length(S)) and (S[Off] <> ' ') then InvalidInput; 
    Inc(Off); 

    Result := True; 
end; 

procedure TestEncoder(Decoded, Encoded: AnsiString); 
var 
    inlen, outlen: Cardinal; 
    caseflags: array [0..UnicodeMaxLength-1] of Byte; 
    input: array [0..UnicodeMaxLength-1] of TPunyCode; 
    output: array [0..AceMaxLength-1] of Byte; 
    Off: Integer; 
    caseflag: Byte; 
    codept: TPunyCode; 
begin 
    inlen := 0; 
    Off := 1; 
    while ExtractCodePoint(Decoded, Off, caseflag, codept) do 
    begin 
    caseflags[inlen] := caseflag; 
    input[inlen] := codept; 
    Inc(inlen); 
    end; 

    outlen := AceMaxLength; 
    if (PunycodeEncode(inlen, @input, outlen, @output, @caseflags) <> pcSuccess) or 
    (outlen <> Cardinal(Length(Encoded))) or 
    not CompareMem(@output, @Encoded[1], outlen) then 
    raise EAssertionFailed.CreateFmt('Encoding failed: %s', [Decoded]); 
end; 

function MakeDecoded(outlen: Cardinal; output: PPunycode; caseflags: PByte): AnsiString; 
var 
    _caseflags: PByteArray absolute caseflags; 
    I: Integer; 
    S: AnsiString; 
begin 
    Result := ''; 
    for I := 0 to outlen - 1 do 
    begin 
    if _caseflags[I] = 0 then S := 'u+' 
    else S := 'U+'; 
    S := S + IntToHex(output[I], 4); 
    if Result = '' then Result := S 
    else Result := Result + ' ' + S; 
    end; 
end; 

procedure TestDecoder(Decoded, Encoded: AnsiString); 
var 
    inlen, outlen: Cardinal; 
    caseflags: array [0..UnicodeMaxLength-1] of Byte; 
    output: array [0..UnicodeMaxLength-1] of TPunyCode; 
begin 
    inlen := Length(Encoded); 
    if inlen > AceMaxLength then 
    raise EAssertionFailed.CreateFmt('Input is too big: %s', [Encoded]); 

    outlen := UnicodeMaxLength; 
    if (PunycodeDecode(inlen, PByte(@Encoded[1]), outlen, @output, @caseflags) <> pcSuccess) or 
    (MakeDecoded(outlen, @output, @caseflags) <> Decoded) then 
    raise EAssertionFailed.CreateFmt('Decoding failed: %s', [Encoded]); 
end; 

procedure TestCodec(Decoded, Encoded: AnsiString); 
begin 
    TestEncoder(Decoded, Encoded); 
    TestDecoder(Decoded, Encoded); 
end; 

procedure TestCodecTestCases; 
var 
    I: Integer; 
begin 
    for I := 0 to High(CodecTestCases) do 
    TestCodec(CodecTestCases[I].Decoded, CodecTestCases[I].Encoded); 
end; 

procedure TestDomain(Decoded: WideString; Encoded: AnsiString); 
begin 
    if PunycodeEncodeDomain(Decoded) <> Encoded then 
    raise EAssertionFailed.CreateFmt('Encoding failed: %s', [Decoded]); 
    if PunycodeDecodeDomain(Encoded) <> Decoded then 
    raise EAssertionFailed.CreateFmt('Decoding failed: %s', [Encoded]); 
end; 

procedure TestDomainTestCases; 
var 
    I: Integer; 
begin 
    for I := 0 to High(DomainTestCases) do 
    TestDomain(DomainTestCases[I].Decoded, DomainTestCases[I].Encoded); 
end; 

begin 
    TestCodecTestCases; 
    TestDomainTestCases; 
    MessageDlg('Punycode was successfully tested', mtInformation, [mbOK], 0); 
end.

출처

2013-08-17 10:24:02

을 그리고 여기에 추가 PunycodeDecodeDomain에 고정 된 버전입니다. Dephi 2007 및 Delphi XE3 (x86 및 x64)에서 테스트되었습니다 :

(* 
* punycode.c from RFC 3492prop 
* http://www.nicemice.net/idn/ 
* Adam M. Costello 
* http://www.nicemice.net/amc/ 
* 
* This is ANSI C code (C89) implementing Punycode (RFC 3492prop). 
* Delphi Conversion by: 
* Henri Gourvest <[email protected]> 
* http://www.progdigy.com 
* contributor 
* J. Heffernan <[email protected]> 
* testing, fixing and refactoring 
* Igor Tsurcanovsky <[email protected]> 

usage: 

function PEncode(const str: UnicodeString): AnsiString; 
var 
    len: Cardinal; 
begin 
    if str = '' then 
    begin 
    Result := ''; 
    exit; 
    end; 
    if (PunycodeEncode(Length(str), PPunyCode(str), len) = pcSuccess) and (Length(str) + 1 <> len) then 
    begin 
    SetLength(Result, len); 
    PunycodeEncode(Length(str), PPunyCode(str), len, PByte(Result)); 
    end else 
    Result := str; 
end; 

function PDecode(const str: AnsiString): UnicodeString; 
var 
    outputlen: Cardinal; 
begin 
    if str = '' then 
    begin 
    Result := ''; 
    exit; 
    end; 
    outputlen := 0; 
    if (PunycodeDecode(Length(str), PByte(str), outputlen) = pcSuccess) and (Length(str) <> outputlen) then 
    begin 
    SetLength(Result, outputlen); 
    PunycodeDecode(Length(str), PByte(str), outputlen, PPunycode(Result)); 
    end else 
    Result := str; 
end; 

procedure Test(const Input: UnicodeString); 
begin 
    if PDecode(PEncode(Input))<>Input then 
    raise EAssertionFailed.CreateFmt('Round-trip failed: %s', [Input]); 
end; 

begin 
    Test('президент'); 
    Test('David Heffernan'); 
    Test(''); 
    Test('A'); 
end. 

*) 

unit PunyCode; 

interface 

type 
    {$if (SizeOf(Char) = 1)} 
    // For compatibility with versions without UnicodeString (prior Delphi 2009) 
    UnicodeString = WideString; 
    {$ifend} 

    TPunyCodeStatus = (
    pcSuccess, 
    pcBadInput, (* Input is invalid.      *) 
    pcBigOutput, (* Output would exceed the space provided. *) 
    pcOverflow (* Input needs wider integers to process. *) 
); 

    TPunyCode = Word; 
    TPunyCodeArray = array[0..(High(Integer) div SizeOf(TPunyCode)) - 1] of TPunyCode; 
    PPunycode = ^TPunyCodeArray; 

function PunycodeDecode(inputlen: Cardinal; const input: PByte; 
    var outputlen: Cardinal; output: PPunycode = nil; 
    caseflags: PByte = nil): TPunyCodeStatus; 

function PunycodeEncode(inputlen: Cardinal; const input: PPunycode; 
    var outputlen: Cardinal; const output: PByte = nil; 
    const caseflags: PByte = nil): TPunyCodeStatus; overload; 

function PunycodeDecodeDomain(const str: AnsiString): UnicodeString; 
function PunycodeEncodeDomain(const str: UnicodeString): AnsiString; 

implementation 

uses SysUtils; 

type 
    PByteArray = ^TByteArray; 
    TByteArray = array [0..MaxInt-1] of Byte; 

(*** Bootstring parameters for Punycode ***) 
const 
    PUNY_BASE = 36; 
    PUNY_TMIN = 1; 
    PUNY_TMAX = 26; 
    PUNY_SKEW = 38; 
    PUNY_DAMP = 700; 
    PUNY_INITIAL_BIAS = 72; 
    PUNY_INITIAL_N = $80; 
    PUNY_DELIMITER = $2D; 

    // typedef unsigned int punycode_uint; 
    // /* maxint is the maximum value of a punycode_uint variable: */ 
    // static const punycode_uint maxint = -1; 
    // /* Because maxint is unsigned, -1 becomes the maximum value. */ 
    PUNY_maxint = High(Cardinal); 


(* flagged(bcp) tests whether a basic code point is flagged *) 
(* (uppercase). The behavior is undefined if bcp is not a *) 
(* basic code point.          *) 

function PUNY_flagged(bcp: Cardinal): Byte; inline; 
begin 
    Result := Ord(bcp - 65 < 26); 
end; 

(* DecodeDigit(cp) returns the numeric value of a basic code *) 
(* point (for use in representing integers) in the range 0 to *) 
(* BASE-1, or BASE if cp is does not represent a value.  *) 

function PUNY_DecodeDigit(cp: Cardinal): Cardinal; inline; 
begin 
    if (cp - 48 < 10) then 
    Result := cp - 22 
    else if (cp - 65 < 26) then 
    Result := cp - 65 
    else if (cp - 97 < 26) then 
    Result := cp - 97 
    else 
    Result := PUNY_BASE; 
end; 

(* EncodeDigit(d,flag) returns the basic code point whose value  *) 
(* (when used for representing integers) is d, which needs to be in *) 
(* the range 0 to BASE-1. The lowercase form is used unless flag is *) 
(* nonzero, in which case the uppercase form is used. The behavior *) 
(* is undefined if flag is nonzero and digit d has no uppercase form. *) 

function PUNY_EncodeDigit(d: Cardinal; flag: Boolean): Byte; inline; 
begin 
    Result := d + 22 + 75 * Ord(d < 26) - (Ord(flag) shl 5); 
    (* 0..25 map to ASCII a..z or A..Z *) 
    (* 26..35 map to ASCII 0..9   *) 
end; 

(* EncodeBasic(bcp,flag) forces a basic code point to lowercase *) 
(* if flag is zero, uppercase if flag is nonzero, and returns *) 
(* the resulting code point. The code point is unchanged if it *) 
(* is caseless. The behavior is undefined if bcp is not a basic *) 
(* code point.             *) 

function PUNY_EncodeBasic(bcp: Cardinal; flag: Integer): Byte; inline; 
begin 
    Dec(bcp, Ord(bcp - 97 < 26) shl 5); 
    Result := bcp + (((not flag) and Ord(bcp - 65 < 26)) shl 5); 
end; 

(*** Bias adaptation function ***) 

function PUNY_Adapt(delta, numpoints: Cardinal; firsttime: Boolean): Cardinal; inline; 
var 
    k: TPunyCode; 
begin 
    if firsttime then 
    delta := delta div PUNY_DAMP 
    else 
    delta := delta shr 1; 

    (* delta shr 1 is a faster way of doing delta div 2 *) 
    Inc(delta, delta div numpoints); 

    k := 0; 
    while (delta > ((PUNY_BASE - PUNY_TMIN) * PUNY_TMAX) div 2) do 
    begin 
    delta := delta div (PUNY_BASE - PUNY_TMIN); 
    Inc(k, PUNY_BASE); 
    end; 

    Result := k + (PUNY_BASE - PUNY_TMIN + 1) * delta div (delta + PUNY_SKEW); 
end; 

(* PunycodeEncode() converts Unicode to Punycode. The input  *) 
(* is represented as an array of Unicode code points (not code *) 
(* units; surrogate pairs are not allowed), and the output  *) 
(* will be represented as an array of ASCII code points. The  *) 
(* output string is *not* null-terminated; it will contain  *) 
(* zeros if and only if the input contains zeros. (Of course  *) 
(* the caller can leave room for a terminator and add one if  *) 
(* needed.) The inputlen is the number of code points in   *) 
(* the input. The outputlen is an in/out argument: the   *) 
(* caller passes in the maximum number of code points that it  *) 
(* can receive, and on successful return it will contain the  *) 
(* number of code points actually output. The case_flags array *) 
(* holds input_length boolean values, where nonzero suggests that *) 
(* the corresponding Unicode character be forced to uppercase  *) 
(* after being decoded (if possible), and zero suggests that  *) 
(* it be forced to lowercase (if possible). ASCII code points *) 
(* are encoded literally, except that ASCII letters are forced *) 
(* to uppercase or lowercase according to the corresponding  *) 
(* uppercase flags. If case_flags is a null pointer then ASCII *) 
(* letters are left as they are, and other code points are  *) 
(* treated as if their uppercase flags were zero. The return  *) 
(* value can be any of the TPunyCodeStatus values defined above *) 
(* except pcBadInput; if not pcSuccess, then  *) 
(* output_size and output might contain garbage.     *) 

function PunycodeEncode(inputlen: Cardinal; const input: PPunycode; 
    var outputlen: Cardinal; const output: PByte = nil; 
    const caseflags: PByte = nil): TPunyCodeStatus; 
var 
    outidx, maxout, n, delta, h, b, bias, m, q, k, t: Cardinal; 
    j: Integer; 
    _output: PByteArray absolute output; 
    _caseflags: PByteArray absolute caseflags; 
begin 
    (* Initialize the state: *) 

    n := PUNY_INITIAL_N; 
    outidx := 0; 
    delta := outidx; 
    maxout := outputlen; 
    bias := PUNY_INITIAL_BIAS; 

    (* Handle the basic code points: *) 

    for j := 0 to inputlen - 1 do 
    begin 
    if (input[j] < $80) then 
    begin 
     if (output <> nil) then 
     begin 
     if (maxout - outidx < 2) then 
     begin 
      Result := pcBigOutput; 
      Exit; 
     end; 
     if (caseflags <> nil) then 
      _output[outidx] := PUNY_EncodeBasic(input[j], _caseflags[j]) 
     else 
      _output[outidx] := input[j]; 
     end; 

     Inc(outidx); 
    end; 
    (* else if (input[j] < n) return pcBadInput; *) 
    (* (not needed for Punycode with unsigned code points) *) 
    end; 

    b := outidx; 
    h := b; 

    (* h is the number of code points that have been handled, b is the *) 
    (* number of basic code points, and out is the number of characters *) 
    (* that have been output. *) 

    if (b > 0) then 
    begin 
    if (output <> nil) then 
     _output[outidx] := PUNY_DELIMITER; 
    Inc(outidx); 
    end; 

    (* Main encoding loop: *) 

    while (h < inputlen) do 
    begin 
    (* All non-basic code points < n have been *) 
    (* handled already. Find the next larger one: *) 

    m := PUNY_maxint; 
    for j := 0 to inputlen - 1 do 
     (* if (basic(input[j])) continue; *) 
     (* (not needed for Punycode) *) 
     if ((input[j] >= n) and (input[j] < m)) then 
     m := input[j]; 

    (* Increase delta enough to advance the decoder's *) 
    (* <n,i> state to <m,0>, but guard against overflow: *) 

    if (m - n > (PUNY_maxint - delta) div (h + 1)) then 
    begin 
     Result := pcOverflow; 
     Exit; 
    end; 
    Inc(delta, (m - n) * (h + 1)); 
    n := m; 

    for j := 0 to inputlen - 1 do 
    begin 
     (* Punycode does not need to check whether input[j] is basic: *) 
     if (input[j] < n (* or basic(input[j]) *)) then 
     begin 
     Inc(delta); 
     if (delta = 0) then 
     begin 
      Result := pcOverflow; 
      Exit; 
     end; 
     end; 

     if (input[j] = n) then 
     begin 
     (* Represent delta as a generalized variable-length integer: *) 

     q := delta; 
     k := PUNY_BASE; 
     while true do 
     begin 
      if (output <> nil) then 
      if (outidx >= maxout) then 
      begin 
       Result := pcBigOutput; 
       Exit; 
      end; 
      if k <= bias (* + TMIN *) then (* +TMIN not needed *) 
      t := PUNY_TMIN 
      else if k >= bias + PUNY_TMAX then 
      t := PUNY_TMAX 
      else 
      t := k - bias; 
      if (q < t) then 
      break; 
      if (output <> nil) then 
      _output[outidx] := PUNY_EncodeDigit(t + (q - t) mod (PUNY_BASE - t), False); 
      Inc(outidx); 
      q := (q - t) div (PUNY_BASE - t); 
      Inc(k, PUNY_BASE); 
     end; 
     if (output <> nil) then 
      _output[outidx] := PUNY_EncodeDigit(q, 
      (caseflags <> nil) and (_caseflags[j] <> 0)); 
     Inc(outidx); 
     bias := PUNY_Adapt(delta, h + 1, h = b); 
     delta := 0; 
     Inc(h); 
     end; 
    end; 

    Inc(delta); 
    Inc(n); 
    end; 

    outputlen := outidx; 
    Result := pcSuccess; 
end; 

(* PunycodeDecode() converts Punycode to Unicode. The input is *) 
(* represented as an array of ASCII code points, and the output *) 
(* will be represented as an array of Unicode code points. The *) 
(* input_length is the number of code points in the input. The *) 
(* output_length is an in/out argument: the caller passes in  *) 
(* the maximum number of code points that it can receive, and  *) 
(* on successful return it will contain the actual number of  *) 
(* code points output. The case_flags array needs room for at *) 
(* least output_length values, or it can be a null pointer if the *) 
(* case information is not needed. A nonzero flag suggests that *) 
(* the corresponding Unicode character be forced to uppercase  *) 
(* by the caller (if possible), while zero suggests that it be *) 
(* forced to lowercase (if possible). ASCII code points are  *) 
(* output already in the proper case, but their flags will be set *) 
(* appropriately so that applying the flags would be harmless. *) 
(* The return value can be any of the TPunyCodeStatus values  *) 
(* defined above; if not pcSuccess, then output_length, *) 
(* output, and case_flags might contain garbage. On success, the *) 
(* decoder will never need to write an output_length greater than *) 
(* input_length, because of how the encoding is defined.   *) 

function PunycodeDecode(inputlen: Cardinal; const input: PByte; 
    var outputlen: Cardinal; output: PPunycode; 
    caseflags: PByte): TPunyCodeStatus; 
var 
    outidx, i, maxout, bias, b, inidx, oldi, w, k, digit, t, n : Cardinal; 
    j: Integer; 
    _input: PByteArray absolute input; 
    _caseflags: PByteArray absolute caseflags; 
begin 

    (* Initialize the state: *) 

    n := PUNY_INITIAL_N; 
    outidx := 0; 
    i := outidx; 
    maxout := outputlen; 
    bias := PUNY_INITIAL_BIAS; 

    (* Handle the basic code points: Let b be the number of input code *) 
    (* points before the last DELIMITER, or 0 if there is none, then *) 
    (* copy the first b code points to the output. *) 

    b := 0; 
    for j := 0 to inputlen - 1 do 
    if _input[j] = PUNY_DELIMITER then 
     b := j; 

    if output <> nil then 
    if (b > maxout) then 
    begin 
     Result := pcBigOutput; 
     Exit; 
    end; 

    for j := 0 to b - 1 do 
    begin 
    if (caseflags <> nil) then 
     _caseflags[outidx] := PUNY_flagged(_input[j]); 
    if (_input[j] >= $80) then 
    begin 
     Result := pcBadInput; 
     Exit; 
    end; 
    if output <> nil then 
     output[outidx] := _input[j]; 
    Inc(outidx); 
    end; 

    (* Main decoding loop: Start just after the last DELIMITER if any *) 
    (* basic code points were copied; start at the beginning otherwise. *) 

    if (b > 0) then 
    inidx := b + 1 
    else 
    inidx := 0; 

    while inidx < inputlen do 
    begin 
    (* in is the index of the next character to be consumed, and *) 
    (* out is the number of code points in the output array. *) 

    (* Decode a generalized variable-length integer into delta, *) 
    (* which gets added to i. The overflow checking is easier *) 
    (* if we increase i as we go, then subtract off its starting *) 
    (* value at the end to obtain delta. *) 

    oldi := i; 
    w := 1; 
    k := PUNY_BASE; 
    while true do 
    begin 
     if (inidx >= inputlen) then 
     begin 
     Result := pcBadInput; 
     Exit; 
     end; 
     digit := PUNY_DecodeDigit(_input[inidx]); 
     Inc(inidx); 
     if (digit >= PUNY_BASE) then 
     begin 
     Result := pcBadInput; 
     Exit; 
     end; 
     if (digit > (PUNY_maxint - i) div w) then 
     begin 
     Result := pcOverflow; 
     Exit; 
     end; 
     Inc(i, digit * w); 
     if k <= bias (* + TMIN *) then 
     t := PUNY_TMIN 
     else (* +TMIN not needed *) 
     if k >= bias + PUNY_TMAX then 
     t := PUNY_TMAX 
     else 
     t := k - bias; 
     if (digit < t) then 
     break; 
     if (w > (PUNY_maxint div (PUNY_BASE - t))) then 
     begin 
     Result := pcOverflow; 
     Exit; 
     end; 
     w := w * (PUNY_BASE - t); 
     Inc(k, PUNY_BASE); 
    end; 

    bias := PUNY_Adapt(i - oldi, outidx + 1, oldi = 0); 

    (* i was supposed to wrap around from out+1 to 0, *) 
    (* incrementing n each time, so we'll fix that now: *) 

    if (i div (outidx + 1) > PUNY_maxint - n) then 
    begin 
     Result := pcOverflow; 
     Exit; 
    end; 
    Inc(n, i div (outidx + 1)); 
    i := i mod (outidx + 1); 

    (* Insert n at position i of the output: *) 

    (* not needed for Punycode: *) 
    (* if (DecodeDigit(n) <= BASE) return punycode_invalid_input; *) 
    if output <> nil then 
     if (outidx >= maxout) then 
     begin 
     Result := pcBigOutput; 
     Exit; 
     end; 

    if (caseflags <> nil) then 
    begin 
     move(_caseflags[i], _caseflags[i + 1], outidx - i); 

     (* Case of last character determines uppercase flag: *) 
     _caseflags[i] := PUNY_flagged(_input[inidx - 1]); 
    end; 

    if output <> nil then 
    begin 
     move(output[i], output[i + 1], (outidx - i) * SizeOf(TPunyCode)); 
     output[i] := n; 
    end; 
    Inc(i); 

    Inc(outidx); 
    end; 

    outputlen := outidx; 
    Result := pcSuccess; 
end; 

function PunycodeDecodeDomain(const str: AnsiString): UnicodeString; 
var 
    p, s: PAnsiChar; 

    procedure DoIt(dot: Boolean); 
    var 
    inlen, outlen: Cardinal; 
    unicode: UnicodeString; 
    u: PWideChar; 
    begin 
    inlen := p - s; 
    if (inlen > 4) and (StrLIComp(s, 'xn--', 4) = 0) and 
     (PunycodeDecode(inlen-4, PByte(@s[4]), outlen) = pcSuccess) then 
    begin 
     if dot then 
     SetLength(unicode, outlen + 1) 
     else 
     SetLength(unicode, outlen); 
     u := PWideChar(unicode); 
     PunycodeDecode(inlen-4, PByte(@s[4]), outlen, PPunyCode(u)); 
     if dot then 
     begin 
     inc(u, outlen); 
     u^ := '.'; 
     end; 
    end else 
     if dot then 
     SetString(unicode, s, inlen + 1) 
     else 
     SetString(unicode, s, inlen); 
    Result := Result + unicode; 
    end; 

begin 
    Result := ''; 
    p := PAnsiChar(str); 
    s := p; 

    while True do 
    case p^ of 
    '.': 
     begin 
     DoIt(True); 
     Inc(p); 
     s := p; 
     end; 
    #0 : 
     begin 
     DoIt(False); 
     Break; 
     end; 
    else 
    Inc(p); 
    end; 
end; 

function PunycodeEncodeDomain(const str: UnicodeString): AnsiString; 
var 
    p, s: PWideChar; 

    procedure DoIt(dot: Boolean); 
    var 
    inlen, outlen: Cardinal; 
    ansi: AnsiString; 
    a: PAnsiChar; 
    begin 
    inlen := p - s; 
    if (PunycodeEncode(inlen, PPunyCode(s), outlen) = pcSuccess) and (inlen + 1 <> outlen) then 
    begin 
     if dot then 
     SetLength(ansi, outlen + 4 + 1) 
     else 
     SetLength(ansi, outlen + 4); 
     a := PAnsiChar(ansi); 
     Move(PAnsiChar('xn--')^, a^, 4); 
     inc(a, 4); 
     PunycodeEncode(inlen, PPunyCode(s), outlen, PByte(a)); 
     if dot then 
     begin 
     inc(a, outlen); 
     a^ := '.'; 
     end; 
    end else 
     if dot then 
     SetString(ansi, s, inlen + 1) 
     else 
     SetString(ansi, s, inlen); 
    Result := Result + ansi; 
    end; 

begin 
    Result := ''; 
    p := PWideChar(str); 
    s := p; 

    while True do 
    case p^ of 
    '.': 
     begin 
     DoIt(True); 
     Inc(p); 
     s := p; 
     end; 
    #0 : 
     begin 
     DoIt(False); 
     Break; 
     end; 
    else 
    Inc(p); 
    end; 
end; 

end.

출처

2013-08-17 10:31:24

델파이 : Punicode 디코드

답변

관련 문제