fix: unicode support for VFS files

This commit is contained in:
Massimo Melina 2020-05-03 23:42:33 +02:00
parent 384d2d10f4
commit cec3ebb4cb
5 changed files with 90 additions and 78 deletions

View File

@ -193,12 +193,12 @@ type
Ttlv = class
protected
cur, bound: integer;
whole, lastValue: ansistring;
whole, lastRaw: ansistring;
stack: array of integer;
stackTop: integer;
public
procedure parse(data:ansistring);
function pop(var value:ansistring):integer;
function pop(var value:string; var raw:ansiString):integer;
function down():boolean;
function up():boolean;
function getTotal():integer;
@ -208,6 +208,8 @@ type
function getTheRest():ansistring;
end;
const TLV_UTF8_FLAG = $1000000;
implementation
uses
@ -1144,7 +1146,7 @@ bound:=length(data);
stackTop:=0;
end; // parse
function Ttlv.pop(var value:ansistring):integer;
function Ttlv.pop(var value:string; var raw:ansistring):integer;
var
n: integer;
begin
@ -1152,8 +1154,15 @@ result:=-1;
if isOver() then exit; // finished
result:=integer((@whole[cur])^);
n:=Pinteger(@whole[cur+4])^;
value:=copy(whole, cur+8, n);
lastValue:=value;
raw:=copy(whole, cur+8, n);
lastRaw:=raw;
if result and TLV_UTF8_FLAG = 0 then
value:=string(raw)
else
begin
dec(result, TLV_UTF8_FLAG);
value:=UTF8toString(raw);
end;
inc(cur, 8+n);
end; // pop
@ -1174,7 +1183,7 @@ stack[stackTop]:=bound;
inc(stackTop);
bound:=cur;
dec(cur, length(lastValue));
dec(cur, length(lastRaw));
result:=true;
end; // down

View File

@ -313,9 +313,8 @@ function encodeURL(url:string; nonascii:boolean=TRUE; spaces:boolean=TRUE;
// returns true if address is not suitable for the internet
function isLocalIP(ip:string):boolean;
// base64 encoding
function base64encode(s:string):string;
function base64decode(s:string):string;
function base64decodeA(s:string):Tbytes;
function base64encode(s:ansistring):ansistring;
function base64decode(s:ansistring):ansistring;
// an ip address where we are listening
function getIP():string;
// ensure a string ends with a specific string
@ -445,7 +444,7 @@ begin
until false;
end; // nonQuotedPos
function base64encode(s:string):string;
function base64encode(s:ansistring):ansistring;
const
TABLE='ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/';
type
@ -469,7 +468,7 @@ if length(s) mod 3 > 0 then
+ifThen(length(s) mod 3=1,'==',TABLE[1+(p[1] and 15) shl 2+p[2] shr 6]+'=');
end; // base64encode
function base64decode(s:string):string;
function base64decode(s:ansistring):ansistring;
const
TABLE:array[#43..#122] of byte=(
62,0,0,0,63,52,53,54,55,56,57,58,59,60,61,0,0,0,0,0,0,0,0,1,2,3,4,5,6,7,
@ -490,31 +489,6 @@ while i <= length(s) do
end;
end; // base64decode
// Decodes base64 text into raw bytes.
//   s - base64 text made of 4-character groups; the last group may end with '=' padding.
// Returns exactly the decoded bytes: 3 per full group, 2 for a group ending in '=',
// 1 for a group ending in '=='. Decoding stops at the first padded group.
// A trailing group shorter than 4 characters is ignored rather than read past the end of s.
// NOTE(review): characters are not validated; anything outside #43..#122 indexes outside TABLE.
function base64decodeA(s:string):Tbytes;
const
  TABLE:array[#43..#122] of byte=(
    62,0,0,0,63,52,53,54,55,56,57,58,59,60,61,0,0,0,0,0,0,0,0,1,2,3,4,5,6,7,
    8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,0,0,0,0,0,0,26,27,28,
    29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51);
var
  i, n: integer;
begin
// upper bound of the output size; trimmed to the real size at the end
setLength(result, (length(s) div 4)*3);
i:=1;
n:=0;
// only complete groups are consumed, so s[i+3] is always a valid index
while i+3 <= length(s) do
  begin
  // "and $FF" makes the intended truncation to one byte explicit
  result[n]:=(TABLE[s[i]] shl 2+TABLE[s[i+1]] shr 4) and $FF;
  inc(n);
  if s[i+2] = '=' then break; // "xx==": the group carries a single byte
  result[n]:=(TABLE[s[i+1]] shl 4+TABLE[s[i+2]] shr 2) and $FF;
  inc(n);
  if s[i+3] = '=' then break; // "xxx=": the group carries two bytes
  result[n]:=(TABLE[s[i+2]] shl 6+TABLE[s[i+3]]) and $FF;
  inc(n);
  inc(i,4);
  end;
setLength(result, n);
end; // base64decodeA
function decodeURL(url:ansistring; utf8:boolean=TRUE):string;
var
i, j: integer;
@ -549,8 +523,11 @@ function encodeURL(url:string; nonascii:boolean=TRUE; spaces:boolean=TRUE;
var
i: integer;
encodePerc, encodeUni: set of char;
a: ansistring;
begin
result:='';
if url = '' then
exit;
encodeUni:=[];
if nonascii then encodeUni:=[#128..#255];
encodePerc:=[#0..#31,'#','%','?','"','''','&','<','>',':'];
@ -561,6 +538,14 @@ if not unicode then
encodePerc:=encodePerc+encodeUni;
encodeUni:=[];
end;
if nonascii then
begin
a:=UTF8encode(url); // couldn't find a better way to force url to have the UTF8 encoding
i:=length(a);
setLength(url, i);
for i := 1 to i do
url[i]:=char(a[i]);
end;
for i:=1 to length(url) do
if charInSet(url[i], encodePerc) then
result:=result+'%'+intToHex(ord(url[i]),2)

View File

@ -2906,7 +2906,6 @@ object mainFrm: TmainFrm
object encodenonasciiChk: TMenuItem
AutoCheck = True
Caption = 'Encode non-ASCII characters'
Checked = True
end
object encodePwdUrlChk: TMenuItem
AutoCheck = True

View File

@ -2637,8 +2637,6 @@ end; // getIconForTreeview
// Application-level URL encoder: forwards to HSlib.encodeURL using the user's menu options.
//   s          - text to encode
//   fullEncode - when TRUE, spaces are encoded regardless of the "encode spaces" option
// Non-ASCII handling follows the "Encode non-ASCII characters" menu option.
// No UTF-8 conversion is done here: HSlib.encodeURL converts the text to UTF-8 itself
// when its nonascii argument is TRUE, so a pre-pass in this wrapper would be redundant.
function encodeURL(s:string; fullEncode:boolean=FALSE):string;
begin
result:=HSlib.encodeURL(s, mainFrm.encodeNonasciiChk.checked,
  fullEncode or mainFrm.encodeSpacesChk.checked)
end; // encodeURL
@ -6318,7 +6316,8 @@ type
E_ZIP:
begin
result:=zCompressStr2(s, zcMax);
if length(result) > round(0.95*length(s)) then result:=s;
if length(result) > round(0.95*length(s)) then
result:=s;
result:=base64encode(result);
end;
end;
@ -6634,9 +6633,14 @@ var
end;
end; // loadBanlist
// Base64-decodes s and then decompresses the decoded bytes with ZDecompressStr.
//   s - base64 text of a zlib-compressed payload
// Returns the decompressed data, or '' when s decodes to nothing or when decoding or
// decompression raises (deliberately best-effort: errors are not propagated).
function unzip(s:ansistring):ansistring;
var
  a: Tbytes;
begin
// a managed result is not guaranteed to be empty on entry, so every exit path sets it
result:='';
try
  s:=base64decode(s);
  if s = '' then
    exit; // s[1] and a[0] below need at least one byte
  setLength(a, length(s));
  move(s[1], a[0], length(s));
  result:=ZDecompressStr(a);
except
  result:='';
  end;
end; // unzip
@ -9115,6 +9119,7 @@ const
FK_CRC = 6;
FK_COMMENT = 7;
FK_USERPWD = 8;
FK_USERPWD_UTF8 = 108;
FK_ADDEDTIME = 9;
FK_DLCOUNT = 10;
FK_ROOT = 11;
@ -9137,7 +9142,7 @@ const
function Tmainfrm.getVFS(node:Ttreenode=NIL):ansistring;
function getAutoupdatedFiles():string;
function getAutoupdatedFiles():ansistring;
var
i: integer;
fn: string;
@ -9164,7 +9169,7 @@ f:=nodeToFile(node);
commonFields:=TLV(FK_FLAGS, str_(f.flags))
+TLV_NOT_EMPTY(FK_RESOURCE, f.resource)
+TLV_NOT_EMPTY(FK_COMMENT, f.comment)
// the payload must really be UTF-8: the loader decodes FK_USERPWD_UTF8 with UTF8toString,
// while passing the unicode string straight to base64encode(ansistring) would convert it
// to the system ANSI code page instead
+if_(f.user>'', TLV(FK_USERPWD_UTF8, base64encode(UTF8encode(f.user+':'+f.pwd))))
+TLV_NOT_EMPTY(FK_ACCOUNTS, join(';',f.accounts[FA_ACCESS]) )
+TLV_NOT_EMPTY(FK_UPLOADACCOUNTS, join(';',f.accounts[FA_UPLOAD]))
+TLV_NOT_EMPTY(FK_DELETEACCOUNTS, join(';',f.accounts[FA_DELETE]))
@ -9211,6 +9216,7 @@ resourcestring
var
data: ansistring;
s: string;
f: Tfile;
after: record
resetLetBrowse: boolean;
@ -9220,17 +9226,18 @@ var
procedure parseAutoupdatedFiles(data:ansistring);
var
s, fn: ansistring;
s, fn: string;
raw: ansiString;
begin
autoupdatedFiles.Clear();
tlv.down();
while tlv.pop(s) = FK_NODE do
while tlv.pop(s,raw) = FK_NODE do
begin
tlv.down();
while not tlv.isOver() do
case tlv.pop(s) of
case tlv.pop(s,raw) of
FK_NAME: fn:=s;
FK_DLCOUNT: autoupdatedFiles.setInt(fn, int_(s));
FK_DLCOUNT: autoupdatedFiles.setInt(fn, int_(raw));
end;
tlv.up();
end;
@ -9253,7 +9260,7 @@ f.node:=node;
tlv:=Ttlv.create;
tlv.parse(vfs);
while not tlv.isOver() do
case tlv.pop(data) of
case tlv.pop(s,data) of
FK_ROOT:
begin
setVFS(data, rootNode );
@ -9278,7 +9285,8 @@ while not tlv.isOver() do
{ I was unable to reproduce the bug, but I found that correct data doesn't complain if I add an extra #0. }
try
data:=ZDecompressStr(bytesOf(data+#0));
if isAnyMacroIn(data) then loadingVFS.macrosFound:=TRUE;
if isAnyMacroIn(data) then
loadingVFS.macrosFound:=TRUE;
setVFS(data, node);
except msgDlg(MSG_ZLIB, MB_ICONERROR) end;
FK_FORMAT_VER:
@ -9304,15 +9312,15 @@ while not tlv.isOver() do
if msgDlg(MSG_BADCRC+MSG_BETTERSTOP,MB_ICONERROR+MB_YESNO) = IDYES then
exit;
end;
FK_RESOURCE: f.resource:=data;
FK_RESOURCE: f.resource:=s;
FK_NAME:
begin
f.name:=data;
node.text:=data;
f.name:=s;
node.text:=s;
end;
FK_FLAGS: move(data[1], f.flags, length(data));
FK_ADDEDTIME: f.atime:=dt_(data);
FK_COMMENT: f.comment:=data;
FK_COMMENT: f.comment:=s;
FK_USERPWD:
begin
data:=base64decode(data);
@ -9320,17 +9328,24 @@ while not tlv.isOver() do
f.pwd:=data;
usersInVFS.track(f.user, f.pwd);
end;
FK_USERPWD_UTF8:
begin
s:=UTF8toString(base64decode(data));
f.user:=chop(':',s);
f.pwd:=s;
usersInVFS.track(f.user, f.pwd);
end;
FK_DLCOUNT: f.DLcount:=int_(data);
FK_ACCOUNTS: f.accounts[FA_ACCESS]:=split(';',data);
FK_UPLOADACCOUNTS: f.accounts[FA_UPLOAD]:=split(';',data);
FK_DELETEACCOUNTS: f.accounts[FA_DELETE]:=split(';',data);
FK_FILESFILTER: f.filesfilter:=data;
FK_FOLDERSFILTER: f.foldersfilter:=data;
FK_UPLOADFILTER: f.uploadFilterMask:=data;
FK_REALM: f.realm:=data;
FK_DEFAULTMASK: f.defaultFileMask:=data;
FK_DIFF_TPL: f.diffTpl:=data;
FK_DONTCOUNTASDOWNLOADMASK: f.dontCountAsDownloadMask:=data;
FK_ACCOUNTS: f.accounts[FA_ACCESS]:=split(';',s);
FK_UPLOADACCOUNTS: f.accounts[FA_UPLOAD]:=split(';',s);
FK_DELETEACCOUNTS: f.accounts[FA_DELETE]:=split(';',s);
FK_FILESFILTER: f.filesfilter:=s;
FK_FOLDERSFILTER: f.foldersfilter:=s;
FK_UPLOADFILTER: f.uploadFilterMask:=s;
FK_REALM: f.realm:=s;
FK_DEFAULTMASK: f.defaultFileMask:=s;
FK_DIFF_TPL: f.diffTpl:=s;
FK_DONTCOUNTASDOWNLOADMASK: f.dontCountAsDownloadMask:=s;
FK_DONTCOUNTASDOWNLOAD: if boolean(data[1]) then include(f.flags, FA_DONT_COUNT_AS_DL); // legacy, now moved into flags
FK_ICON_GIF: if data > '' then f.setupImage(str2pic(data));
FK_AUTOUPDATED_FILES: parseAutoupdatedFiles(data);
@ -9359,7 +9374,8 @@ for act:=low(act) to high(act) do
if FA_VIS_ONLY_ANON in f.flags then
loadingVFS.visOnlyAnon:=TRUE;
if f.isVirtualFolder() or f.isLink() then f.mtime:=f.atime;
if f.isVirtualFolder() or f.isLink() then
f.mtime:=f.atime;
if assigned(f.accounts[FA_UPLOAD]) and (f.resource > '') then
addString(f.resource, uploadPaths);
f.setupImage();
@ -12286,7 +12302,6 @@ var
dll: HMODULE;
INITIALIZATION
randomize();
setErrorMode(SEM_FAILCRITICALERRORS);
exePath:=extractFilePath(ExpandFileName(paramStr(0)));

View File

@ -199,9 +199,10 @@ procedure excludeTrailingString(var s:string; ss:string);
function findEOL(s:string; ofs:integer=1; included:boolean=TRUE):integer;
function getUniqueName(start:string; exists:TnameExistsFun):string;
function getStr(from,to_:pchar):string;
function TLV(t:integer; data:ansistring):ansistring;
function TLV_NOT_EMPTY(t:integer; data:ansistring):ansistring;
function popTLV(var s,data:ansistring):integer;
function TLV(t:integer; s:string):ansistring; overload;
function TLV(t:integer; data:ansistring):ansistring; overload;
function TLV_NOT_EMPTY(t:integer; s:string):ansistring; overload;
function TLV_NOT_EMPTY(t:integer; data:ansistring):ansistring;overload;
function getCRC(data:ansistring):integer;
function dotted(i:int64):string;
function xtpl(src:string; table:array of string):string; overload;
@ -787,21 +788,24 @@ result.right:=strToInt(chop(',',s));
result.bottom:=strToInt(chop(',',s));
end; // strToRect
// Builds a TLV record from a unicode string.
//   t - record type
//   s - text payload, stored UTF-8 encoded
// The result is str_(type) + str_(payload length in bytes) + payload.
// TLV_UTF8_FLAG is added to the type only when the UTF-8 form is longer than s,
// which happens only if s holds non-ASCII characters; pure ASCII text keeps the plain type.
function TLV(t:integer; s:string):ansistring;
var
  encoded: ansistring;
  tag: integer;
begin
encoded:=UTF8encode(s);
tag:=t;
if length(encoded) > length(s) then
  tag:=tag+TLV_UTF8_FLAG;
result:=str_(tag)+str_(length(encoded))+encoded;
end;
// Builds a TLV record from raw bytes.
//   t    - record type
//   data - payload, stored as is
// The result is str_(type) + str_(payload length) + payload.
// NOTE(review): str_ presumably yields the raw bytes of the integer - confirm against its definition.
function TLV(t:integer; data:ansistring):ansistring;
begin
result:=str_(t)+str_(length(data))+data;
end;
// Like TLV() for raw bytes, but produces no record at all ('' result) when data is empty.
function TLV_NOT_EMPTY(t:integer; data:ansistring):ansistring;
begin
if data = '' then
  result:=''
else
  result:=TLV(t,data);
end;
// Removes the first TLV record from the head of s.
//   s    - buffer of consecutive records; the consumed record is deleted from it
//   data - receives the payload of the consumed record
// Returns the record type, or -1 (leaving s and data untouched) when s is shorter
// than the 8-byte header.
// For heavy jobs you are supposed to use class Ttlv.
function popTLV(var s,data:ansistring):integer;
var
  size: integer;
begin
if length(s) < 8 then
  begin
  result:=-1;
  exit;
  end;
// header: type at bytes 1..4, payload length at bytes 5..8
result:=Pinteger(@s[1])^;
size:=Pinteger(@s[5])^;
data:=copy(s, 9, size);
// use the length actually copied, which may be shorter than the declared one
delete(s, 1, length(data)+8);
end; // popTLV
// Like TLV() for unicode strings, but produces no record at all ('' result) when s is empty.
function TLV_NOT_EMPTY(t:integer; s:string):ansistring;
begin
if s = '' then
  result:=''
else
  result:=TLV(t,s);
end;
function getCRC(data:ansistring):integer;
var