You are here |
gcc.godbolt.org | ||
| | | |
zig.godbolt.org
|
|
| | | |
// NOTE(review): the builtin spellings used below (@enumToInt, @intToEnum,
// @ptrToInt, two-argument @truncate/@ptrCast/@alignCast, callconv(.Inline))
// belong to an older Zig release; confirm the compiler version before building.
const std = @import("std");
const testing = std.testing;

/// Byte-at-a-time state machine used to build the lookup tables in `is_utf8`.
const utfValidator = struct {
    /// Validator states. `.one`, `.two` and `.three` are the number of
    /// continuation bytes (0x80...0xbf) still expected; `.fail` is absorbing.
    /// The declaration order matters: the numeric tag value (0..4) is what the
    /// tables in `is_utf8` multiply by 5 to get a bit offset.
    const State = enum(u6) {
        ok,
        one,
        two,
        three,
        fail,
    };

    /// Returns the state reached from `state` after consuming the byte `char`.
    ///
    /// Every range boundary below falls on a multiple of 8, so the result
    /// depends only on the top five bits of `char`. `is_utf8` relies on this
    /// when it indexes `table2` with five bits per byte.
    ///
    /// NOTE: this is a structural check only. Lead bytes are classified by
    /// range, so overlong forms (e.g. a 0xc0 or 0xc1 lead), UTF-16 surrogate
    /// encodings and lead bytes 0xf5...0xf7 are all accepted.
    fn step(char: u8, state: State) State {
        return switch (state) {
            .ok => switch (char) {
                0x00...0x7f => .ok,
                0xc0...0xdf => .one,
                0xe0...0xef => .two,
                0xf0...0xf7 => .three,
                else => .fail,
            },
            .one => switch (char) {
                0x80...0xbf => .ok,
                else => .fail,
            },
            .two => switch (char) {
                0x80...0xbf => .one,
                else => .fail,
            },
            .three => switch (char) {
                0x80...0xbf => .two,
                else => .fail,
            },
            .fail => .fail,
        };
    }
};

/// Thin wrapper around the x86 `pext` (parallel bit extract, BMI2) instruction:
/// gathers the bits of `val` selected by `mask` into the low bits of the result.
/// The operands are written in AT&T order (mask, source, destination).
fn pext(val: u64, mask: u64) callconv(.Inline) u64 {
    return asm("pext %[m],%[v],%[r]":[r] "=r" (-> u64) : [v] "r" (val), [m] "r" (mask));
}

/// Returns true when `data` ends in the `.ok` state of `utfValidator`, i.e. it
/// is structurally well-formed UTF-8 with no truncated sequence at the end.
/// The leniencies documented on `utfValidator.step` apply here as well.
///
/// The state is carried as a shift amount: every table entry packs, for each
/// starting state `s`, the value `5 * next_state` at bit offset `5 * s`. A
/// transition is then `state = table[index] >> (low five bits of state)`.
///
/// Inputs of 16 bytes or more use `pext`, so that path needs a target on which
/// the instruction is available.
pub fn is_utf8(data: []const u8) bool {
    // One entry per byte value: the packed transitions for a single byte.
    const table1 = comptime init: {
        var tmp = [_]u32{0} ** 256;
        @setEvalBranchQuota(5000);
        for (&tmp, 0..) |*e, i| {
            for (@typeInfo(utfValidator.State).Enum.fields) |f| {
                e.* |= (5 * @as(u32, @enumToInt(utfValidator.step(i, @intToEnum(utfValidator.State, f.value))))) << (f.value * 5);
            }
        }
        break :init tmp;
    };
    // One entry per pair of bytes, each byte reduced to its top five bits.
    // Index bits 0..4 stand for the first byte and bits 5..9 for the second;
    // each is shifted back into bits 3..7 of a byte before being stepped.
    const table2 = comptime init: {
        var tmp = [_]u32{0} ** 1024;
        @setEvalBranchQuota(50000);
        for (&tmp, 0..) |*e, i| {
            for (@typeInfo(utfValidator.State).Enum.fields) |f| {
                var s = @intToEnum(utfValidator.State, f.value);
                s = utfValidator.step((i & 0x01f) << 3, s);
                s = utfValidator.step((i & 0x3e0) >> 2, s);
                e.* |= (5 * @as(u32, @enumToInt(s))) << (f.value * 5);
            }
        }
        break :init tmp;
    };
    // Low five bits hold 5 * current state (0 == .ok); the higher bits are
    // leftovers from the previous shift and are dropped by every @truncate.
    var state: u32 = 0;
    // Short inputs: plain byte-wise walk, no alignment or word loads.
    if (data.len < 16) {
        for (data) |w| {
            state = table1[w] >> @truncate(u5, state);
        }
        return @intToEnum(utfValidator.State, @truncate(u5, state) / 5) == .ok;
    }
    var dp = data.ptr;
    const ep = dp + data.len;
    // Process first few bytes one-by-one to align the pointer to 8 bytes.
    // At most 7 bytes are consumed here, and data.len >= 16 on this path, so
    // this cannot run past the end of `data`.
    const extra = @ptrToInt(dp) & 7;
    if (extra != 0) for (0..8-extra) |_| {
        state = table1[dp[0]] >> @truncate(u5, state);
        dp += 1;
    };
    var wp = @ptrCast([*]const u64, @alignCast(8, dp));
    // Number of u64 words handled per loop iteration.
    const unroll = 4;
    // Runs only while more than `unroll` whole words remain before `ep`;
    // whatever is left over is finished by the byte-wise loop further down.
    while (@ptrToInt(wp+unroll) < @ptrToInt(ep)) {
        // Unrolled at compile time over wp[0..unroll].
        inline for (wp, 0..unroll) |w, _| {
            // Each mask selects the top five bits (0xf8) of two adjacent bytes,
            // giving a 10-bit `table2` index. The masks advance from the least
            // to the most significant byte pair, which is memory order for a
            // little-endian load.
            state = table2[pext(w, 0x000000000000f8f8)] >> @truncate(u5, state);
            state = table2[pext(w, 0x00000000f8f80000)] >> @truncate(u5, state);
            state = table2[pext(w, 0x0000f8f800000000)] >> @truncate(u5, state);
            state = table2[pext(w, 0xf8f8000000000000)] >> @truncate(u5, state);
        }
        wp += unroll;
    }
    // Tail: finish the remaining bytes one at a time.
    dp = @ptrCast([*]const u8, wp);
    while (dp != ep) {
        state = table1[dp[0]] >> @truncate(u5, state);
        dp += 1;
    }
    // Recover the state tag from the shift amount and require `.ok`.
    return @intToEnum(utfValidator.State, @truncate(u5, state) / 5) == .ok;
}

/// Demo entry point: prints the result of `is_utf8` for a string containing the
/// byte 0xff (which `step` sends to `.fail` from every state) and for a short
/// ASCII string. `@call(.never_inline, ...)` requests that the calls not be
/// inlined into `main`.
pub fn main() !void {
    const str = "a random string to valida\xffte";
    const str2 = "asdasda";
    std.debug.print("{}\n", .{@call(.never_inline, is_utf8, .{str})});
    std.debug.print("{}\n", .{@call(.never_inline, is_utf8, .{str2})});
}
| |
| | | |
darkcoding.net
|
|
| | | | Solvitas perambulum | |
| | | |
mcyoung.xyz
|
|
| | | | ||
| | | |
developsense.com
|
|
| | Why do we refer to the real requirements for a product as 'non-functional' requirements? Here's a short video in which I talk about that. https://youtu.be/f |