From c7bff8a5e7453f4e91b22b9d2aecb5ddaca5bdf1 Mon Sep 17 00:00:00 2001 From: Emil Lerch Date: Fri, 23 Jul 2021 14:04:12 -0700 Subject: [PATCH] correct remaining to_snake issues in service manifest --- codegen/src/main.zig | 22 ++++++- codegen/src/snake.zig | 141 ++++++++++++++++++++++++++++++------------ 2 files changed, 122 insertions(+), 41 deletions(-) diff --git a/codegen/src/main.zig b/codegen/src/main.zig index d47de74..2d6bc4c 100644 --- a/codegen/src/main.zig +++ b/codegen/src/main.zig @@ -106,7 +106,7 @@ fn generateServices(allocator: *std.mem.Allocator, comptime _: []const u8, file: // Service struct // name of the field will be snake_case of whatever comes in from // sdk_id. Not sure this will simple... - const constant_name = try snake.fromPascalCase(allocator, sdk_id); + const constant_name = try constantName(allocator, sdk_id); try constant_names.append(constant_name); try writer.print("const Self = @This();\n", .{}); try writer.print("pub const version: []const u8 = \"{s}\";\n", .{version}); @@ -132,6 +132,26 @@ fn generateServices(allocator: *std.mem.Allocator, comptime _: []const u8, file: } return constant_names.toOwnedSlice(); } +fn constantName(allocator: *std.mem.Allocator, id: []const u8) ![]const u8 { + // There are some ids that don't follow consistent rules, so we'll + // look for the exceptions and, if not found, revert to the snake case + // algorithm + + // This one might be a bug in snake, but it's the only example so HPDL + if (std.mem.eql(u8, id, "SESv2")) return try std.fmt.allocPrint(allocator, "ses_v2", .{}); + // IoT is an acronym, but snake wouldn't know that. Interestingly not all + // iot services are capitalizing that way. + if (std.mem.eql(u8, id, "IoTSiteWise")) return try std.fmt.allocPrint(allocator, "iot_site_wise", .{}); //sitewise? 
+ if (std.mem.eql(u8, id, "IoTFleetHub")) return try std.fmt.allocPrint(allocator, "iot_fleet_hub", .{}); + if (std.mem.eql(u8, id, "IoTSecureTunneling")) return try std.fmt.allocPrint(allocator, "iot_secure_tunneling", .{}); + if (std.mem.eql(u8, id, "IoTThingsGraph")) return try std.fmt.allocPrint(allocator, "iot_things_graph", .{}); + // snake turns this into dev_ops, which is a little weird + if (std.mem.eql(u8, id, "DevOps Guru")) return try std.fmt.allocPrint(allocator, "devops_guru", .{}); + if (std.mem.eql(u8, id, "FSx")) return try std.fmt.allocPrint(allocator, "fsx", .{}); + + // Not a special case - just snake it + return try snake.fromPascalCase(allocator, id); +} fn generateOperation(allocator: *std.mem.Allocator, operation: smithy.ShapeInfo, shapes: anytype, writer: anytype, service: []const u8) !void { const snake_case_name = try snake.fromPascalCase(allocator, operation.name); defer allocator.free(snake_case_name); diff --git a/codegen/src/snake.zig b/codegen/src/snake.zig index 600ff9e..2747793 100644 --- a/codegen/src/snake.zig +++ b/codegen/src/snake.zig @@ -2,59 +2,97 @@ const std = @import("std"); const expectEqualStrings = std.testing.expectEqualStrings; pub fn fromPascalCase(allocator: *std.mem.Allocator, name: []const u8) ![]u8 { + const rc = try allocator.alloc(u8, name.len * 2); // This is overkill, but is > the maximum length possibly needed + errdefer allocator.free(rc); var utf8_name = (std.unicode.Utf8View.init(name) catch unreachable).iterator(); var target_inx: u64 = 0; - var previous_codepoint: ?u21 = null; - var cp = utf8_name.nextCodepoint(); - if (cp == null) { - return try allocator.dupeZ(u8, name); - } // TODO: fix bug if single letter uppercase - var codepoint = cp.?; - const rc = try allocator.alloc(u8, name.len * 2); // This is overkill, but is > the maximum length possibly needed - while (utf8_name.nextCodepoint()) |next_codepoint| { - if (codepoint > 0xff) return error{UnicodeNotSupported}.UnicodeNotSupported; - if 
(next_codepoint > 0xff) return error{UnicodeNotSupported}.UnicodeNotSupported; - const ascii_char = @truncate(u8, codepoint); - if (next_codepoint == ' ') continue; // ignore all spaces in name - if (ascii_char >= 'A' and ascii_char < 'Z') { - const lowercase_char = ascii_char + ('a' - 'A'); - if (previous_codepoint == null) { - rc[target_inx] = lowercase_char; - target_inx = target_inx + 1; + var curr_char = (try isAscii(utf8_name.nextCodepoint())).?; + target_inx = setNext(lowercase(curr_char), rc, target_inx); + var prev_char = curr_char; + if (try isAscii(utf8_name.nextCodepoint())) |ch| { + curr_char = ch; + } else { + // Single character only - we're done here + _ = setNext(0, rc, target_inx); + return rc[0..target_inx]; + } + while (try isAscii(utf8_name.nextCodepoint())) |next_char| { + if (next_char == ' ') { + // a space shouldn't be happening. But if it does, it clues us + // in pretty well: + // + // MyStuff Is Awesome + // |^ + // |next_char + // ^ + // prev_codepoint/ascii_prev_char (and target_inx) + target_inx = setNext(lowercase(curr_char), rc, target_inx); + target_inx = setNext('_', rc, target_inx); + curr_char = (try isAscii(utf8_name.nextCodepoint())).?; + target_inx = setNext(lowercase(curr_char), rc, target_inx); + prev_char = curr_char; + curr_char = (try isAscii(utf8_name.nextCodepoint())).?; + continue; + } + if (between(curr_char, 'A', 'Z')) { + if (isAcronym(curr_char, next_char)) { + // We could be in an acronym at the start of a word. This + // is the only case where we actually need to look back at the + // previous character, and if that's the case, throw in an + // underscore + // "SAMLMySAMLAcronymThing"); + if (between(prev_char, 'a', 'z')) + target_inx = setNext('_', rc, target_inx); + + //we are in an acronym - don't snake, just lower + target_inx = setNext(lowercase(curr_char), rc, target_inx); } else { - if (next_codepoint >= 'A' and next_codepoint <= 'Z' and previous_codepoint.? >= 'A' and previous_codepoint.? 
<= 'Z') { - //we are in an acronym - don't snake, just lower - rc[target_inx] = lowercase_char; - target_inx = target_inx + 1; - } else { - rc[target_inx] = '_'; - rc[target_inx + 1] = lowercase_char; - target_inx = target_inx + 2; - } + target_inx = setNext('_', rc, target_inx); + target_inx = setNext(lowercase(curr_char), rc, target_inx); } } else { - // if (ascii_char == ' ') { - // rc[target_inx] = '_'; - // } else { - rc[target_inx] = ascii_char; - // } - target_inx = target_inx + 1; + target_inx = setNext(curr_char, rc, target_inx); } - previous_codepoint = codepoint; - codepoint = next_codepoint; + prev_char = curr_char; + curr_char = next_char; } // work in the last codepoint - force lowercase - rc[target_inx] = @truncate(u8, codepoint); - if (rc[target_inx] >= 'A' and rc[target_inx] <= 'Z') { - const lowercase_char = rc[target_inx] + ('a' - 'A'); - rc[target_inx] = lowercase_char; - } - target_inx = target_inx + 1; + target_inx = setNext(lowercase(curr_char), rc, target_inx); rc[target_inx] = 0; return rc[0..target_inx]; } +fn isAcronym(char1: u8, char2: u8) bool { + return isAcronymChar(char1) and isAcronymChar(char2); +} +fn isAcronymChar(char: u8) bool { + return between(char, 'A', 'Z') or between(char, '0', '9'); +} +fn isAscii(codepoint: ?u21) !?u8 { + if (codepoint) |cp| { + if (cp > 0xff) return error.UnicodeNotSupported; + return @truncate(u8, cp); + } + return null; +} + +fn setNext(ascii: u8, slice: []u8, inx: u64) u64 { + slice[inx] = ascii; + return inx + 1; +} + +fn lowercase(ascii: u8) u8 { + var lowercase_char = ascii; + if (between(ascii, 'A', 'Z')) + lowercase_char = ascii + ('a' - 'A'); + return lowercase_char; +} + +fn between(char: u8, from: u8, to: u8) bool { + return char >= from and char <= to; +} + test "converts from PascalCase to snake_case" { const allocator = std.testing.allocator; const snake_case = try fromPascalCase(allocator, "MyPascalCaseThing"); @@ -73,3 +111,26 @@ test "spaces in the name" { defer 
allocator.free(snake_case); try expectEqualStrings("api_gateway", snake_case); } + +test "S3" { + const allocator = std.testing.allocator; + const snake_case = try fromPascalCase(allocator, "S3"); + defer allocator.free(snake_case); + try expectEqualStrings("s3", snake_case); +} + +test "ec2" { + const allocator = std.testing.allocator; + const snake_case = try fromPascalCase(allocator, "EC2"); + defer allocator.free(snake_case); + try expectEqualStrings("ec2", snake_case); +} + +test "IoT 1Click Devices Service" { + const allocator = std.testing.allocator; + const snake_case = try fromPascalCase(allocator, "IoT 1Click Devices Service"); + defer allocator.free(snake_case); + // NOTE: There is some debate among humans about what this should + // turn into. Should it be iot_1click_... or iot_1_click...? + try expectEqualStrings("iot_1_click_devices_service", snake_case); +}