correct remaining to_snake issues in service manifest

2021-07-23 14:04:12 -07:00 · 2021-07-23 14:04:12 -07:00 · c7bff8a5e7
commit c7bff8a5e7
parent 2e01197d58
2 changed files with 122 additions and 41 deletions
--- a/codegen/src/main.zig
+++ b/codegen/src/main.zig
@ -106,7 +106,7 @@ fn generateServices(allocator: *std.mem.Allocator, comptime _: []const u8, file:
        // Service struct
        // name of the field will be snake_case of whatever comes in from
        // sdk_id. Not sure this will be simple...
-        const constant_name = try snake.fromPascalCase(allocator, sdk_id);
+        const constant_name = try constantName(allocator, sdk_id);
        try constant_names.append(constant_name);
        try writer.print("const Self = @This();\n", .{});
        try writer.print("pub const version: []const u8 = \"{s}\";\n", .{version});
@ -132,6 +132,26 @@ fn generateServices(allocator: *std.mem.Allocator, comptime _: []const u8, file:
    }
    return constant_names.toOwnedSlice();
 }
 /// Maps a service `id` to the snake_case constant name used for it.
 ///
 /// A small set of ids that don't follow consistent rules is looked up in a
 /// table of exceptions; every other id is handed to `snake.fromPascalCase`.
 /// On both paths the returned slice is allocated with `allocator`.
 /// Fails if the allocation fails or if `snake.fromPascalCase` returns an error.
 fn constantName(allocator: *std.mem.Allocator, id: []const u8) ![]const u8 {
    const Exception = struct { id: []const u8, name: []const u8 };
    const exceptions = [_]Exception{
        // This one might be a bug in snake, but it's the only example
        .{ .id = "SESv2", .name = "ses_v2" },
        // IoT is an acronym, but snake wouldn't know that. Interestingly, not
        // all IoT services capitalize it that way.
        .{ .id = "IoTSiteWise", .name = "iot_site_wise" }, // sitewise?
        .{ .id = "IoTFleetHub", .name = "iot_fleet_hub" },
        .{ .id = "IoTSecureTunneling", .name = "iot_secure_tunneling" },
        .{ .id = "IoTThingsGraph", .name = "iot_things_graph" },
        // snake turns this into dev_ops, which is a little weird
        .{ .id = "DevOps Guru", .name = "devops_guru" },
        .{ .id = "FSx", .name = "fsx" },
    };
    for (exceptions) |entry| {
        // Copy the literal so this path hands back allocated memory just
        // like the general path below.
        if (std.mem.eql(u8, id, entry.id)) return try allocator.dupe(u8, entry.name);
    }
    // Not a special case - just snake it
    return try snake.fromPascalCase(allocator, id);
 }
 fn generateOperation(allocator: *std.mem.Allocator, operation: smithy.ShapeInfo, shapes: anytype, writer: anytype, service: []const u8) !void {
    const snake_case_name = try snake.fromPascalCase(allocator, operation.name);
    defer allocator.free(snake_case_name);
--- a/codegen/src/snake.zig
+++ b/codegen/src/snake.zig
@ -2,59 +2,97 @@ const std = @import("std");
 const expectEqualStrings = std.testing.expectEqualStrings;
 /// Converts `name` into a lower-case, underscore-separated form and returns
 /// it as a slice of a buffer allocated with `allocator` (`name.len * 2` bytes
 /// are allocated; the returned slice covers only the bytes written).
 ///
 /// Errors: allocation failure, or whatever `isAscii` returns for a code point
 /// it rejects. Invalid UTF-8 in `name` is asserted away with `catch unreachable`.
 ///
 /// This span is shown as a diff: lines starting with `-` belong to the previous
 /// implementation and lines starting with `+` to its replacement.
 /// NOTE(review): several unmarked lines (the direct `rc[target_inx] = ...`
 /// writes) still use names from the `-` lines, so they look like leftovers of
 /// the old implementation - confirm before reading them as current code.
 /// NOTE(review): the `+` lines unwrap the first code point with `.?`, so an
 /// empty `name` looks unsupported - confirm callers never pass one.
 /// NOTE(review): the returned slice is shorter than the allocation made at the
 /// top - confirm that the caller's `allocator.free` tolerates that.
 pub fn fromPascalCase(allocator: *std.mem.Allocator, name: []const u8) ![]u8 {
    const rc = try allocator.alloc(u8, name.len * 2); // This is overkill, but is > the maximum length possibly needed
    errdefer allocator.free(rc);
    var utf8_name = (std.unicode.Utf8View.init(name) catch unreachable).iterator();
    var target_inx: u64 = 0;
-    var previous_codepoint: ?u21 = null;
+    var curr_char = (try isAscii(utf8_name.nextCodepoint())).?;
-    var cp = utf8_name.nextCodepoint();
+    target_inx = setNext(lowercase(curr_char), rc, target_inx);
-    if (cp == null) {
+    var prev_char = curr_char;
-        return try allocator.dupeZ(u8, name);
+    if (try isAscii(utf8_name.nextCodepoint())) |ch| {
-    } // TODO: fix bug if single letter uppercase
+        curr_char = ch;
-    var codepoint = cp.?;
+    } else {
-    const rc = try allocator.alloc(u8, name.len * 2); // This is overkill, but is > the maximum length possibly needed
+        // Single character only - we're done here
-    while (utf8_name.nextCodepoint()) |next_codepoint| {
+        _ = setNext(0, rc, target_inx);
-        if (codepoint > 0xff) return error{UnicodeNotSupported}.UnicodeNotSupported;
+        return rc[0..target_inx];
-        if (next_codepoint > 0xff) return error{UnicodeNotSupported}.UnicodeNotSupported;
+    }
-        const ascii_char = @truncate(u8, codepoint);
+    while (try isAscii(utf8_name.nextCodepoint())) |next_char| {
-        if (next_codepoint == ' ') continue; // ignore all spaces in name
+        if (next_char == ' ') {
-        if (ascii_char >= 'A' and ascii_char < 'Z') {
+            // a space shouldn't be happening. But if it does, it clues us
-            const lowercase_char = ascii_char + ('a' - 'A');
+            // in pretty well:
-            if (previous_codepoint == null) {
+            //
-                rc[target_inx] = lowercase_char;
+            // MyStuff Is Awesome
-                target_inx = target_inx + 1;
+            //       |^
            //       |next_char
            //       ^
            //       prev_codepoint/ascii_prev_char (and target_inx)
            target_inx = setNext(lowercase(curr_char), rc, target_inx);
            target_inx = setNext('_', rc, target_inx);
            curr_char = (try isAscii(utf8_name.nextCodepoint())).?;
            target_inx = setNext(lowercase(curr_char), rc, target_inx);
            prev_char = curr_char;
            curr_char = (try isAscii(utf8_name.nextCodepoint())).?;
            continue;
        }
        if (between(curr_char, 'A', 'Z')) {
            if (isAcronym(curr_char, next_char)) {
                // We could be in an acronym at the start of a word. This
                // is the only case where we actually need to look back at the
                // previous character, and if that's the case, throw in an
                // underscore
                // "SAMLMySAMLAcronymThing");
                if (between(prev_char, 'a', 'z'))
                    target_inx = setNext('_', rc, target_inx);
                //we are in an acronym - don't snake, just lower
                target_inx = setNext(lowercase(curr_char), rc, target_inx);
            } else {
-                if (next_codepoint >= 'A' and next_codepoint <= 'Z' and previous_codepoint.? >= 'A' and previous_codepoint.? <= 'Z') {
+                target_inx = setNext('_', rc, target_inx);
-                    //we are in an acronym - don't snake, just lower
+                target_inx = setNext(lowercase(curr_char), rc, target_inx);
                    rc[target_inx] = lowercase_char;
                    target_inx = target_inx + 1;
                } else {
                    rc[target_inx] = '_';
                    rc[target_inx + 1] = lowercase_char;
                    target_inx = target_inx + 2;
                }
            }
        } else {
-            // if (ascii_char == ' ') {
+            target_inx = setNext(curr_char, rc, target_inx);
            //     rc[target_inx] = '_';
            // } else {
            rc[target_inx] = ascii_char;
            // }
            target_inx = target_inx + 1;
        }
-        previous_codepoint = codepoint;
+        prev_char = curr_char;
-        codepoint = next_codepoint;
+        curr_char = next_char;
    }
    // work in the last codepoint - force lowercase
-    rc[target_inx] = @truncate(u8, codepoint);
+    target_inx = setNext(lowercase(curr_char), rc, target_inx);
    if (rc[target_inx] >= 'A' and rc[target_inx] <= 'Z') {
        const lowercase_char = rc[target_inx] + ('a' - 'A');
        rc[target_inx] = lowercase_char;
    }
    target_inx = target_inx + 1;
    rc[target_inx] = 0;
    return rc[0..target_inx];
 }
 /// Reports whether both `char1` and `char2` satisfy `isAcronymChar`.
 fn isAcronym(char1: u8, char2: u8) bool {
    // The second character is only examined when the first one qualifies.
    if (!isAcronymChar(char1)) return false;
    return isAcronymChar(char2);
 }
 /// Reports whether `char` is an upper-case letter ('A'..'Z') or a digit ('0'..'9').
 fn isAcronymChar(char: u8) bool {
    if (between(char, 'A', 'Z')) return true;
    return between(char, '0', '9');
 }
 /// Narrows an optional code point to a single ASCII byte.
 ///
 /// Returns null when `codepoint` is null, and `error.UnicodeNotSupported`
 /// for any code point above 0x7f. Values in 0x80..0xff are rejected as well:
 /// truncated to one byte they would not be valid UTF-8 on their own.
 fn isAscii(codepoint: ?u21) !?u8 {
    const cp = codepoint orelse return null;
    if (cp > 0x7f) return error.UnicodeNotSupported;
    return @truncate(u8, cp);
 }
 /// Stores `ascii` at position `inx` of `slice` and returns the index that
 /// follows it, so the caller can chain writes.
 fn setNext(ascii: u8, slice: []u8, inx: u64) u64 {
    slice[inx] = ascii;
    const following = inx + 1;
    return following;
 }
 /// Returns the lower-case form of `ascii` when it lies in 'A'..'Z'; any
 /// other byte is returned unchanged.
 fn lowercase(ascii: u8) u8 {
    return if (between(ascii, 'A', 'Z')) ascii + ('a' - 'A') else ascii;
 }
 /// Reports whether `char` lies in the inclusive range `from`..`to`.
 fn between(char: u8, from: u8, to: u8) bool {
    if (char < from) return false;
    return char <= to;
 }
 test "converts from PascalCase to snake_case" {
    const allocator = std.testing.allocator;
    const snake_case = try fromPascalCase(allocator, "MyPascalCaseThing");
@ -73,3 +111,26 @@ test "spaces in the name" {
    defer allocator.free(snake_case);
    try expectEqualStrings("api_gateway", snake_case);
 }
 test "S3" {
    // A capital letter followed by a digit is expected to come out simply
    // lower-cased, with no underscore in between.
    const converted = try fromPascalCase(std.testing.allocator, "S3");
    defer std.testing.allocator.free(converted);
    try expectEqualStrings("s3", converted);
 }
 test "ec2" {
    // Two capitals followed by a digit are expected to stay together as one
    // lower-cased word.
    const converted = try fromPascalCase(std.testing.allocator, "EC2");
    defer std.testing.allocator.free(converted);
    try expectEqualStrings("ec2", converted);
 }
 test "IoT 1Click Devices Service" {
    const converted = try fromPascalCase(std.testing.allocator, "IoT 1Click Devices Service");
    defer std.testing.allocator.free(converted);
    // NOTE: There is some debate among humans about what this should
    // turn into: iot_1click_... or iot_1_click_...? This test pins the
    // second form.
    try expectEqualStrings("iot_1_click_devices_service", converted);
 }