aws-sdk-for-zig/codegen/src/snake.zig

const std = @import("std");
const expectEqualStrings = std.testing.expectEqualStrings;

/// Converts an ASCII PascalCase identifier such as "MyPascalCaseThing" into
/// snake_case ("my_pascal_case_thing").
///
/// Runs of capitals and digits are treated as acronyms and kept together
/// ("SAMLMySAMLAcronymThing" -> "saml_my_saml_acronym_thing"). A space seen
/// while scanning ahead is treated as a word boundary and replaced by an
/// underscore ("API Gateway" -> "api_gateway").
///
/// The caller owns the returned slice and must free it with `allocator`.
/// The slice length always equals the allocation length, so a plain
/// `allocator.free(result)` is valid. An empty `name` yields an empty slice.
///
/// Errors:
/// - `error.OutOfMemory` if allocation fails.
/// - `error.InvalidUtf8` if `name` is not valid UTF-8.
/// - `error.UnicodeNotSupported` if `name` contains a non-ASCII codepoint.
/// - `error.TrailingSpace` if a space is the last character of `name`.
pub fn fromPascalCase(allocator: std.mem.Allocator, name: []const u8) ![]u8 {
    // At most one underscore is inserted per input character, so twice the
    // input length is always enough scratch space. It is shrunk before return.
    const rc = try allocator.alloc(u8, name.len * 2);
    errdefer allocator.free(rc);
    var utf8_name = (try std.unicode.Utf8View.init(name)).iterator();
    var target_inx: usize = 0;

    // The first character is always emitted lowercased with no underscore.
    var curr_char = (try isAscii(utf8_name.nextCodepoint())) orelse {
        // Empty input: nothing to convert.
        return try allocator.realloc(rc, target_inx);
    };
    target_inx = setNext(lowercase(curr_char), rc, target_inx);
    var prev_char = curr_char;
    if (try isAscii(utf8_name.nextCodepoint())) |ch| {
        curr_char = ch;
    } else {
        // Single character only - we're done here. Shrink so the returned
        // slice matches the allocation the caller will free.
        return try allocator.realloc(rc, target_inx);
    }

    // Invariant: curr_char has been read but not yet written; next_char is
    // the one-character lookahead used to detect acronyms and spaces.
    while (try isAscii(utf8_name.nextCodepoint())) |next_char| {
        if (next_char == ' ') {
            // a space shouldn't be happening. But if it does, it clues us
            // in pretty well:
            //
            // MyStuff Is Awesome
            //       |^
            //       |next_char
            //       ^
            //       curr_char (not yet written)
            target_inx = setNext(lowercase(curr_char), rc, target_inx);
            target_inx = setNext('_', rc, target_inx);
            curr_char = (try isAscii(utf8_name.nextCodepoint())) orelse {
                // The space was the final character; there is no word to
                // start, so report it instead of crashing.
                std.log.err("Error on fromPascalCase processing name '{s}'", .{name});
                return error.TrailingSpace;
            };
            // We have reached the end of the string (e.g. "Resource Explorer 2")
            // when this is null. Check before writing curr_char so that the
            // trailing write below does not duplicate the last character.
            const after_curr = (try isAscii(utf8_name.nextCodepoint())) orelse break;
            target_inx = setNext(lowercase(curr_char), rc, target_inx);
            prev_char = curr_char;
            curr_char = after_curr;
            continue;
        }
        if (between(curr_char, 'A', 'Z')) {
            if (isAcronym(curr_char, next_char)) {
                // We could be in an acronym at the start of a word. This
                // is the only case where we actually need to look back at the
                // previous character, and if that's the case, throw in an
                // underscore
                // "SAMLMySAMLAcronymThing");
                if (between(prev_char, 'a', 'z'))
                    target_inx = setNext('_', rc, target_inx);

                // we are in an acronym - don't snake, just lower
                target_inx = setNext(lowercase(curr_char), rc, target_inx);
            } else {
                // A capital followed by a non-acronym character starts a word.
                target_inx = setNext('_', rc, target_inx);
                target_inx = setNext(lowercase(curr_char), rc, target_inx);
            }
        } else {
            target_inx = setNext(curr_char, rc, target_inx);
        }
        prev_char = curr_char;
        curr_char = next_char;
    }
    // work in the last codepoint - force lowercase
    target_inx = setNext(lowercase(curr_char), rc, target_inx);

    // realloc (unlike an unchecked resize) guarantees the returned slice is
    // exactly the allocation, copying to a smaller buffer if needed.
    return try allocator.realloc(rc, target_inx);
}

/// Returns true when both characters can be part of an acronym run.
fn isAcronym(char1: u8, char2: u8) bool {
    return isAcronymChar(char1) and isAcronymChar(char2);
}

/// Returns true for the characters that make up acronyms: 'A'-'Z' and '0'-'9'.
fn isAcronymChar(char: u8) bool {
    return between(char, 'A', 'Z') or between(char, '0', '9');
}

/// Narrows an optional codepoint to a single ASCII byte. `null` (end of
/// input) passes through unchanged. Anything above 0x7f is rejected with
/// `error.UnicodeNotSupported`, because writing such a codepoint as one byte
/// would produce invalid UTF-8 output.
fn isAscii(codepoint: ?u21) !?u8 {
    const cp = codepoint orelse return null;
    if (cp > 0x7f) return error.UnicodeNotSupported;
    return @as(u8, @intCast(cp));
}

/// Stores `ascii` at `slice[inx]` and returns the index of the next free slot.
fn setNext(ascii: u8, slice: []u8, inx: usize) usize {
    slice[inx] = ascii;
    return inx + 1;
}

/// Maps 'A'-'Z' to 'a'-'z'; every other byte is returned unchanged.
fn lowercase(ascii: u8) u8 {
    return if (between(ascii, 'A', 'Z')) ascii + ('a' - 'A') else ascii;
}

/// Returns true when `char` lies in the inclusive range [`from`, `to`].
fn between(char: u8, from: u8, to: u8) bool {
    return char >= from and char <= to;
}

test "converts from PascalCase to snake_case" {
    // Plain PascalCase input with no acronyms, digits or spaces.
    const gpa = std.testing.allocator;
    const actual = try fromPascalCase(gpa, "MyPascalCaseThing");
    defer gpa.free(actual);
    try std.testing.expectEqualStrings("my_pascal_case_thing", actual);
}
test "handles from PascalCase acronyms to snake_case" {
    // "SAML" appears both at the start and in the middle of the name; each
    // occurrence must come out as a single lowercase word.
    const gpa = std.testing.allocator;
    const actual = try fromPascalCase(gpa, "SAMLMySAMLAcronymThing");
    defer gpa.free(actual);
    try std.testing.expectEqualStrings("saml_my_saml_acronym_thing", actual);
}
test "spaces in the name" {
    // A space between words is expected to become a single underscore.
    const gpa = std.testing.allocator;
    const actual = try fromPascalCase(gpa, "API Gateway");
    defer gpa.free(actual);
    try std.testing.expectEqualStrings("api_gateway", actual);
}

test "S3" {
    // Two-character name made of a capital and a digit.
    const gpa = std.testing.allocator;
    const actual = try fromPascalCase(gpa, "S3");
    defer gpa.free(actual);
    try std.testing.expectEqualStrings("s3", actual);
}

test "ec2" {
    // Capitals followed by a digit must stay together as one word.
    const gpa = std.testing.allocator;
    const actual = try fromPascalCase(gpa, "EC2");
    defer gpa.free(actual);
    try std.testing.expectEqualStrings("ec2", actual);
}

test "IoT 1Click Devices Service" {
    const gpa = std.testing.allocator;
    const actual = try fromPascalCase(gpa, "IoT 1Click Devices Service");
    defer gpa.free(actual);
    // NOTE: There is some debate among humans about what this should
    // turn into. Should it be iot_1click_... or iot_1_click...?
    // This test pins the iot_1_click_... form.
    try std.testing.expectEqualStrings("iot_1_click_devices_service", actual);
}
test "Resource Explorer 2" {
    const gpa = std.testing.allocator;
    const actual = try fromPascalCase(gpa, "Resource Explorer 2");
    defer gpa.free(actual);
    // A space followed by a single character at the end of the string:
    // the final character must appear exactly once in the output.
    try std.testing.expectEqualStrings("resource_explorer_2", actual);
}
codegen all the things 2021-05-30 01:17:45 +00:00			`const std = @import("std");`
			`const expectEqualStrings = std.testing.expectEqualStrings;`

upgrade to zig 0.9.0 2021-12-23 16:51:48 +00:00			`pub fn fromPascalCase(allocator: std.mem.Allocator, name: []const u8) ![]u8 {`
correct remaining to_snake issues in service manifest 2021-07-23 21:04:12 +00:00			`const rc = try allocator.alloc(u8, name.len * 2); // This is overkill, but is > the maximum length possibly needed`
			`errdefer allocator.free(rc);`
codegen all the things 2021-05-30 01:17:45 +00:00			`var utf8_name = (std.unicode.Utf8View.init(name) catch unreachable).iterator();`
			`var target_inx: u64 = 0;`
correct remaining to_snake issues in service manifest 2021-07-23 21:04:12 +00:00			`var curr_char = (try isAscii(utf8_name.nextCodepoint())).?;`
			`target_inx = setNext(lowercase(curr_char), rc, target_inx);`
			`var prev_char = curr_char;`
			`if (try isAscii(utf8_name.nextCodepoint())) \|ch\| {`
			`curr_char = ch;`
			`} else {`
			`// Single character only - we're done here`
			`_ = setNext(0, rc, target_inx);`
			`return rc[0..target_inx];`
			`}`
			`while (try isAscii(utf8_name.nextCodepoint())) \|next_char\| {`
			`if (next_char == ' ') {`
			`// a space shouldn't be happening. But if it does, it clues us`
			`// in pretty well:`
			`//`
			`// MyStuff Is Awesome`
			`// \|^`
			`// \|next_char`
			`// ^`
			`// prev_codepoint/ascii_prev_char (and target_inx)`
			`target_inx = setNext(lowercase(curr_char), rc, target_inx);`
			`target_inx = setNext('_', rc, target_inx);`
update snake case to handle space followed by single character at end of string 2024-02-29 21:43:48 +00:00			`var maybe_curr_char = (try isAscii(utf8_name.nextCodepoint()));`
			`if (maybe_curr_char == null) {`
			`std.log.err("Error on fromPascalCase processing name '{s}'", .{name});`
			`}`
			`curr_char = maybe_curr_char.?;`
			`maybe_curr_char = (try isAscii(utf8_name.nextCodepoint()));`
			`if (maybe_curr_char == null) {`
			`// We have reached the end of the string (e.g. "Resource Explorer 2")`
			`// We need to do this check before we setNext, so that we don't`
			`// end up duplicating the last character`
			`break;`
			`// std.log.err("Error on fromPascalCase processing name '{s}', curr_char = '{}'", .{ name, curr_char });`
			`}`
correct remaining to_snake issues in service manifest 2021-07-23 21:04:12 +00:00			`target_inx = setNext(lowercase(curr_char), rc, target_inx);`
			`prev_char = curr_char;`
update snake case to handle space followed by single character at end of string 2024-02-29 21:43:48 +00:00			`curr_char = maybe_curr_char.?;`
correct remaining to_snake issues in service manifest 2021-07-23 21:04:12 +00:00			`continue;`
			`}`
			`if (between(curr_char, 'A', 'Z')) {`
			`if (isAcronym(curr_char, next_char)) {`
			`// We could be in an acronym at the start of a word. This`
			`// is the only case where we actually need to look back at the`
			`// previous character, and if that's the case, throw in an`
			`// underscore`
			`// "SAMLMySAMLAcronymThing");`
			`if (between(prev_char, 'a', 'z'))`
			`target_inx = setNext('_', rc, target_inx);`

			`//we are in an acronym - don't snake, just lower`
			`target_inx = setNext(lowercase(curr_char), rc, target_inx);`
codegen all the things 2021-05-30 01:17:45 +00:00			`} else {`
correct remaining to_snake issues in service manifest 2021-07-23 21:04:12 +00:00			`target_inx = setNext('_', rc, target_inx);`
			`target_inx = setNext(lowercase(curr_char), rc, target_inx);`
codegen all the things 2021-05-30 01:17:45 +00:00			`}`
			`} else {`
correct remaining to_snake issues in service manifest 2021-07-23 21:04:12 +00:00			`target_inx = setNext(curr_char, rc, target_inx);`
codegen all the things 2021-05-30 01:17:45 +00:00			`}`
correct remaining to_snake issues in service manifest 2021-07-23 21:04:12 +00:00			`prev_char = curr_char;`
			`curr_char = next_char;`
codegen all the things 2021-05-30 01:17:45 +00:00			`}`
			`// work in the last codepoint - force lowercase`
correct remaining to_snake issues in service manifest 2021-07-23 21:04:12 +00:00			`target_inx = setNext(lowercase(curr_char), rc, target_inx);`
codegen all the things 2021-05-30 01:17:45 +00:00
			`rc[target_inx] = 0;`
update snake case to handle space followed by single character at end of string 2024-02-29 21:43:48 +00:00			`_ = allocator.resize(rc, target_inx);`
codegen all the things 2021-05-30 01:17:45 +00:00			`return rc[0..target_inx];`
			`}`

correct remaining to_snake issues in service manifest 2021-07-23 21:04:12 +00:00			`fn isAcronym(char1: u8, char2: u8) bool {`
			`return isAcronymChar(char1) and isAcronymChar(char2);`
			`}`
			`fn isAcronymChar(char: u8) bool {`
			`return between(char, 'A', 'Z') or between(char, '0', '9');`
			`}`
			`fn isAscii(codepoint: ?u21) !?u8 {`
			`if (codepoint) \|cp\| {`
			`if (cp > 0xff) return error.UnicodeNotSupported;`
codegen: revert build to mostly stock, update to 0.11 2023-08-14 22:24:46 +00:00			`return @as(u8, @truncate(cp));`
correct remaining to_snake issues in service manifest 2021-07-23 21:04:12 +00:00			`}`
			`return null;`
			`}`

			`fn setNext(ascii: u8, slice: []u8, inx: u64) u64 {`
			`slice[inx] = ascii;`
			`return inx + 1;`
			`}`

			`fn lowercase(ascii: u8) u8 {`
			`var lowercase_char = ascii;`
			`if (between(ascii, 'A', 'Z'))`
			`lowercase_char = ascii + ('a' - 'A');`
			`return lowercase_char;`
			`}`

			`fn between(char: u8, from: u8, to: u8) bool {`
			`return char >= from and char <= to;`
			`}`

codegen all the things 2021-05-30 01:17:45 +00:00			`test "converts from PascalCase to snake_case" {`
			`const allocator = std.testing.allocator;`
			`const snake_case = try fromPascalCase(allocator, "MyPascalCaseThing");`
			`defer allocator.free(snake_case);`
allow for spaces in the name to be "handled" (ignored) 2021-06-30 00:19:34 +00:00			`try expectEqualStrings("my_pascal_case_thing", snake_case);`
codegen all the things 2021-05-30 01:17:45 +00:00			`}`
			`test "handles from PascalCase acronyms to snake_case" {`
			`const allocator = std.testing.allocator;`
			`const snake_case = try fromPascalCase(allocator, "SAMLMySAMLAcronymThing");`
			`defer allocator.free(snake_case);`
allow for spaces in the name to be "handled" (ignored) 2021-06-30 00:19:34 +00:00			`try expectEqualStrings("saml_my_saml_acronym_thing", snake_case);`
			`}`
			`test "spaces in the name" {`
			`const allocator = std.testing.allocator;`
			`const snake_case = try fromPascalCase(allocator, "API Gateway");`
			`defer allocator.free(snake_case);`
			`try expectEqualStrings("api_gateway", snake_case);`
codegen all the things 2021-05-30 01:17:45 +00:00			`}`
correct remaining to_snake issues in service manifest 2021-07-23 21:04:12 +00:00
			`test "S3" {`
			`const allocator = std.testing.allocator;`
			`const snake_case = try fromPascalCase(allocator, "S3");`
			`defer allocator.free(snake_case);`
			`try expectEqualStrings("s3", snake_case);`
			`}`

			`test "ec2" {`
			`const allocator = std.testing.allocator;`
			`const snake_case = try fromPascalCase(allocator, "EC2");`
			`defer allocator.free(snake_case);`
			`try expectEqualStrings("ec2", snake_case);`
			`}`

			`test "IoT 1Click Devices Service" {`
			`const allocator = std.testing.allocator;`
			`const snake_case = try fromPascalCase(allocator, "IoT 1Click Devices Service");`
			`defer allocator.free(snake_case);`
			`// NOTE: There is some debate amoung humans about what this should`
			`// turn into. Should it be iot_1click_... or iot_1_click...?`
			`try expectEqualStrings("iot_1_click_devices_service", snake_case);`
			`}`
update snake case to handle space followed by single character at end of string 2024-02-29 21:43:48 +00:00			`test "Resource Explorer 2" {`
			`const allocator = std.testing.allocator;`
			`const snake_case = try fromPascalCase(allocator, "Resource Explorer 2");`
			`defer allocator.free(snake_case);`
			`// NOTE: There is some debate amoung humans about what this should`
			`// turn into. Should it be iot_1click_... or iot_1_click...?`
			`try expectEqualStrings("resource_explorer_2", snake_case);`
			`}`