618 lines
23 KiB
Zig
618 lines
23 KiB
Zig
//! Wikidata SPARQL classification provider.
|
|
//!
|
|
//! ## What this provider does
|
|
//!
|
|
//! Given a stock symbol, Wikidata can answer:
|
|
//!
|
|
//! * "What kind of entity is this?" — name, industry, sector,
|
|
//! country of incorporation, inception date, instance-of
|
|
//! classification (operating company / mutual fund / ETF / …).
|
|
//! * "Does this match the SEC's CIK?" — Wikidata's P5531 already
|
|
//! stores the 10-digit zero-padded CIK matching SEC's convention.
|
|
//!
|
|
//! ## Workflow
|
|
//!
|
|
//! `fetch(symbols)` runs ONE batched SPARQL query that returns
|
|
//! per-ticker rows. The query is keyed on the US-listing (NYSE /
|
|
//! Nasdaq / NYSE Arca / OTC Markets) of each ticker — without that
|
|
//! filter, common US tickers silently resolve to whichever
|
|
//! foreign-exchange company happens to share the symbol (`MRK` →
|
|
//! Merck KGaA on Frankfurt; `PG` → People's Garment on SET; etc.).
|
|
//!
|
|
//! The provider is stateless. Caching belongs to the data service,
|
|
//! which writes per-symbol `classification.srf` files after this
|
|
//! provider returns and reads them back on subsequent calls.
|
|
//!
|
|
//! ## Glossary
|
|
//!
|
|
//! SPARQL Query language for RDF-shaped data. Wikidata's
|
|
//! primary read API.
|
|
//! P-number Property identifier in Wikidata (P249 = ticker symbol,
|
|
//! P414 = stock exchange, P31 = instance of, ...).
|
|
//! Q-number Entity identifier in Wikidata (Q40244 = ETF as a
|
|
//! concept, Q13677 = NYSE the entity, Q312 = Apple Inc.
|
|
//! the entity).
|
|
//! wdt:Pxxx Truthy/direct property statement — the simple shape.
|
|
//! p:Pxxx Reified property statement — lets a statement carry
|
|
//! qualifiers (e.g. ticker symbol AS A QUALIFIER on the
|
|
//! stock-exchange statement, rather than as a direct
|
|
//! property of the company).
|
|
//! ps:Pxxx "Statement value" predicate — within a reified
|
|
//! statement, points to the statement's main value.
|
|
//! pq:Pxxx "Qualifier" predicate — within a reified statement,
|
|
//! points to a qualifier on that statement.
|
|
//!
|
|
//! Why the reified statement matters here: Wikidata stores tickers
|
|
//! as P249 qualifiers on a P414 (stock exchange) statement, NOT as
|
|
//! a direct `wdt:P249` property. Querying naively returns zero rows
|
|
//! for nearly every US-listed equity.
|
|
|
|
const std = @import("std");
|
|
const http = @import("../net/http.zig");
|
|
const fmt = @import("../format.zig");
|
|
|
|
const sparql_endpoint = "https://query.wikidata.org/sparql";
|
|
|
|
/// One symbol's classification as assembled from a Wikidata SPARQL
/// response. Optional fields stay `null` when no binding supplied a
/// value for them. `source` has no default, so every construction
/// site must state provenance explicitly.
///
/// Ownership: every field marked "owned" is released by `deinit`
/// and must therefore have been allocated with the allocator that
/// is later passed to `deinit`. `source` is never freed.
pub const ClassificationRecord = struct {
    symbol: []const u8, // owned
    name: ?[]const u8 = null, // owned
    sector: ?[]const u8 = null, // owned
    industry: ?[]const u8 = null, // owned
    /// ISO-3166 alpha-2 country code (e.g. "US", "GB", "DE").
    country: ?[]const u8 = null, // owned
    asset_class: ?[]const u8 = null, // owned
    is_etf: bool = false,
    /// YYYY-MM-DD; trimmed from Wikidata's ISO-8601 date.
    inception_date: ?[]const u8 = null, // owned
    /// Wikidata's P5531 — the SEC CIK as a digit string. Wikidata
    /// already zero-pads to 10 digits, matching the project-wide
    /// CIK normalization convention.
    cik: ?[]const u8 = null, // owned
    /// YYYY-MM-DD when this provider ran, NOT when Wikidata last
    /// updated the underlying entity.
    as_of: []const u8, // owned
    source: []const u8, // not freed by `deinit`; no default — provenance always emitted

    /// Free every owned string with `allocator`. The record must not
    /// be used afterwards.
    pub fn deinit(self: *ClassificationRecord, allocator: std.mem.Allocator) void {
        allocator.free(self.symbol);

        // Optional owned strings, released only when present.
        const optional_fields = [_]?[]const u8{
            self.name,
            self.sector,
            self.industry,
            self.country,
            self.asset_class,
            self.inception_date,
            self.cik,
        };
        for (optional_fields) |maybe_bytes| {
            if (maybe_bytes) |bytes| allocator.free(bytes);
        }

        allocator.free(self.as_of);
    }
};
|
|
|
|
/// Geo-bucket labels returned by the country → geo lookup
/// (`geoFor`). Kept as named constants (rather than inline string
/// literals in the map) so callers can reference them without typo
/// risk and the taxonomy is tweakable in one place.
pub const geo = struct {
    /// United States.
    pub const us = "US";
    /// Non-US developed markets.
    pub const developed = "International Developed";
    /// Emerging markets.
    pub const emerging = "Emerging Markets";
    /// Fallback for a null, empty, or unmapped country code.
    pub const unknown = "Unknown";
};
|
|
|
|
/// Wikidata Q-IDs we test against `instance of` (P31) to classify
/// fund-shaped securities. Curated, not exhaustive. `parse`
/// compares these against the Q-ID suffix of each `?instance` IRI,
/// so entries are bare Q-IDs without any `wd:` or URL prefix.
const etf_q_ids = [_][]const u8{
    "Q40244", // exchange-traded fund
    "Q4118901", // exchange-traded bond fund
    "Q104638128", // ETF tracking specific index
};
/// `instance of` (P31) Q-IDs that mark a security as a mutual fund.
/// Same bare-Q-ID format as `etf_q_ids`; curated, not exhaustive.
const mutual_fund_q_ids = [_][]const u8{
    "Q1752230", // mutual fund
    "Q11644608", // open-end fund
};
|
|
|
|
/// US stock exchanges accepted by the SPARQL exchange filter.
/// Without this filter, ticker collisions across global exchanges
/// silently return the wrong company.
///
/// Entries carry the `wd:` prefix because `buildQuery` writes them
/// verbatim into the `VALUES ?exchange { ... }` clause.
///
/// Q-IDs:
///   Q13677    New York Stock Exchange (NYSE)
///   Q82059    Nasdaq
///   Q4527260  NYSE Arca
///   Q1666011  OTC Markets Group / Pink Sheets
const us_exchanges = [_][]const u8{
    "wd:Q13677",
    "wd:Q82059",
    "wd:Q4527260",
    "wd:Q1666011",
};
|
|
|
|
/// Country-code-to-geo-bucket lookup. Wikidata returns ISO-3166
/// alpha-2 codes via P17 → P297; we map them to the geo taxonomy
/// (`geo.us` / `geo.developed` / `geo.emerging`). Codes absent from
/// this table are reported as `geo.unknown` by `geoFor`.
///
/// MSCI conventions used as the developed/emerging split. Taiwan
/// and South Korea are MSCI-emerging despite FTSE classifying them
/// developed. Israel is MSCI-developed (upgraded 2010). Canada is
/// folded into International Developed (some users prefer separate
/// Canada bucket; override in `metadata.srf` if so).
///
/// Keys are matched exactly (case-sensitive, upper-case).
const country_to_geo = std.StaticStringMap([]const u8).initComptime(.{
    // United States
    .{ "US", geo.us },
    // Alpha-3 fallback for entries that use the longer form.
    .{ "USA", geo.us },

    // International Developed — Europe ex-CIS
    .{ "GB", geo.developed },
    .{ "DE", geo.developed },
    .{ "FR", geo.developed },
    .{ "NL", geo.developed },
    .{ "CH", geo.developed },
    .{ "SE", geo.developed },
    .{ "DK", geo.developed },
    .{ "NO", geo.developed },
    .{ "FI", geo.developed },
    .{ "IT", geo.developed },
    .{ "ES", geo.developed },
    .{ "BE", geo.developed },
    .{ "AT", geo.developed },
    .{ "IE", geo.developed },
    .{ "LU", geo.developed },
    .{ "PT", geo.developed },
    // NOTE(review): MSCI appears to have classified Greece as
    // emerging since 2013; kept as developed here so existing
    // output does not change — confirm the intended bucket.
    .{ "GR", geo.developed },
    .{ "IS", geo.developed },

    // International Developed — Asia-Pacific + Israel + Canada
    .{ "JP", geo.developed },
    .{ "AU", geo.developed },
    .{ "NZ", geo.developed },
    .{ "SG", geo.developed },
    .{ "HK", geo.developed },
    .{ "IL", geo.developed },
    .{ "CA", geo.developed },

    // Emerging Markets (MSCI)
    .{ "CN", geo.emerging },
    .{ "TW", geo.emerging },
    .{ "KR", geo.emerging },
    .{ "IN", geo.emerging },
    .{ "BR", geo.emerging },
    .{ "MX", geo.emerging },
    .{ "RU", geo.emerging },
    .{ "TR", geo.emerging },
    .{ "ZA", geo.emerging },
    .{ "TH", geo.emerging },
    .{ "MY", geo.emerging },
    .{ "ID", geo.emerging },
    .{ "PH", geo.emerging },
    .{ "VN", geo.emerging },
    .{ "AR", geo.emerging },
    .{ "CL", geo.emerging },
    .{ "CO", geo.emerging },
    .{ "PE", geo.emerging },
    .{ "EG", geo.emerging },

    // Emerging Markets (MSCI) — Central/Eastern Europe and the Gulf.
    // These are MSCI Emerging Markets index members that would
    // otherwise fall through to `geo.unknown`.
    .{ "PL", geo.emerging },
    .{ "CZ", geo.emerging },
    .{ "HU", geo.emerging },
    .{ "SA", geo.emerging },
    .{ "AE", geo.emerging },
    .{ "QA", geo.emerging },
    .{ "KW", geo.emerging },
});
|
|
|
|
/// Resolve an ISO-3166 alpha-2 country code to its geo bucket.
///
/// Returns the bucket recorded in `country_to_geo` for `iso2`.
/// A null code, an empty code, or a code missing from the table all
/// yield `geo.unknown`, leaving the user free to override the
/// bucket in `metadata.srf`.
pub fn geoFor(iso2: ?[]const u8) []const u8 {
    if (iso2) |code| {
        if (code.len != 0) {
            if (country_to_geo.get(code)) |bucket| return bucket;
        }
    }
    return geo.unknown;
}
|
|
|
|
// ── Wikidata provider state (file-as-struct) ─────────────────────
//
// Callers do `const wikidata = @import("providers/Wikidata.zig");`
// followed by `var wd = wikidata.init(...);` and `wd.fetch(...)`.

/// HTTP client used by `postSparql`; created in `init`, released in
/// `deinit`.
client: http.Client,
/// Allocator for the provider's own scratch data: the query text,
/// the form-encoded request body, and the copied response body.
allocator: std.mem.Allocator,
/// I/O handle given to the HTTP client in `init` and passed to
/// `parse` by `fetch`.
io: std.Io,
/// Contact email for User-Agent / From headers, sourced from
/// `Config.user_email`. Required; callers must surface a clear
/// missing-config error before constructing this provider.
/// `init` stores the slice without copying it.
user_email: []const u8,

/// The file-level struct type itself, so `init` / `deinit` / `fetch`
/// can name their receiver.
const Wikidata = @This();
|
|
|
|
/// Construct a provider.
///
/// `allocator` backs both the HTTP client and the provider's scratch
/// allocations. `user_email` is stored as given (not copied) and is
/// later sent in the User-Agent and From headers. Pair with `deinit`.
pub fn init(
    io: std.Io,
    allocator: std.mem.Allocator,
    user_email: []const u8,
) Wikidata {
    const provider: Wikidata = .{
        .client = http.Client.init(io, allocator),
        .allocator = allocator,
        .io = io,
        .user_email = user_email,
    };
    return provider;
}
|
|
|
|
/// Release the provider's HTTP client. Nothing else is freed here;
/// in particular `user_email` is left untouched.
pub fn deinit(self: *Wikidata) void {
    self.client.deinit();
}
|
|
|
|
/// Look up Wikidata classifications for `symbols` with one batched
/// SPARQL request.
///
/// The query text and raw response are scratch data allocated from
/// the provider's own allocator and freed before returning. The
/// returned slice and its records are produced by `parse` using
/// `result_allocator`; the caller owns them. An empty `symbols`
/// short-circuits to an empty, non-allocated slice without touching
/// the network.
pub fn fetch(
    self: *Wikidata,
    result_allocator: std.mem.Allocator,
    symbols: []const []const u8,
) ![]ClassificationRecord {
    if (symbols.len == 0) return &.{};

    const scratch = self.allocator;

    const sparql = try buildQuery(scratch, symbols);
    defer scratch.free(sparql);

    const response_body = try self.postSparql(sparql);
    defer scratch.free(response_body);

    return parse(self.io, result_allocator, response_body, symbols);
}
|
|
|
|
/// Send `query` to the Wikidata SPARQL endpoint as a form-encoded
/// POST and return a copy of the response body.
///
/// The User-Agent and From headers are derived from `user_email`
/// for politeness; Wikidata explicitly recommends descriptive
/// User-Agent strings. Returns `error.UserEmailTooLong` when the
/// User-Agent string does not fit its fixed 256-byte buffer.
/// The returned bytes are allocated with `self.allocator`; the
/// caller frees them.
fn postSparql(self: *Wikidata, query: []const u8) ![]u8 {
    var body: std.Io.Writer.Allocating = .init(self.allocator);
    defer body.deinit();
    try body.writer.writeAll("query=");
    // `Component.formatEscaped` percent-encodes everything outside
    // RFC 3986's unreserved set — exactly the contract for the
    // `application/x-www-form-urlencoded` body we're building.
    const raw_query: std.Uri.Component = .{ .raw = query };
    try raw_query.formatEscaped(&body.writer);

    var agent_buf: [256]u8 = undefined;
    const agent = std.fmt.bufPrint(&agent_buf, "zfin/0.1 ({s})", .{self.user_email}) catch
        return error.UserEmailTooLong;

    const request_headers = [_]std.http.Header{
        .{ .name = "User-Agent", .value = agent },
        .{ .name = "Accept", .value = "application/sparql-results+json" },
        .{ .name = "Content-Type", .value = "application/x-www-form-urlencoded" },
        .{ .name = "From", .value = self.user_email },
    };

    var response = try self.client.request(.POST, sparql_endpoint, body.written(), &request_headers);
    defer response.deinit();
    // Copy the body out so it outlives the response object.
    return self.allocator.dupe(u8, response.body);
}
|
|
|
|
/// Build the batched SPARQL query for a slice of ticker symbols.
/// Caller owns the returned bytes (allocated with `allocator`).
/// Symbols are interpolated via
/// `VALUES ?ticker { "AAPL" "MSFT" ... }`.
///
/// Each symbol is emitted as a double-quoted SPARQL string literal.
/// The characters that may not appear raw inside such a literal
/// (`"`, `\`, line feed, carriage return) are backslash-escaped, so
/// an unusual symbol can neither produce a malformed query nor
/// inject extra query text.
///
/// Wikidata's ticker storage is non-obvious: tickers are stored as
/// `P249` qualifiers on a `P414` (stock exchange) statement. Naive
/// `?security wdt:P249 ?ticker` returns zero rows for nearly every
/// US-listed equity. The query reaches them via:
///
///     ?security p:P414 ?stmt .
///     ?stmt ps:P414 ?exchange .
///     ?stmt pq:P249 ?ticker .
///
/// `?exchange` is filtered to a small set of US exchanges to avoid
/// ticker collisions with foreign listings.
fn buildQuery(allocator: std.mem.Allocator, symbols: []const []const u8) ![]u8 {
    var aw: std.Io.Writer.Allocating = .init(allocator);
    defer aw.deinit();

    try aw.writer.writeAll(
        \\SELECT ?ticker ?security ?securityLabel ?industryLabel ?countryCode ?inception ?cik ?instance WHERE {
        \\ VALUES ?ticker {
    );
    for (symbols) |symbol| {
        try aw.writer.writeAll(" \"");
        for (symbol, 0..) |byte, i| {
            switch (byte) {
                // Escapes required inside a double-quoted SPARQL literal.
                '"' => try aw.writer.writeAll("\\\""),
                '\\' => try aw.writer.writeAll("\\\\"),
                '\n' => try aw.writer.writeAll("\\n"),
                '\r' => try aw.writer.writeAll("\\r"),
                else => try aw.writer.writeAll(symbol[i .. i + 1]),
            }
        }
        try aw.writer.writeAll("\"");
    }
    try aw.writer.writeAll(" }\n");
    try aw.writer.writeAll(" VALUES ?exchange {");
    for (us_exchanges) |x| {
        try aw.writer.print(" {s}", .{x});
    }
    try aw.writer.writeAll(" }\n");
    try aw.writer.writeAll(
        \\ ?security p:P414 ?exchstmt .
        \\ ?exchstmt ps:P414 ?exchange .
        \\ ?exchstmt pq:P249 ?ticker .
        \\ OPTIONAL { ?security wdt:P452 ?industry . }
        \\ OPTIONAL { ?security wdt:P17 ?country . ?country wdt:P297 ?countryCode . }
        \\ OPTIONAL { ?security wdt:P571 ?inception . }
        \\ OPTIONAL { ?security wdt:P5531 ?cik . }
        \\ OPTIONAL { ?security wdt:P31 ?instance . }
        \\ SERVICE wikibase:label { bd:serviceParam wikibase:language "en". }
        \\}
    );
    return aw.toOwnedSlice();
}
|
|
|
|
/// Parse the SPARQL JSON response into `ClassificationRecord` values.
/// Multiple bindings for the same ticker (e.g. multiple `instance of`
/// values) get merged into one record — first-non-null wins.
///
/// Parameters:
///   io                used only to obtain today's date for `as_of`.
///   allocator         allocates the returned slice and every owned
///                     string in it; also used for JSON scratch data.
///   json_bytes        raw `application/sparql-results+json` body.
///   expected_symbols  tickers that were requested; bindings for any
///                     other ticker are ignored (ASCII
///                     case-insensitive comparison).
///
/// Returns a caller-owned slice with one record per distinct ticker
/// found, in unspecified order. Malformed JSON, or JSON without a
/// `results.bindings` array, yields an empty non-allocated slice.
/// `error.OutOfMemory` is always propagated rather than being
/// reported as "no results". On any error, everything allocated so
/// far is released.
fn parse(
    io: std.Io,
    allocator: std.mem.Allocator,
    json_bytes: []const u8,
    expected_symbols: []const []const u8,
) ![]ClassificationRecord {
    const today = fmt.todayDate(io);
    var as_of_buf: [10]u8 = undefined;
    const as_of = try std.fmt.bufPrint(&as_of_buf, "{f}", .{today});

    const parsed = std.json.parseFromSlice(std.json.Value, allocator, json_bytes, .{}) catch |err| switch (err) {
        // Running out of memory is not "Wikidata had no data".
        error.OutOfMemory => return err,
        else => return &.{},
    };
    defer parsed.deinit();

    const root = switch (parsed.value) {
        .object => |o| o,
        else => return &.{},
    };
    const results = switch (root.get("results") orelse return &.{}) {
        .object => |o| o,
        else => return &.{},
    };
    const bindings = switch (results.get("bindings") orelse return &.{}) {
        .array => |a| a.items,
        else => return &.{},
    };

    // Map symbol → record; merge multiple bindings.
    //
    // Invariants that keep the cleanup below sound on every path:
    //   * each key IS its record's `symbol` slice, so freeing the
    //     records also frees the keys;
    //   * an entry is inserted only once its record is fully built,
    //     so the map never holds an uninitialized value.
    var by_symbol: std.StringHashMap(ClassificationRecord) = .init(allocator);
    defer {
        var it = by_symbol.valueIterator();
        while (it.next()) |r| r.deinit(allocator);
        by_symbol.deinit();
    }

    for (bindings) |b| {
        const obj = switch (b) {
            .object => |o| o,
            else => continue,
        };
        const ticker = sparqlValue(obj, "ticker") orelse continue;

        // Verify ticker is one we asked for. Wikidata can return
        // surprising matches (foreign exchanges); skip those.
        const requested = for (expected_symbols) |wanted| {
            if (std.ascii.eqlIgnoreCase(wanted, ticker)) break true;
        } else false;
        if (!requested) continue;

        const rec = by_symbol.getPtr(ticker) orelse blk: {
            const symbol = try allocator.dupe(u8, ticker);
            errdefer allocator.free(symbol);
            const as_of_owned = try allocator.dupe(u8, as_of);
            errdefer allocator.free(as_of_owned);
            // The lookup above just missed, so this cannot clobber.
            try by_symbol.putNoClobber(symbol, .{
                .symbol = symbol,
                .as_of = as_of_owned,
                .source = "wikidata",
            });
            break :blk by_symbol.getPtr(symbol).?;
        };

        if (rec.name == null) {
            if (sparqlValue(obj, "securityLabel")) |label| {
                rec.name = try allocator.dupe(u8, label);
            }
        }
        if (rec.industry == null) {
            if (sparqlValue(obj, "industryLabel")) |ind| {
                // The industry label populates both fields, as two
                // separate allocations so `deinit` can free each.
                rec.industry = try allocator.dupe(u8, ind);
                rec.sector = try allocator.dupe(u8, ind);
            }
        }
        if (rec.country == null) {
            if (sparqlValue(obj, "countryCode")) |c| {
                rec.country = try allocator.dupe(u8, c);
            }
        }
        if (rec.inception_date == null) {
            if (sparqlValue(obj, "inception")) |d| {
                // Keep only the leading YYYY-MM-DD of the timestamp.
                if (d.len >= 10) {
                    rec.inception_date = try allocator.dupe(u8, d[0..10]);
                }
            }
        }
        if (rec.cik == null) {
            if (sparqlValue(obj, "cik")) |c| {
                rec.cik = try allocator.dupe(u8, c);
            }
        }
        if (sparqlValue(obj, "instance")) |inst_iri| {
            // The "instance" value is a Q-ID URI like
            // "http://www.wikidata.org/entity/Q40244". Extract the
            // Q-ID suffix and test against our known sets.
            const q_id = if (std.mem.lastIndexOfScalar(u8, inst_iri, '/')) |i|
                inst_iri[i + 1 ..]
            else
                inst_iri;
            for (etf_q_ids) |target| {
                if (std.mem.eql(u8, q_id, target)) {
                    rec.is_etf = true;
                    if (rec.asset_class == null) {
                        rec.asset_class = try allocator.dupe(u8, "ETF (uncategorized)");
                    }
                    break;
                }
            }
            for (mutual_fund_q_ids) |target| {
                if (std.mem.eql(u8, q_id, target)) {
                    // NOTE(review): mutual funds are also flagged
                    // `is_etf`. Presumably the flag means "fund-shaped";
                    // confirm against consumers before changing.
                    rec.is_etf = true;
                    if (rec.asset_class == null) {
                        rec.asset_class = try allocator.dupe(u8, "Mutual Fund (uncategorized)");
                    }
                    break;
                }
            }
        }
    }

    // Move the records into the result slice. If this allocation
    // fails, the deferred cleanup still owns and frees every record.
    const out = try allocator.alloc(ClassificationRecord, by_symbol.count());
    var idx: usize = 0;
    var values = by_symbol.valueIterator();
    while (values.next()) |r| {
        out[idx] = r.*;
        idx += 1;
    }
    // Ownership has moved to `out`; empty the map so the deferred
    // cleanup does not free the records a second time.
    by_symbol.clearRetainingCapacity();
    return out;
}
|
|
|
|
/// Read `obj[field].value` from a SPARQL JSON binding.
///
/// A binding maps each variable name to an object such as
/// `{"type": "literal", "value": "AAPL"}`. Returns the `value`
/// string, or null when the field is missing, is not an object, has
/// no `value` member, or that member is not a string. The returned
/// slice borrows from the parsed JSON document.
fn sparqlValue(obj: std.json.ObjectMap, field: []const u8) ?[]const u8 {
    const binding = obj.get(field) orelse return null;
    if (binding != .object) return null;

    const value = binding.object.get("value") orelse return null;
    if (value != .string) return null;

    return value.string;
}
|
|
|
|
// ── Tests ────────────────────────────────────────────────────────
|
|
|
|
test "buildQuery includes all symbols and required SELECT vars" {
    const allocator = std.testing.allocator;
    const tickers = [_][]const u8{ "AAPL", "VTI" };
    const query = try buildQuery(allocator, &tickers);
    defer allocator.free(query);

    // Every fragment below must appear somewhere in the query text.
    const required_fragments = [_][]const u8{
        "\"AAPL\"",
        "\"VTI\"",
        "p:P414",
        "pq:P249",
        "wdt:P452",
        "wdt:P17",
        // US-exchange filter must be present — without it, US tickers
        // collide with foreign exchanges (MRK→Merck KGaA, PG→People's
        // Garment, etc.). See `us_exchanges` doc-block.
        "wd:Q13677", // NYSE
        "wd:Q82059", // Nasdaq
        "ps:P414 ?exchange",
    };
    for (required_fragments) |fragment| {
        try std.testing.expect(std.mem.indexOf(u8, query, fragment) != null);
    }
}
|
|
|
|
test "parse: AAPL fixture round-trips name + industry + country" {
    // One operating-company binding; Q4830453 is in neither fund list.
    const fixture =
        \\{
        \\ "head": {"vars": ["ticker", "security", "securityLabel", "industryLabel", "countryCode", "inception", "cik", "instance"]},
        \\ "results": {
        \\ "bindings": [
        \\ {
        \\ "ticker": {"type": "literal", "value": "AAPL"},
        \\ "security": {"type": "uri", "value": "http://www.wikidata.org/entity/Q312"},
        \\ "securityLabel": {"type": "literal", "value": "Apple Inc."},
        \\ "industryLabel": {"type": "literal", "value": "consumer electronics"},
        \\ "countryCode": {"type": "literal", "value": "US"},
        \\ "instance": {"type": "uri", "value": "http://www.wikidata.org/entity/Q4830453"}
        \\ }
        \\ ]
        \\ }
        \\}
    ;

    const allocator = std.testing.allocator;
    const requested = [_][]const u8{"AAPL"};
    const recs = try parse(std.testing.io, allocator, fixture, &requested);
    defer {
        for (recs) |*r| r.deinit(allocator);
        allocator.free(recs);
    }

    try std.testing.expectEqual(@as(usize, 1), recs.len);
    const rec = recs[0];
    try std.testing.expectEqualStrings("AAPL", rec.symbol);
    try std.testing.expectEqualStrings("Apple Inc.", rec.name.?);
    try std.testing.expectEqualStrings("consumer electronics", rec.industry.?);
    try std.testing.expectEqualStrings("consumer electronics", rec.sector.?);
    try std.testing.expectEqualStrings("US", rec.country.?);
    try std.testing.expect(!rec.is_etf);
}
|
|
|
|
test "parse: ETF fixture sets is_etf=true and asset_class" {
    // The instance IRI ends in Q40244, which is listed in `etf_q_ids`.
    const fixture =
        \\{
        \\ "head": {"vars": ["ticker", "security", "securityLabel", "instance"]},
        \\ "results": {
        \\ "bindings": [
        \\ {
        \\ "ticker": {"type": "literal", "value": "VTI"},
        \\ "security": {"type": "uri", "value": "http://www.wikidata.org/entity/Q1809462"},
        \\ "securityLabel": {"type": "literal", "value": "Vanguard Total Stock Market ETF"},
        \\ "instance": {"type": "uri", "value": "http://www.wikidata.org/entity/Q40244"}
        \\ }
        \\ ]
        \\ }
        \\}
    ;

    const allocator = std.testing.allocator;
    const requested = [_][]const u8{"VTI"};
    const recs = try parse(std.testing.io, allocator, fixture, &requested);
    defer {
        for (recs) |*r| r.deinit(allocator);
        allocator.free(recs);
    }

    try std.testing.expectEqual(@as(usize, 1), recs.len);
    const rec = recs[0];
    try std.testing.expect(rec.is_etf);
    try std.testing.expectEqualStrings("ETF (uncategorized)", rec.asset_class.?);
}
|
|
|
|
test "parse: bindings for symbols not requested are dropped" {
    // The only binding is for "WRONG", which was never requested.
    const fixture =
        \\{
        \\ "head": {"vars": ["ticker", "security", "securityLabel"]},
        \\ "results": {
        \\ "bindings": [
        \\ {"ticker": {"type": "literal", "value": "WRONG"},
        \\ "security": {"type": "uri", "value": "http://example/Q1"},
        \\ "securityLabel": {"type": "literal", "value": "Wrong Co"}}
        \\ ]
        \\ }
        \\}
    ;

    const allocator = std.testing.allocator;
    const requested = [_][]const u8{"AAPL"};
    const recs = try parse(std.testing.io, allocator, fixture, &requested);
    defer allocator.free(recs);

    try std.testing.expectEqual(@as(usize, 0), recs.len);
}
|
|
|
|
test "geoFor maps known ISO-3166 codes to bucket" {
    const Case = struct { code: []const u8, bucket: []const u8 };
    const cases = [_]Case{
        .{ .code = "US", .bucket = geo.us },
        .{ .code = "USA", .bucket = geo.us },
        .{ .code = "GB", .bucket = geo.developed },
        .{ .code = "DE", .bucket = geo.developed },
        .{ .code = "CA", .bucket = geo.developed },
        .{ .code = "IL", .bucket = geo.developed },
        .{ .code = "CN", .bucket = geo.emerging },
        .{ .code = "TW", .bucket = geo.emerging },
        .{ .code = "KR", .bucket = geo.emerging },
    };
    for (cases) |case| {
        try std.testing.expectEqualStrings(case.bucket, geoFor(case.code));
    }
}
|
|
|
|
test "geoFor returns Unknown for null/empty/unmapped" {
    const unmapped_inputs = [_]?[]const u8{
        null,
        "",
        "ZZ", // unassigned ISO-2
        "XX",
    };
    for (unmapped_inputs) |input| {
        try std.testing.expectEqualStrings(geo.unknown, geoFor(input));
    }
}
|