Merge commit '2af495d056616cc0f757a055114b56df2e0d5d84' as 'projects/bad-nlp/name-database'

This commit is contained in:
2023-03-20 18:03:18 -06:00
669 changed files with 423076 additions and 0 deletions

View File

@@ -0,0 +1,20 @@
require 'active_support'
# Entry point for the name database: pairs a set of given names with a set of
# family names, both stored beneath a common base directory.
class NameDatabase
  require 'name-database/set'

  # @return [NameDatabase::Set] the set rooted at "<dir>/given_name"
  attr_reader :given_names
  # @return [NameDatabase::Set] the set rooted at "<dir>/family_name"
  attr_reader :family_names

  # Builds both name sets under +dir+.
  #
  # The default is spelled `::File` on purpose. Once name-database/file has
  # been loaded, NameDatabase::File exists, and a bare `File` written inside
  # `class NameDatabase` resolves lexically to that class (which has no
  # `dirname`) rather than to Ruby's core File.
  #
  # @param dir [String] base directory holding the "given_name" and
  #   "family_name" trees; defaults to the parent of this file's directory
  def initialize(dir = ::File.dirname(__FILE__) + "/..")
    @given_names = NameDatabase::Set.new(dir + "/given_name")
    @family_names = NameDatabase::Set.new(dir + "/family_name")
  end

  # Calls #write on the given-name set, then on the family-name set.
  def write
    @given_names.write
    @family_names.write
  end
end

View File

@@ -0,0 +1,52 @@
class NameDatabase
  # A single name record together with its (possibly nested) metadata.
  #
  # Declared with nested `class` keywords so this file can be loaded even when
  # NameDatabase has not been defined by an earlier require.
  class Entry
    attr_reader :name
    attr_reader :set
    attr_reader :file
    attr_reader :meta
    # Never assigned anywhere in this class, so it reads as nil here.
    attr_reader :line

    # @param set  the owning set; only used by #path and passed on by #merge
    # @param file the file object this entry came from; passed on by #merge
    # @param data [String, Hash] either a bare name, or a single-pair Hash of
    #   name => metadata Hash. Any other type leaves name and meta as nil.
    def initialize(set, file, data)
      @set = set
      @file = file
      case data
      when String
        @name = data
        @meta = {}.with_indifferent_access
      when Hash
        @name = data.keys.first
        # A record such as {"bob" => nil} is treated as "no metadata" instead
        # of raising NoMethodError on nil.
        @meta = (data.values.first || {}).with_indifferent_access
      end
    end

    # Returns a new Hash in which nested metadata is collapsed into
    # slash-separated keys, e.g. {"a" => {"b" => 1}} becomes {"a/b" => 1}.
    #
    # @return [Hash]
    def metadata_without_nesting
      NameDatabase::Entry.flattened_meta("", {}, meta)
    end

    # Copies +meta+ into +result+, joining the keys of nested Hashes with "/".
    #
    # Nested Hashes are written into +result+ by the recursive call itself.
    # The return value of that call must not be merged back in: doing so used
    # to add the nested keys a second time without their prefix.
    #
    # @param prefix key path accumulated so far; an empty prefix means top level
    # @param result [Hash] accumulator, mutated in place
    # @param meta   [Hash] metadata to flatten; not modified
    # @return [Hash] +result+
    def self.flattened_meta(prefix, result, meta)
      meta.each do |key, value|
        flattened_key = prefix.to_s.empty? ? key : "#{prefix}/#{key}"
        case value
        when Hash
          flattened_meta(flattened_key, result, value)
        else
          result[flattened_key] = value
        end
      end
      result
    end

    # Asks the owning set for the data file path that corresponds to this
    # entry's name.
    def path
      set.data_path_file(name)
    end

    # Returns a new Entry with the same name, set and file whose metadata is
    # this entry's metadata merged with +other+'s; +other+ wins on conflicts.
    # Neither receiver nor argument is modified.
    def merge(other)
      new_data = { name => meta.merge(other.meta) }
      NameDatabase::Entry.new(set, file, new_data)
    end
  end
end

View File

@@ -0,0 +1,63 @@
# One YAML data file belonging to a set. Entries are read from disk the first
# time they are needed and kept in memory keyed by name.
#
# Ruby's core File class is always written as `::File` in here so it cannot be
# confused with this class.
class NameDatabase::File
  # Both libraries are used below and were not required anywhere in this file.
  require 'yaml'
  require 'fileutils'

  attr_reader :set
  attr_reader :path
  # @return [Hash] entries keyed by name. Reading this does not trigger a load.
  attr_reader :entries

  # @param set  the owning set, handed to every Entry created here
  # @param path [String] location of the YAML file on disk
  def initialize(set, path)
    @set = set
    @path = path
    @entries = {}
    @loaded = false
  end

  # Returns the entry stored under +name+, creating (and remembering) an entry
  # without metadata when none exists yet.
  def get(name)
    load
    @entries[name] ||= NameDatabase::Entry.new(set, self, name)
  end

  # Yields every entry of this file.
  def each(&block)
    load
    @entries.values.each(&block)
  end

  # Serialises all entries, sorted by name, to the YAML file at #path and
  # creates the parent directory when needed. Entries without metadata are
  # written as a bare name, the others as a one-pair name => metadata mapping.
  def write
    # Pick up what is already on disk first; otherwise writing a File that was
    # never read would replace its stored records with an empty list.
    load
    output = entries.values.sort_by(&:name).map do |entry|
      if entry.meta.blank?
        entry.name
      else
        { entry.name => entry.meta.stringify_keys }
      end
    end
    FileUtils.mkdir_p(::File.dirname(path))
    # ::File.open rather than Kernel#open: the latter treats a path starting
    # with "|" as a command to run.
    ::File.open(path, 'w') { |f| f.puts YAML.dump(output) }
  end

  private

  # Hook for checking freshly parsed data; currently does nothing.
  def validate(raw)
  end

  # Reads the YAML file once and merges records that share a name. Does
  # nothing when the data is already loaded or the file does not exist.
  def load
    return if @loaded
    # File.exists? was removed in Ruby 3.2; exist? is available on every version.
    return unless ::File.exist?(path)

    # NOTE(review): YAML.load can instantiate arbitrary objects on older Psych
    # versions; the data files are presumably trusted - confirm.
    raw_data = YAML.load(::File.read(path))
    validate raw_data
    # An empty YAML document parses to nil/false rather than to an Array.
    (raw_data || []).each do |record|
      entry = NameDatabase::Entry.new(set, self, record)
      existing = @entries[entry.name]
      @entries[entry.name] = existing ? existing.merge(entry) : entry
    end
    @loaded = true
  end
end

View File

@@ -0,0 +1,47 @@
# A collection of names stored as YAML files beneath one base directory. Each
# name lives in the file "<base_path>/<first char>/<first two chars>.yml".
class NameDatabase::Set
  require 'name-database/file'
  require 'name-database/entry'

  attr_reader :base_path
  # @return [Hash] NameDatabase::File objects handed out so far, keyed by path
  attr_reader :parsed_files

  # @param base_path [String] directory that contains this set's YAML files
  def initialize(base_path)
    @base_path = base_path
    @parsed_files = {}
  end

  # Looks up the entry for +name+ (compared in lower case) in the file that
  # name maps to.
  def get(name)
    normalized = normalize_name(name)
    get_file(data_path_file(normalized)).get(normalized)
  end

  # Path of the YAML file responsible for +name+. The name is used as given;
  # #get lower-cases it before calling this.
  #
  # Uses core String slicing, which yields the same result as ActiveSupport's
  # String#first without depending on that extension.
  #
  # @param name [String]
  # @return [String]
  def data_path_file(name)
    "#{base_path}/#{name[0, 1]}/#{name[0, 2]}.yml"
  end

  # Same as #data_path_file. NameDatabase::Entry#path calls the method under
  # this spelling, which did not exist before.
  def data_file_path(name)
    data_path_file(name)
  end

  # Writes every file object handed out so far.
  def write
    parsed_files.values.each(&:write)
  end

  # One file object for every "*.yml" found anywhere below base_path.
  def all_files
    Dir["#{base_path}/**/*.yml"].map { |yml_path| get_file(yml_path) }
  end

  # Yields every entry of every file returned by #all_files.
  def each(&block)
    all_files.each { |file| file.each(&block) }
  end

  private

  # Returns the cached file object for +path+, creating it on first use.
  def get_file(path)
    parsed_files[path] ||= NameDatabase::File.new(self, path)
  end

  def normalize_name(name)
    name.downcase
  end
end