Merge commit '2af495d056616cc0f757a055114b56df2e0d5d84' as 'projects/bad-nlp/name-database'
This commit is contained in:
20
projects/bad-nlp/name-database/lib/name-database.rb
Normal file
20
projects/bad-nlp/name-database/lib/name-database.rb
Normal file
@@ -0,0 +1,20 @@
|
||||
require 'active_support'
|
||||
|
||||
# Entry point to the name database: pairs a set of given names with a set of
# family names, each backed by its own directory below a common root.
class NameDatabase
  require 'name-database/set'

  # The NameDatabase::Set rooted at "<dir>/given_name".
  attr_reader :given_names
  # The NameDatabase::Set rooted at "<dir>/family_name".
  attr_reader :family_names

  # dir - directory that contains the "given_name" and "family_name" data
  #       directories. Defaults to the parent of this file's directory.
  #
  # The default uses `::File` on purpose: once name-database/file has been
  # loaded, a bare `File` inside this class body resolves lexically to
  # NameDatabase::File, which has no `dirname` method.
  def initialize(dir = ::File.dirname(__FILE__) + "/..")
    @given_names = NameDatabase::Set.new(dir + "/given_name")
    @family_names = NameDatabase::Set.new(dir + "/family_name")
  end

  # Writes both sets by delegating to each set's #write.
  def write
    @given_names.write
    @family_names.write
  end
end
|
52
projects/bad-nlp/name-database/lib/name-database/entry.rb
Normal file
52
projects/bad-nlp/name-database/lib/name-database/entry.rb
Normal file
@@ -0,0 +1,52 @@
|
||||
# A single name together with its (possibly nested) metadata hash.
class NameDatabase::Entry
  # The name this entry describes.
  attr_reader :name
  # The set object handed to the constructor.
  attr_reader :set
  # The file object handed to the constructor.
  attr_reader :file
  # Metadata hash with indifferent (string/symbol) key access.
  attr_reader :meta

  # NOTE(review): nothing in this class assigns @line, so this reads as nil.
  attr_reader :line

  # set  - owning set; must respond to #data_path_file for #path to work.
  # file - owning file object (stored, not otherwise used here).
  # data - either a String (a bare name with no metadata) or a Hash whose
  #        first key is the name and whose first value is the metadata hash.
  #
  # Raises ArgumentError for any other kind of data. Without this the entry
  # would be left with a nil name and nil metadata and fail later, far from
  # the record that caused it.
  def initialize(set, file, data)
    @set = set
    @file = file

    case data
    when String
      @name = data
      @meta = {}.with_indifferent_access
    when Hash
      @name = data.keys.first
      # A record written as "name:" with nothing after it parses to
      # {name => nil}; treat that as "no metadata" instead of crashing.
      @meta = (data.values.first || {}).with_indifferent_access
    else
      raise ArgumentError,
            "entry data must be a String or a Hash, got #{data.class}"
    end
  end

  # Returns a new, single-level hash in which every nested metadata key is
  # replaced by its "/"-joined path, e.g. {"a" => {"b" => 1}} becomes
  # {"a/b" => 1}.
  def metadata_without_nesting
    NameDatabase::Entry.flattened_meta("", {}, meta)
  end

  # Recursively copies +meta+ into +result+, prefixing each key with
  # "<prefix>/" when +prefix+ is not blank.
  #
  # prefix - key path accumulated so far ("" at the top level).
  # result - hash that receives the flattened key/value pairs (mutated).
  # meta   - hash to flatten.
  #
  # Returns +result+.
  def self.flattened_meta(prefix, result, meta)
    meta.each do |key, value|
      flattened_key = prefix.blank? ? key : "#{prefix}/#{key}"

      case value
      when Hash
        # The recursive call fills +result+ in place. Merging its return
        # value back in would also copy the nested keys unprefixed.
        flattened_meta(flattened_key, result, value)
      else
        result[flattened_key] = value
      end
    end

    result
  end

  # Path of the data file this entry's name maps to, as computed by the set.
  def path
    set.data_path_file(name)
  end

  # Returns a new entry with the same name, set and file whose metadata is
  # this entry's metadata merged with +other+'s; on conflicting top-level
  # keys +other+ wins.
  def merge(other)
    new_data = { name => meta.merge(other.meta) }
    NameDatabase::Entry.new(set, file, new_data)
  end
end
|
63
projects/bad-nlp/name-database/lib/name-database/file.rb
Normal file
63
projects/bad-nlp/name-database/lib/name-database/file.rb
Normal file
@@ -0,0 +1,63 @@
|
||||
# One YAML data file of the name database. The file is read lazily the first
# time its entries are needed and is rewritten as a whole by #write.
class NameDatabase::File
  # This class uses YAML and FileUtils directly, so make sure both are loaded.
  require 'yaml'
  require 'fileutils'

  # The set object handed to the constructor.
  attr_reader :set
  # Filesystem path of the YAML file.
  attr_reader :path

  # Hash of name => NameDatabase::Entry. Reading this does NOT trigger a
  # load; it only reflects what has been loaded or added so far.
  attr_reader :entries

  # set  - owning set, passed on to every entry created here.
  # path - filesystem path of the YAML file (it does not have to exist yet).
  def initialize(set, path)
    @set = set
    @path = path
    @entries = {}
    @loaded = false
  end

  # Returns the entry for +name+, creating an empty one (no metadata) if the
  # file has none. Loads the file first if necessary.
  def get(name)
    load
    @entries[name] ||= NameDatabase::Entry.new(set, self, name)
  end

  # Yields every entry in the file. Loads the file first if necessary.
  def each(&block)
    load
    @entries.values.each(&block)
  end

  # Serializes all entries, sorted by name, to +path+ as a YAML list. Entries
  # without metadata are written as a bare name; the others as a one-pair
  # mapping of name => metadata. Missing parent directories are created.
  def write
    # Load before writing: otherwise a file that was instantiated but never
    # read would be overwritten with an empty list, discarding its contents.
    load

    output = entries.values.sort_by(&:name).map do |entry|
      if entry.meta.blank?
        entry.name
      else
        { entry.name => entry.meta.stringify_keys }
      end
    end

    # `::File` is the core class; this class shares its short name.
    FileUtils.mkdir_p ::File.dirname(path)
    # File.open rather than Kernel#open, which treats a leading "|" in the
    # path as a command to run.
    ::File.open(path, 'w') { |f| f.puts YAML.dump(output) }
  end

  private

  # Placeholder hook for validating freshly parsed data; currently a no-op.
  def validate(raw)
  end

  # Reads and parses the YAML file into @entries. Does nothing when the file
  # was already loaded or does not exist. Records that share a name are
  # merged, with later records winning on conflicting metadata keys.
  def load
    return if @loaded
    return unless ::File.exist?(path)

    # NOTE(review): depending on the YAML library version, YAML.load may
    # instantiate arbitrary objects; only use this on trusted data files.
    raw_data = YAML.load(::File.read(path))
    validate raw_data

    # An empty document parses to nil/false; treat it as an empty list.
    (raw_data || []).each do |record|
      entry = NameDatabase::Entry.new(set, self, record)
      existing = @entries[entry.name]
      @entries[entry.name] = existing ? existing.merge(entry) : entry
    end
    @loaded = true
  end
end
|
47
projects/bad-nlp/name-database/lib/name-database/set.rb
Normal file
47
projects/bad-nlp/name-database/lib/name-database/set.rb
Normal file
@@ -0,0 +1,47 @@
|
||||
# A collection of names stored as YAML files below one base directory. Each
# name maps to a file chosen from the first characters of the name.
class NameDatabase::Set
  require 'name-database/file'
  require 'name-database/entry'

  # Directory below which this set's data files live.
  attr_reader :base_path
  # Hash of path => NameDatabase::File for every file requested so far.
  attr_reader :parsed_files

  # base_path - directory that holds (or will hold) this set's YAML files.
  def initialize(base_path)
    @base_path = base_path
    @parsed_files = {}
  end

  # Returns the entry for +name+ (compared case-insensitively: the name is
  # downcased first), looked up in the data file that name maps to.
  def get(name)
    name = normalize_name name
    data_path = data_path_file(name)
    file = get_file(data_path)

    file.get(name)
  end

  # Path of the data file for +name+:
  # "<base_path>/<first char>/<first two chars>.yml".
  #
  # Relies on String#first, which is not core Ruby; presumably it comes
  # from ActiveSupport's String extensions. TODO confirm.
  def data_path_file(name)
    "#{base_path}/#{name.first}/#{name.first(2)}.yml"
  end

  # NameDatabase::Entry#path asks the set for `data_file_path`; provide that
  # spelling too so both names resolve to the same method.
  alias_method :data_file_path, :data_path_file

  # Writes every file that has been requested through this set so far.
  def write
    parsed_files.values.each(&:write)
  end

  # Returns a NameDatabase::File for every "*.yml" found anywhere below
  # base_path, reusing file objects that were already requested.
  def all_files
    Dir["#{base_path}/**/*.yml"].map { |p| get_file(p) }
  end

  # Yields every entry of every data file below base_path.
  def each(&block)
    all_files.each do |file|
      file.each(&block)
    end
  end

  private

  # Returns the cached file object for +path+, creating it on first use.
  def get_file(path)
    parsed_files[path] ||= NameDatabase::File.new(self, path)
  end

  # Canonical form used for lookups: the downcased name.
  def normalize_name(name)
    name.downcase
  end
end
|
Reference in New Issue
Block a user