Merge commit '2af495d056616cc0f757a055114b56df2e0d5d84' as 'projects/bad-nlp/name-database'
This commit is contained in:
53
name-database/tasks/exporters/sqlite.rake
Normal file
53
name-database/tasks/exporters/sqlite.rake
Normal file
@@ -0,0 +1,53 @@
|
||||
require 'sequel'
|
||||
|
||||
# Dumps the given names held by @db into the SQLite database opened as @output.
#
# Prerequisites: the top-level :db task (sets @db) and "export:sqlite:schema"
# (creates the :names and :metadata tables on @output).
task "export:sqlite" => [:db, "export:sqlite:schema"] do
  @names = @output[:names]
  @meta = @output[:metadata]

  # First insert the given names, each followed by its metadata rows.
  puts "dumping given names"
  @db.given_names.each do |entry|
    # Sequel's Dataset#insert returns the primary key of the new row; it is
    # needed to tie every metadata row to the name it describes.
    name_id = @names.insert :name => entry.name, :type => "given"

    entry.metadata_without_nesting.each do |key, value|
      @meta.insert :name_id => name_id, :key => key, :value => value
    end
  end

  # Family names are not exported yet; the original draft is kept for reference.
  # puts "dumping family names"
  # @db.family_names.each do |entry|
  #   @names << {:name => entry.name, :type => "family"}
  # end
end
|
||||
|
||||
# Supporting tasks for the SQLite export: opening the output database and
# creating its tables.
namespace "export:sqlite" do
  # Opens a SQLite database under out/ and keeps the handle in @output.
  # The file is out/names.sqlite when out/ holds no *.sqlite file yet,
  # otherwise out/names.<count>.sqlite where <count> is the number of
  # *.sqlite files already present.
  task :db => "out" do
    sqlite_count = Dir.glob("out/*.sqlite").size
    suffix = sqlite_count > 0 ? ".#{sqlite_count}" : ""

    @output = Sequel.sqlite("out/names#{suffix}.sqlite")
  end

  # Creates the two tables the export writes into.
  task :schema => "export:sqlite:db" do
    # One row per name, tagged with its type.
    @output.create_table(:names) do
      primary_key :id
      String :name
      String :type

      index [:type, :name]
    end

    # Key/value pairs attached to a row of :names through name_id.
    @output.create_table(:metadata) do
      foreign_key :name_id, :names
      String :key
      String :value

      index [:name_id, :key]
    end
  end
end
|
59
name-database/tasks/importers/census1990.rake
Normal file
59
name-database/tasks/importers/census1990.rake
Normal file
@@ -0,0 +1,59 @@
|
||||
|
||||
# Public entry point for the 1990 census import; the work happens in
# "import:census1990:run".
desc "imports the census 1990 data files and merged the data with the existing data files"
task "import:census1990" => "import:census1990:run"
|
||||
|
||||
# Importers for the 1990 census name lists stored under sources/census-1990/.
# Every task relies on the top-level :db task having set @db.
namespace "import:census1990" do
  # Returns the first whitespace-separated field of every non-blank line of
  # the file at +path+. String#split without arguments ignores leading
  # whitespace, and blank lines (which produce no fields) are dropped.
  names_in = lambda do |path|
    File.foreach(path).map { |line| line.split.first }.compact
  end

  # Records that the given name +name+ was seen in the list for +gender+.
  # A name already marked with +opposite+ (or already "unisex") becomes
  # "unisex"; anything else is set to +gender+.
  record_gender = lambda do |name, gender, opposite|
    entry = @db.given_names.get name
    seen_for_both = [opposite, "unisex"].include?(entry.meta[:gender])
    entry.meta[:gender] = seen_for_both ? "unisex" : gender
  end

  # Runs the three importers, then writes the database once.
  task :run => [:db, "import:census1990:male", "import:census1990:female", "import:census1990:family"] do
    @db.write
  end

  task :male => :db do
    names_in.call("sources/census-1990/dist.male.first").each do |name|
      record_gender.call(name, "male", "female")
    end
  end

  task :female => :db do
    names_in.call("sources/census-1990/dist.female.first").each do |name|
      record_gender.call(name, "female", "male")
    end
  end

  # Family names carry no metadata here; each one is only looked up.
  # NOTE(review): presumably `get` creates missing entries - confirm in NameDatabase.
  task :family => :db do
    names_in.call("sources/census-1990/dist.all.last").each do |name|
      @db.family_names.get name
    end
  end
end
|
20
name-database/tasks/importers/census2000.rake
Normal file
20
name-database/tasks/importers/census2000.rake
Normal file
@@ -0,0 +1,20 @@
|
||||
# Public entry point for the 2000 census import; the work happens in
# "import:census2000:run".
desc "imports the census 2000 data files and merged the data with the existing data files"
task "import:census2000" => "import:census2000:run"

namespace "import:census2000" do
  # Reads sources/census-2000/app_c.csv, looks up the first column of every
  # data row as a family name in @db, then writes the database.
  # NOTE(review): presumably `get` creates missing entries - confirm in NameDatabase.
  task :run => :db do
    File.open("sources/census-2000/app_c.csv", 'r') do |file|
      file.gets # skip the header

      file.each do |line|
        # Strip the line terminator and surrounding blanks so that an empty
        # or single-column line never produces a name such as "\n".
        name = line.split(",").first.to_s.strip
        next if name.empty?

        @db.family_names.get name
      end
    end

    @db.write
  end
end
|
42
name-database/tasks/main.rake
Normal file
42
name-database/tasks/main.rake
Normal file
@@ -0,0 +1,42 @@
|
||||
# Directory task for the export output folder.
directory("out")

# Removes the output folder together with everything inside it.
task("clean") { FileUtils.rm_rf("out") }
|
||||
|
||||
# Defines, for each name set, the rules "name:<set>:<name>" and
# "mate:<set>:<name>".
%w[given family].each do |set|
  task_prefix = "name:#{set}:"

  # name:<set>:<name> - makes sure <name> is listed in the YAML file that
  # path_for_name selects for it, keeping the file sorted by entry name.
  rule Regexp.new("#{task_prefix}[a-z]+") do |t|
    bare_name = t.name.gsub(task_prefix, '')
    yaml_path = path_for_name(set, bare_name)

    # Run the task registered under the file's path before reading it.
    Rake::Task[yaml_path].invoke

    # A file without any entries parses to a falsy value; treat it as empty.
    entries = YAML.load(File.read(yaml_path)) || []

    unless entry_names(entries).include?(bare_name)
      entries << bare_name
      entries.sort! { |left, right| entry_name(left) <=> entry_name(right) }
      File.open(yaml_path, 'w') { |out| out.puts YAML.dump(entries) }
    end
  end

  # mate:<set>:<name> - meant to open a TextMate window at the file and line
  # of the given name. The body is empty, so only the matching name: task
  # (the rule's source) is involved.
  rule Regexp.new("mate:#{set}:[a-z]+") => [proc { |task_name| task_name.gsub("mate:", "name:") }] do |t|
  end
end
|
||||
|
||||
# Default task: looks up the family name "smith" in the name database and
# then writes the database out.
task :default => :db do
  # The returned entry itself is not needed, so it is not kept in a local.
  # NOTE(review): presumably `get` creates the entry when missing - confirm in NameDatabase.
  @db.family_names.get "smith"

  @db.write
end
|
||||
|
||||
# Loads the name-database library from ../lib (relative to this file) and
# opens the database rooted at the current directory as @db.
task :db do
  lib_dir = "#{File.dirname(__FILE__)}/../lib"
  $LOAD_PATH.unshift(lib_dir)
  require 'name-database'

  @db = NameDatabase.new(".")
end
|
13
name-database/tasks/setup.rake
Normal file
13
name-database/tasks/setup.rake
Normal file
@@ -0,0 +1,13 @@
|
||||
# Declares, for both name sets and every two-letter prefix "aa".."zz", a
# directory task "<set>_name/<first letter>" and a file task
# "<set>_name/<first letter>/<prefix>.yml" that creates the YAML file with a
# document marker and a header comment.
["given", "family"].each do |set|
  ("aa".."zz").each do |prefix|
    # prefix[0, 1] is the first letter; core String has no #first method.
    dir = "#{set}_name/#{prefix[0, 1]}"
    path = "#{dir}/#{prefix}.yml"

    directory dir
    file path => dir do
      File.open(path, "w") do |f|
        f.puts "---"
        f.puts "# This file contain #{set} names starting with #{prefix}"
      end
    end
  end
end
|
17
name-database/tasks/support.rb
Normal file
17
name-database/tasks/support.rb
Normal file
@@ -0,0 +1,17 @@
|
||||
|
||||
# Builds the relative path of the YAML file for +name+ in the given +set+:
# "<set>_name/<first letter>/<first two letters>.yml".
#
# Uses plain String slicing so it works without any String#first extension.
#
# set  - the set name as a String, e.g. "given" or "family".
# name - the name as a String.
#
# Returns the path as a String.
def path_for_name(set, name)
  "#{set}_name/#{name[0, 1]}/#{name[0, 2]}.yml"
end
|
||||
|
||||
# Collects the name of every entry in +list+ (as determined by entry_name),
# leaving out entries for which entry_name returns nil.
#
# Returns an Array of names in the order of +list+.
def entry_names(list)
  list.each_with_object([]) do |entry, names|
    name = entry_name(entry)
    names << name unless name.nil?
  end
end
|
||||
|
||||
# Extracts the name from a single list entry.
#
# entry - a String (the name itself) or a Hash (whose first key is taken as
#         the name).
#
# Returns the name, or nil for any other kind of entry.
def entry_name(entry)
  case entry
  # `when X then y` replaces the Ruby 1.8-only `when X: y` form, which is a
  # syntax error on Ruby 1.9 and later.
  when String then entry
  when Hash then entry.keys.first
  end
end
|
16
name-database/tasks/validations.rake
Normal file
16
name-database/tasks/validations.rake
Normal file
@@ -0,0 +1,16 @@
|
||||
require 'yaml'

# Tries to parse every YAML file below given_name/ and family_name/ and
# prints one line per file: the path padded with dots, followed by "success"
# or by "failed" plus the error message.
task :validate_data do
  paths = Dir["{given,family}_name/**/*.yml"]
  pad_length = 55

  paths.each do |path|
    begin
      STDOUT.write path.ljust(pad_length, '.')
      YAML.load(IO.read(path))
      puts "success"
    rescue StandardError => e
      # Parse errors are ArgumentError on older YAML engines and
      # Psych::SyntaxError on current ones; rescuing StandardError covers
      # both, so one broken file never aborts the whole run.
      puts "failed"
      puts e.message
      puts
    end
  end
end
|
Reference in New Issue
Block a user