API

unihan_db.bootstrap.add_to_dict(b)

Add to_dict() method to SQLAlchemy Base object.

Parameters: b (declarative_base()) – SQLAlchemy Base class
unihan_db.bootstrap.bootstrap_unihan(session, options={})

Download, extract and import unihan to database.

unihan_db.bootstrap.get_session(engine_url=u'sqlite:///{user_data_dir}/unihan_db.db')

Return new SQLAlchemy session object from engine string.

engine_url accepts a string template variable, {user_data_dir}, which is replaced with the XDG data directory of the user running the script process. This variable is only useful for SQLite, where file paths are used in the engine_url.

Parameters: engine_url (string) – SQLAlchemy engine string
unihan_db.bootstrap.is_bootstrapped(metadata)

Return True if cihai is correctly bootstrapped.

unihan_db.bootstrap.setup_logger(logger=None, level=u'INFO')

Set up logging for CLI use.

Parameters: logger (Logger) – instance of logger
unihan_db.bootstrap.to_dict(obj, found=None)

Return dictionary of an SQLAlchemy Query result.

Supports recursive relationships.

Parameters:

  • obj – SQLAlchemy Query result to convert
  • found – optional; defaults to None

Returns:

dictionary of results

Return type:

dict

unihan_db table design

Tables are split into general categories, mirroring how the UNIHAN database’s files are organized:

  • Unhn_DictionaryIndices
  • Unhn_DictionaryLikeData
  • Unhn_IRGSources
  • Unhn_NumericValues
  • Unhn_OtherMappings
  • Unhn_RadicalStrokeCounts
  • Unhn_Readings
  • Unhn_Variants

Table names are prefixed with Unhn_ (“Unihan” abbreviated by dropping its inner vowels).

These root tables include the base data for all 90 UNIHAN fields. Specialized values are branched off into field-specific tables through polymorphic joins.

class unihan_db.tables.GenericIRG(**kwargs)
char_id
id
location
source
type
class unihan_db.tables.GenericIndice(**kwargs)
char_id
id
locations
type
class unihan_db.tables.GenericRadicalStrokes(**kwargs)
char_id
id
radical
simplified
strokes
type
class unihan_db.tables.GenericReading(**kwargs)
char_id
id
locations
readings
type
class unihan_db.tables.Unhn(**kwargs)
char
kCCCII
kCantonese
kCheungBauer
kCheungBauerIndex
kCihaiT
kDaeJaweon
kDefinition
kFenn
kFennIndex
kGSR
kHDZRadBreak
kHanYu
kHanyuPinlu
kHanyuPinyin
kIICore
kIRGDaeJaweon
kIRGHanyuDaZidian
kIRGKangXi
kIRG_GSource
kIRG_HSource
kIRG_JSource
kIRG_KPSource
kIRG_KSource
kIRG_MSource
kIRG_TSource
kIRG_USource
kIRG_VSource
kMandarin
kRSAdobe_Japan1_6
kRSJapanese
kRSKanWa
kRSKangXi
kRSKorean
kRSUnicode
kSBGY
kTotalStrokes
kXHC1983
ucn
class unihan_db.tables.UnhnLocation(**kwargs)
character
generic_indice_id
generic_reading_id
id
page
virtual
volume
class unihan_db.tables.UnhnLocationkXHC1983(**kwargs)
character
entry
generic_indice_id
generic_reading_id
id
page
substituted
class unihan_db.tables.UnhnReading(**kwargs)
generic_reading_id
id
reading
class unihan_db.tables.kCCCII(**kwargs)
char_id
hex
id
class unihan_db.tables.kCantonese(**kwargs)
char_id
definition
id
class unihan_db.tables.kCheungBauer(**kwargs)
cangjie
char_id
id
locations
radical
readings
strokes
type
class unihan_db.tables.kCheungBauerIndex(**kwargs)
char_id
id
locations
type
class unihan_db.tables.kCihaiT(**kwargs)
char_id
character
id
page
row
class unihan_db.tables.kDaeJaweon(**kwargs)
char_id
id
locations
type
class unihan_db.tables.kDefinition(**kwargs)
char_id
definition
id
class unihan_db.tables.kFenn(**kwargs)
char_id
frequency
id
phonetic
class unihan_db.tables.kFennIndex(**kwargs)
char_id
id
locations
type
class unihan_db.tables.kGSR(**kwargs)
apostrophe
char_id
id
letter
set
class unihan_db.tables.kHDZRadBreak(**kwargs)
char_id
id
locations
radical
type
ucn
class unihan_db.tables.kHanYu(**kwargs)
char_id
id
locations
type
class unihan_db.tables.kHanyuPinlu(**kwargs)
char_id
frequency
id
phonetic
class unihan_db.tables.kHanyuPinyin(**kwargs)
char_id
id
locations
readings
type
class unihan_db.tables.kIICore(**kwargs)
char_id
id
priority
sources
class unihan_db.tables.kIICoreSource(**kwargs)
id
source
source_id
class unihan_db.tables.kIRGDaeJaweon(**kwargs)
char_id
id
locations
type
class unihan_db.tables.kIRGHanyuDaZidian(**kwargs)
char_id
id
locations
type
class unihan_db.tables.kIRGKangXi(**kwargs)
char_id
id
locations
type
class unihan_db.tables.kIRG_GSource(**kwargs)
char_id
id
location
source
type
class unihan_db.tables.kIRG_HSource(**kwargs)
char_id
id
location
source
type
class unihan_db.tables.kIRG_JSource(**kwargs)
char_id
id
location
source
type
class unihan_db.tables.kIRG_KPSource(**kwargs)
char_id
id
location
source
type
class unihan_db.tables.kIRG_KSource(**kwargs)
char_id
id
location
source
type
class unihan_db.tables.kIRG_MSource(**kwargs)
char_id
id
location
source
type
class unihan_db.tables.kIRG_TSource(**kwargs)
char_id
id
location
source
type
class unihan_db.tables.kIRG_USource(**kwargs)
char_id
id
location
source
type
class unihan_db.tables.kIRG_VSource(**kwargs)
char_id
id
location
source
type
class unihan_db.tables.kMandarin(**kwargs)
char_id
hans
hant
id
class unihan_db.tables.kRSAdobe_Japan1_6(**kwargs)
char_id
cid
id
radical
strokes
strokes_residue
type
class unihan_db.tables.kRSJapanese(**kwargs)
char_id
id
radical
simplified
strokes
type
class unihan_db.tables.kRSKanWa(**kwargs)
char_id
id
radical
simplified
strokes
type
class unihan_db.tables.kRSKangXi(**kwargs)
char_id
id
radical
simplified
strokes
type
class unihan_db.tables.kRSKorean(**kwargs)
char_id
id
radical
simplified
strokes
type
class unihan_db.tables.kRSUnicode(**kwargs)
char_id
id
radical
simplified
strokes
type
class unihan_db.tables.kSBGY(**kwargs)
char_id
id
locations
type
class unihan_db.tables.kTotalStrokes(**kwargs)
char_id
hans
hant
id
class unihan_db.tables.kXHC1983(**kwargs)
char_id
id
locations
readings
type