-
Notifications
You must be signed in to change notification settings - Fork 26
/
vcard.py
54 lines (47 loc) · 2.05 KB
/
vcard.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
import logging
import vobject
from vobject.base import ParseError
from banal import ensure_list
from followthemoney import model
from followthemoney.util import sanitize_text
from ingestors.ingestor import Ingestor
from ingestors.support.encoding import EncodingSupport
from ingestors.exc import ProcessingException
log = logging.getLogger(__name__)
class VCardIngestor(Ingestor, EncodingSupport):
    """Ingest vCard files.

    The decoded file text is stored on the file entity as ``bodyText`` and
    every card found in it is turned into a ``Person`` entity. A person is
    only emitted once it has an ID, which this class assigns solely from the
    card's e-mail addresses.
    """

    MIME_TYPES = ["text/vcard", "text/x-vcard"]
    EXTENSIONS = ["vcf", "vcard"]
    SCORE = 10

    def get_field(self, card, field):
        """Return the values of all ``field`` entries on ``card``.

        Args:
            card: A parsed vobject component exposing a ``contents`` mapping.
            field: vCard property name as keyed in ``card.contents``
                (callers in this class pass lower-case names such as
                ``"tel"``).

        Returns:
            A list holding the ``.value`` of each matching entry. A missing
            property is expected to give an empty list (relies on
            ``ensure_list`` normalising ``None`` - confirm against banal).
        """
        items = ensure_list(card.contents.get(field))
        return [item.value for item in items]

    def ingest_card(self, entity, card):
        """Build a ``Person`` from one card and emit it if it has an ID.

        Args:
            entity: The file entity the card was read from; its ID is
                recorded as the person's ``proof``.
            card: A parsed vobject component for a single vCard.
        """
        person = self.manager.make_entity("Person")
        person.add("proof", entity.id)

        # (entity property, vCard field) pairs, applied in this order.
        field_map = (
            ("name", "n"),
            ("name", "fn"),
            ("gender", "gender"),
            ("birthDate", "bday"),
            ("position", "title"),
            ("summary", "note"),
            ("keywords", "categories"),
            ("phone", "tel"),
            ("weakAlias", "nickname"),
        )
        for prop, field in field_map:
            person.add(prop, self.get_field(card, field))

        for email in self.get_field(card, "email"):
            key = email.strip().lower()
            # Blank addresses are skipped entirely; make_id is invoked once
            # for every non-blank address, using its normalised form as key.
            if key:
                person.make_id(key)
                person.add("email", email)

        # Cards without any usable e-mail address never receive an ID here
        # and are therefore not emitted.
        if person.id:
            self.manager.apply_context(person, entity)
            self.manager.emit_entity(person)

    def ingest(self, file_path, entity):
        """Read a vCard file, store its text and ingest every card in it.

        Args:
            file_path: Path of the file to read.
            entity: The file entity; its schema is set to ``PlainText`` and
                its ``bodyText`` to the sanitised file content.

        Raises:
            ProcessingException: If vobject cannot parse the text
                (``ParseError``) or a ``UnicodeDecodeError`` occurs while
                reading the cards.
        """
        entity.schema = model.get("PlainText")
        text = self.read_file_decoded(entity, file_path)
        text = sanitize_text(text)
        entity.set("bodyText", text)

        # NOTE(review): sanitize_text appears to return None when no usable
        # text remains (e.g. an empty file) - confirm. Passing None on to
        # vobject would fail with an AttributeError that the handler below
        # does not translate, so treat it as "no cards to ingest".
        if text is None:
            return

        try:
            for card in vobject.readComponents(text, allowQP=True):
                self.ingest_card(entity, card)
        except (ParseError, UnicodeDecodeError) as err:
            raise ProcessingException("Cannot parse vcard: %s" % err) from err