@akashivskyy의 답변은 훌륭하며, NSAttributedString을 활용해 HTML 엔티티를 디코딩하는 방법을 보여 줍니다. 한 가지 단점은 (그가 말했듯이) 모든 HTML 마크업도 함께 제거된다는 것입니다. 예를 들어
<strong> 4 &lt; 5 &amp; 3 &gt; 2</strong>
는 다음과 같이 됩니다.
4 < 5 & 3 > 2
OS X에는 이 작업을 수행해 주는 CFXMLCreateStringByUnescapingEntities() 함수가 있습니다.
// Sample input: markup containing named (&lt; &amp; &gt;) and numeric
// (&#x20ac; &#64;) character entity references.
let encoded = "<strong> 4 &lt; 5 &amp; 3 &gt; 2 .</strong> Price: 12 &#x20ac;. &#64; "
// Core Foundation unescapes the entities but leaves the markup itself alone.
let decoded = CFXMLCreateStringByUnescapingEntities(nil, encoded as CFString, nil) as String
print(decoded)
// <strong> 4 < 5 & 3 > 2 .</strong> Price: 12 €. @
그러나 이것은 iOS에서 사용할 수 없습니다.
다음은 순수 Swift 구현입니다. `&lt;` 같은 문자 엔티티 참조는 사전을 사용해 디코딩하고, `&#64;`나 `&#x20ac;` 같은 숫자 문자 엔티티는 모두 디코딩합니다. (252개의 HTML 엔티티를 모두 명시적으로 나열하지는 않았습니다.)
스위프트 4 :
/// Mapping from XML/HTML character entity reference to character.
///
/// Each key is the complete reference as it appears in the text, including
/// the leading `&` and the trailing `;`, so that a slice running from `&`
/// through `;` can be looked up directly.
///
/// From http://en.wikipedia.org/wiki/List_of_XML_and_HTML_character_entity_references
private let characterEntities: [Substring: Character] = [
    // XML predefined entities:
    "&quot;": "\"",
    "&amp;": "&",
    "&apos;": "'",
    "&lt;": "<",
    "&gt;": ">",

    // HTML character entity references:
    "&nbsp;": "\u{00a0}",
    // ... (the remaining HTML entities are intentionally not listed here)
    "&diams;": "♦",
]
extension String {

    /// A copy of the string in which every HTML character entity reference
    /// is replaced by the character it denotes.
    ///
    /// Named references (such as `&lt;`) are looked up in `characterEntities`.
    /// Decimal (`&#64;`) and hexadecimal (`&#x20ac;`) numeric references are
    /// converted to the Unicode scalar with that value. Text that cannot be
    /// decoded — an unknown name, an invalid number, or a bare `&` — is
    /// copied to the result unchanged.
    var stringByDecodingHTMLEntities: String {

        // ===== Utility functions =====

        // Convert the number in the string to the corresponding
        // Unicode character, e.g.
        //    decodeNumeric("64", base: 10)   --> "@"
        //    decodeNumeric("20ac", base: 16) --> "€"
        // Returns `nil` if the text is not a number in the given base or
        // the number is not a valid Unicode scalar value.
        func decodeNumeric(_ string: Substring, base: Int) -> Character? {
            guard let code = UInt32(string, radix: base),
                let uniScalar = UnicodeScalar(code) else { return nil }
            return Character(uniScalar)
        }

        // Decode the HTML character entity to the corresponding
        // Unicode character, return `nil` for invalid input.
        //     decode("&#64;")    --> "@"
        //     decode("&#x20ac;") --> "€"
        //     decode("&lt;")     --> "<"
        //     decode("&foo;")    --> nil
        // `entity` is expected to start with '&' and end with ';'.
        func decode(_ entity: Substring) -> Character? {
            if entity.hasPrefix("&#x") || entity.hasPrefix("&#X") {
                // Strip the "&#x" prefix and the trailing ';'.
                return decodeNumeric(entity.dropFirst(3).dropLast(), base: 16)
            } else if entity.hasPrefix("&#") {
                // Strip the "&#" prefix and the trailing ';'.
                return decodeNumeric(entity.dropFirst(2).dropLast(), base: 10)
            } else {
                return characterEntities[entity]
            }
        }

        // ===== Method starts here =====

        var result = ""
        var position = startIndex

        // Find the next '&' and copy the characters preceding it to `result`:
        while let ampRange = self[position...].range(of: "&") {
            result.append(contentsOf: self[position ..< ampRange.lowerBound])
            position = ampRange.lowerBound

            // Find the next ';'. If there is none, no further entity can
            // exist; the tail is copied verbatim after the loop.
            guard let semiRange = self[position...].range(of: ";") else {
                break
            }

            // An entity cannot contain '&', so the entity candidate starts at
            // the LAST '&' before the ';'. Any earlier '&' is a bare
            // ampersand and is copied verbatim. Without this step, input such
            // as "AT&T &amp; Co" would treat "&T &amp;" as one invalid entity
            // and leave "&amp;" undecoded.
            let candidate = self[position ..< semiRange.upperBound]
            let entityStart = candidate.range(of: "&", options: .backwards)?.lowerBound ?? position
            result.append(contentsOf: self[position ..< entityStart])

            let entity = self[entityStart ..< semiRange.upperBound]
            position = semiRange.upperBound

            if let decoded = decode(entity) {
                // Replace by decoded character:
                result.append(decoded)
            } else {
                // Invalid entity, copy verbatim:
                result.append(contentsOf: entity)
            }
        }

        // Copy remaining characters to `result`:
        result.append(contentsOf: self[position...])
        return result
    }
}
예:
// Sample input: markup containing named (&lt; &amp; &gt;) and numeric
// (&#x20ac; &#64;) character entity references.
let encoded = "<strong> 4 &lt; 5 &amp; 3 &gt; 2 .</strong> Price: 12 &#x20ac;. &#64; "
// Only the entities are decoded; the <strong> markup is left in place.
let decoded = encoded.stringByDecodingHTMLEntities
print(decoded)
// <strong> 4 < 5 & 3 > 2 .</strong> Price: 12 €. @
스위프트 3 :
/// Mapping from XML/HTML character entity reference to character.
///
/// Each key is the complete reference as it appears in the text, including
/// the leading `&` and the trailing `;`.
///
/// From http://en.wikipedia.org/wiki/List_of_XML_and_HTML_character_entity_references
private let characterEntities : [ String : Character ] = [
    // XML predefined entities:
    "&quot;"    : "\"",
    "&amp;"     : "&",
    "&apos;"    : "'",
    "&lt;"      : "<",
    "&gt;"      : ">",

    // HTML character entity references:
    "&nbsp;"    : "\u{00a0}",
    // ... (the remaining HTML entities are intentionally not listed here)
    "&diams;"   : "♦",
]
extension String {
/// Returns a new string made by replacing in the `String`
/// all HTML character entity references with the corresponding
/// character.
///
/// Named references are looked up in `characterEntities`; references
/// starting with `&#x`/`&#X` are parsed as hexadecimal and other `&#`
/// references as decimal. Anything that cannot be decoded is copied
/// to the result unchanged.
var stringByDecodingHTMLEntities : String {
// ===== Utility functions =====
// Convert the number in the string to the corresponding
// Unicode character, e.g.
// decodeNumeric("64", 10) --> "@"
// decodeNumeric("20ac", 16) --> "€"
// Returns `nil` when the text does not parse as a `UInt32` in the given
// base or the value is rejected by `UnicodeScalar`.
func decodeNumeric(_ string : String, base : Int) -> Character? {
guard let code = UInt32(string, radix: base),
let uniScalar = UnicodeScalar(code) else { return nil }
return Character(uniScalar)
}
// Decode the HTML character entity to the corresponding
// Unicode character, return `nil` for invalid input.
// decode("&#64;") --> "@"
// decode("&#x20ac;") --> "€"
// decode("&lt;") --> "<"
// decode("&foo;") --> nil
// `entity` is the slice from '&' through ';' inclusive; the numeric
// branches cut off the prefix (3 or 2 characters) and the final ';'.
func decode(_ entity : String) -> Character? {
if entity.hasPrefix("&#x") || entity.hasPrefix("&#X"){
return decodeNumeric(entity.substring(with: entity.index(entity.startIndex, offsetBy: 3) ..< entity.index(entity.endIndex, offsetBy: -1)), base: 16)
} else if entity.hasPrefix("&#") {
return decodeNumeric(entity.substring(with: entity.index(entity.startIndex, offsetBy: 2) ..< entity.index(entity.endIndex, offsetBy: -1)), base: 10)
} else {
return characterEntities[entity]
}
}
// ===== Method starts here =====
var result = ""
var position = startIndex
// Find the next '&' and copy the characters preceding it to `result`:
while let ampRange = self.range(of: "&", range: position ..< endIndex) {
result.append(self[position ..< ampRange.lowerBound])
position = ampRange.lowerBound
// Find the next ';' and copy everything from '&' to ';' into `entity`
// NOTE(review): the first ';' found anywhere after this '&' is used, so a
// bare '&' followed later by a valid entity (e.g. "a & b &lt; c") makes
// the whole span "& b &lt;" one invalid entity that is copied verbatim;
// the "&lt;" inside it is not decoded.
if let semiRange = self.range(of: ";", range: position ..< endIndex) {
let entity = self[position ..< semiRange.upperBound]
position = semiRange.upperBound
if let decoded = decode(entity) {
// Replace by decoded character:
result.append(decoded)
} else {
// Invalid entity, copy verbatim:
result.append(entity)
}
} else {
// No matching ';'.
// Nothing after this point can be an entity; the tail is appended below.
break
}
}
// Copy remaining characters to `result`:
result.append(self[position ..< endIndex])
return result
}
}
스위프트 2 :
/// Mapping from XML/HTML character entity reference to character.
///
/// Each key is the complete reference as it appears in the text, including
/// the leading `&` and the trailing `;`.
///
/// From http://en.wikipedia.org/wiki/List_of_XML_and_HTML_character_entity_references
private let characterEntities : [ String : Character ] = [
    // XML predefined entities:
    "&quot;"    : "\"",
    "&amp;"     : "&",
    "&apos;"    : "'",
    "&lt;"      : "<",
    "&gt;"      : ">",

    // HTML character entity references:
    "&nbsp;"    : "\u{00a0}",
    // ... (the remaining HTML entities are intentionally not listed here)
    "&diams;"   : "♦",
]
extension String {
/// Returns a new string made by replacing in the `String`
/// all HTML character entity references with the corresponding
/// character.
///
/// Named references are looked up in `characterEntities`; references
/// starting with `&#x`/`&#X` are parsed as hexadecimal and other `&#`
/// references as decimal. Named references that are not in the
/// dictionary are copied to the result unchanged.
var stringByDecodingHTMLEntities : String {
// ===== Utility functions =====
// Convert the number in the string to the corresponding
// Unicode character, e.g.
// decodeNumeric("64", 10) --> "@"
// decodeNumeric("20ac", 16) --> "€"
// NOTE(review): despite the optional return type this helper has no `nil`
// path; whatever `strtoul` yields is converted straight to a Character.
// Presumably a non-numeric payload (e.g. "&#xyz;") yields code 0 and an
// out-of-range or surrogate value is not rejected. Confirm and consider
// validating the value before building the scalar.
func decodeNumeric(string : String, base : Int32) -> Character? {
let code = UInt32(strtoul(string, nil, base))
return Character(UnicodeScalar(code))
}
// Decode the HTML character entity to the corresponding
// Unicode character, return `nil` for invalid input.
// decode("&#64;") --> "@"
// decode("&#x20ac;") --> "€"
// decode("&lt;") --> "<"
// decode("&foo;") --> nil
// The numeric branches drop only the prefix; the substring handed to
// `decodeNumeric` still ends with ';' (presumably relying on `strtoul`
// stopping at the first non-digit character — TODO confirm).
func decode(entity : String) -> Character? {
if entity.hasPrefix("&#x") || entity.hasPrefix("&#X"){
return decodeNumeric(entity.substringFromIndex(entity.startIndex.advancedBy(3)), base: 16)
} else if entity.hasPrefix("&#") {
return decodeNumeric(entity.substringFromIndex(entity.startIndex.advancedBy(2)), base: 10)
} else {
return characterEntities[entity]
}
}
// ===== Method starts here =====
var result = ""
var position = startIndex
// Find the next '&' and copy the characters preceding it to `result`:
while let ampRange = self.rangeOfString("&", range: position ..< endIndex) {
result.appendContentsOf(self[position ..< ampRange.startIndex])
position = ampRange.startIndex
// Find the next ';' and copy everything from '&' to ';' into `entity`
// NOTE(review): the first ';' found anywhere after this '&' is used, so a
// bare '&' followed later by a valid entity (e.g. "a & b &lt; c") makes
// the whole span "& b &lt;" one invalid entity that is copied verbatim;
// the "&lt;" inside it is not decoded.
if let semiRange = self.rangeOfString(";", range: position ..< endIndex) {
let entity = self[position ..< semiRange.endIndex]
position = semiRange.endIndex
if let decoded = decode(entity) {
// Replace by decoded character:
result.append(decoded)
} else {
// Invalid entity, copy verbatim:
result.appendContentsOf(entity)
}
} else {
// No matching ';'.
// Nothing after this point can be an entity; the tail is appended below.
break
}
}
// Copy remaining characters to `result`:
result.appendContentsOf(self[position ..< endIndex])
return result
}
}