康煕部首、CJK部首補助、サロゲートペアの検出ブックマークレット
ウェブページに康煕部首、CJK部首補助、サロゲートペアが含まれているか確認するためのブックマークレットをGeminiに作ってもらった。サロゲートペアは「CJK統合漢字拡張B以降」と表現している。
javascript:(function() {
  /*
   * Bookmarklet: scans the rendered text of the current page for characters
   * in a few Unicode ranges and reports every match, grouped by category,
   * in an alert() dialog.
   * Only block comments are used so the script still works if line breaks
   * are stripped when it is saved as a bookmark URL.
   */
  const patterns = {
    "康煕部首": /[\u2F00-\u2FDF]/g,
    "CJK部首補助": /[\u2E80-\u2EFF]/g,
    /*
     * "Extension B and later": Extension B starts plane 2 (U+20000), while
     * Extension G and later live in plane 3 (U+30000 onward), so both
     * planes are covered.
     */
    "CJK統合漢字拡張B以降": /[\u{20000}-\u{3FFFF}]/ug,
    /* Several sub-ranges below overlap U+1F000-U+1FFFF; that is harmless inside a character class. */
    "絵文字": /[\u{1F000}-\u{1FFFF}\u{1F300}-\u{1F64F}\u{1F680}-\u{1F6FF}\u{1F700}-\u{1F77F}\u{1F780}-\u{1F7FF}\u{1F800}-\u{1F8FF}\u{1F900}-\u{1F9FF}\u{1FA00}-\u{1FAFF}\u{2600}-\u{26FF}\u{2700}-\u{27BF}\u{1F1E6}-\u{1F1FF}\u{1F191}-\u{1F251}\u{1F004}\u{1F0CF}\u{1F170}-\u{1F171}\u{1F17E}-\u{1F17F}\u{1F18E}\u{3030}\u{3297}\u{3299}\u{FE00}-\u{FE0F}\u{1F018}-\u{1F02F}]/ug
  };
  const bodyText = document.body.innerText;
  const foundChars = [];
  for (const [name, pattern] of Object.entries(patterns)) {
    /* With the g flag, match() returns null or a non-empty array of all matches. */
    const matches = bodyText.match(pattern);
    if (matches) {
      foundChars.push(`${name}: ${matches.join('')}`);
    }
  }
  const message = foundChars.length > 0
    ? foundChars.join('\n')
    : "このページには康煕部首、CJK部首補助、CJK統合漢字拡張B以降の文字、絵文字は含まれていません。";
  alert(message);
})();
以下は修正版
javascript:(function(){
/**
 * Classifies a character (or a base character followed by a variation
 * selector) and describes it.
 *
 * @param {string} char - One code point, or a short sequence such as base + selector.
 * @param {number} index - Position value copied unchanged into the result.
 * @returns {{char: string, type: string, unicode: string, index: number}|null}
 *   A description of the character, or null when it matches no category.
 */
function getUnicodeInfo(char, index) {
  /* Array.from iterates by code point, so a surrogate pair yields one entry. */
  const codePoints = Array.from(char, (c) => c.codePointAt(0));
  const hexCodes = codePoints.map((cp) => `U+${cp.toString(16).toUpperCase()}`).join(' ');
  let type;
  if (codePoints.length === 2 && codePoints[1] >= 0xE0100 && codePoints[1] <= 0xE01EF) {
    type = "異体字セレクタ (IVS) 結合";
  } else if (codePoints.length === 1 && codePoints[0] > 0xFFFF) {
    /* Checked before the emoji test, so a lone astral emoji is reported here. */
    type = "サロゲートペア";
  } else if (codePoints.length === 1 && codePoints[0] >= 0x2E80 && codePoints[0] <= 0x2FDF) {
    /*
     * CJK Radicals Supplement (U+2E80-U+2EFF) plus Kangxi Radicals
     * (U+2F00-U+2FDF). The range stops at U+2FDF so that Ideographic
     * Description Characters (U+2FF0-U+2FFF) are not labelled as radicals.
     */
    type = "康煕部首・CJK部首補助";
  } else if (codePoints.some((cp) => (cp >= 0x1F000 && cp <= 0x1FFFF) || (cp >= 0x2600 && cp <= 0x27BF))) {
    type = "絵文字・記号";
  } else {
    return null;
  }
  return { char, type, unicode: hexCodes, index };
}
/* Analyse the current selection; fall back to the whole page text when nothing is selected. */
let targetText = window.getSelection().toString().trim();
if (!targetText) {
  targetText = document.body.innerText;
}
const foundCharacters = [];
/* Array.from splits by code point, so each surrogate pair stays a single element. */
const chars = Array.from(targetText);
for (let i = 0; i < chars.length; i++) {
  const char = chars[i];
  const codePoint = char.codePointAt(0);
  if (i > 0 && codePoint >= 0xE0100 && codePoint <= 0xE01EF) {
    /* A variation selector is reported together with the character before it. */
    const info = getUnicodeInfo(chars[i - 1] + char, i);
    /*
     * The preceding character, if it was detected on its own, was stored
     * with index i (its 1-based position); drop it so it is not listed twice.
     */
    if (foundCharacters.length > 0 && foundCharacters[foundCharacters.length - 1].index === i) {
      foundCharacters.pop();
    }
    if (info) {
      foundCharacters.push(info);
    }
  } else {
    const info = getUnicodeInfo(char, i + 1);
    if (info) {
      foundCharacters.push(info);
    }
  }
}
/* Build the result modal as one HTML string with inline styles. */
let resultHTML = '<div id="detected-modal" style="all:initial; position:fixed;top:10%;left:50%;transform:translateX(-50%);width:90%;max-width:600px;background:#fff;border:1px solid #ccc;box-shadow:0 4px 8px rgba(0,0,0,0.2);z-index:99999;font-family:sans-serif;font-size:14px;max-height:80vh;overflow-y:auto;cursor:move;"><div id="detected-header" style="all:revert;display:flex;justify-content:space-between;align-items:center;padding:10px;background:#f0f0f0;border-bottom:1px solid #ccc;"><span style="font-weight:bold;">特殊文字検出結果</span><span id="detected-close" style="cursor:pointer;font-size:1.5rem;line-height:1;">×</span></div><div id="detected-content" style="padding:15px;font-family:inherit;">';
if (foundCharacters.length > 0) {
  resultHTML += `<p style="margin-top:0;">検出された文字数: ${foundCharacters.length}</p>`;
  resultHTML += '<table style="width:100%;border-collapse:collapse;"><thead><tr><th style="border:1px solid #ccc;padding:8px;text-align:left;">種類</th><th style="border:1px solid #ccc;padding:8px;text-align:left;">文字</th><th style="border:1px solid #ccc;padding:8px;text-align:left;">Unicode</th><th style="border:1px solid #ccc;padding:8px;text-align:left;">位置</th></tr></thead><tbody>';
  foundCharacters.forEach((item) => {
    resultHTML += `<tr><td style="border:1px solid #ccc;padding:8px;">${item.type}</td><td style="border:1px solid #ccc;padding:8px;">${item.char}</td><td style="border:1px solid #ccc;padding:8px;">${item.unicode}</td><td style="border:1px solid #ccc;padding:8px;">${item.index}</td></tr>`;
  });
  resultHTML += '</tbody></table>';
} else {
  resultHTML += '<p>ページ内または選択したテキストに、検出対象の特殊文字は含まれていませんでした。</p>';
}
resultHTML += '</div></div>';
/* Replace any modal left over from an earlier run. */
const existingModal = document.getElementById('detected-modal');
if (existingModal) {
  existingModal.remove();
}
document.body.insertAdjacentHTML('beforeend', resultHTML);
const modal = document.getElementById('detected-modal');
const header = document.getElementById('detected-header');
const closeBtn = document.getElementById('detected-close');
/* Pointer offset inside the modal at the moment a drag starts. */
let offsetX = 0;
let offsetY = 0;
const onDragMove = (e) => {
  modal.style.left = `${e.clientX - offsetX}px`;
  modal.style.top = `${e.clientY - offsetY}px`;
};
/*
 * The document-level listeners exist only while a drag is in progress.
 * Registering them permanently would leave one more pair on the page
 * every time the bookmarklet is run.
 */
const stopDragging = () => {
  document.removeEventListener('mousemove', onDragMove);
  document.removeEventListener('mouseup', stopDragging);
  modal.style.cursor = 'move';
};
header.addEventListener('mousedown', (e) => {
  if (e.target.id === 'detected-close') {
    return;
  }
  const rect = modal.getBoundingClientRect();
  /* Switch from the centering transform to explicit pixel coordinates. */
  modal.style.transform = 'none';
  modal.style.left = `${rect.left}px`;
  modal.style.top = `${rect.top}px`;
  offsetX = e.clientX - rect.left;
  offsetY = e.clientY - rect.top;
  modal.style.cursor = 'grabbing';
  document.addEventListener('mousemove', onDragMove);
  document.addEventListener('mouseup', stopDragging);
});
closeBtn.onclick = function(e) {
  e.stopPropagation();
  stopDragging();
  modal.remove();
};
})();
修正版はマストドンでは動作しないので、結果をアラートで表示する、よりシンプルなコードも載せておく。
javascript:(function(){
/**
 * Classifies a single code point, or a base character followed by a
 * variation selector, and describes it.
 *
 * @param {string} char - One code point, or base + selector (two code points).
 * @returns {{char: string, type: string, unicode: string}|null}
 *   A description of the character, or null when it matches no category.
 */
function getUnicodeInfo(char) {
  /* Array.from iterates by code point, so a surrogate pair yields one entry. */
  const codePoints = Array.from(char, (c) => c.codePointAt(0));
  const unicode = codePoints.map((cp) => `U+${cp.toString(16).toUpperCase()}`).join(' + ');
  const describe = (type) => ({ char, type, unicode });

  if (codePoints.length === 2 && codePoints[1] >= 0xE0100 && codePoints[1] <= 0xE01EF) {
    return describe("異体字セレクタ (IVS) 結合");
  }
  if (codePoints.length !== 1) {
    return null;
  }
  const cp = codePoints[0];
  if (cp > 0xFFFF) {
    /* Every code point above the BMP, astral emoji included, is reported here. */
    return describe("サロゲートペア");
  }
  /*
   * CJK Radicals Supplement (U+2E80-U+2EFF) plus Kangxi Radicals
   * (U+2F00-U+2FDF). The range stops at U+2FDF so that Ideographic
   * Description Characters (U+2FF0-U+2FFF) are not labelled as radicals.
   */
  if (cp >= 0x2E80 && cp <= 0x2FDF) {
    return describe("康煕部首・CJK部首補助");
  }
  /* Only BMP symbols can reach this point; astral ones returned above. */
  if (cp >= 0x2600 && cp <= 0x27BF) {
    return describe("絵文字・記号");
  }
  return null;
}
/* Use the trimmed selection when there is one, otherwise the whole page text. */
const sourceText = window.getSelection().toString().trim() || document.body.innerText;
/* Category name -> Set of display strings; the Set removes repeated characters. */
const grouped = new Map();
/* Array.from splits by code point, so each surrogate pair stays a single element. */
const units = Array.from(sourceText);

/* True for U+0020 (space) and U+00A0 (no-break space). */
const startsWithSpace = (text) => {
  const first = text.codePointAt(0);
  return first === 0x20 || first === 0xA0;
};

/* Adds one "<character> (<code points>)" entry under the given category. */
const remember = (type, shown, unicode) => {
  let bucket = grouped.get(type);
  if (bucket === undefined) {
    bucket = new Set();
    grouped.set(type, bucket);
  }
  bucket.add(`${shown} (${unicode})`);
};

let pos = 0;
while (pos < units.length) {
  const current = units[pos];
  const following = units[pos + 1];
  if (following !== undefined) {
    const followingCp = following.codePointAt(0);
    if (followingCp >= 0xE0100 && followingCp <= 0xE01EF) {
      /* Base character + variation selector: report both as one entry. */
      const pair = current + following;
      const pairInfo = getUnicodeInfo(pair);
      if (pairInfo && pairInfo.type === "異体字セレクタ (IVS) 結合") {
        remember(pairInfo.type, startsWithSpace(current) ? '(スペース)' : pair, pairInfo.unicode);
        /* Skip the selector as well; it has already been consumed. */
        pos += 2;
        continue;
      }
    }
  }
  const single = getUnicodeInfo(current);
  if (single) {
    remember(single.type, startsWithSpace(single.char) ? '(スペース)' : single.char, single.unicode);
  }
  pos += 1;
}

/* One section per category, in the order the categories were first seen. */
const sections = Array.from(
  grouped,
  ([type, entries]) => `【${type}】\n${Array.from(entries).join('\n')}\n\n`
);
const message = grouped.size > 0
  ? `以下の特殊文字が検出されました。\n\n${sections.join('')}`
  : "ページ内または選択したテキストに、検出対象の特殊文字は含まれていませんでした。";
alert(message);
})();
コメント
コメントを投稿