Wikipedia:ProyekWiki Biologi/Gen penyandi protein
Kode sumber Python 3 (menggunakan kerangka kerja Pywikibot) untuk membuat daftar otomatis seperti pada: Daftar gen penyandi protein pada manusia/1
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import re
import csv
import io
import sys
import os
import ftplib
from datetime import datetime
import pywikibot as pybot
# Timestamp taken once at start-up; the page-writing code derives its dates from it.
startTime = datetime.now()
# Setting the directory to the script/executable file location.
# sys.argv[0] carries no directory part when the script is started as
# "python script.py"; dirname() alone would then return '' and os.chdir('')
# raises FileNotFoundError, so the path is made absolute first. An empty
# argv[0] (interactive session) leaves the working directory unchanged.
path = os.path.dirname(os.path.abspath(sys.argv[0])) if sys.argv[0] else os.getcwd()
os.chdir(path)
print("Current working directory: ", os.getcwd(), "\n")
# Setting input/output variables (all relative to the working directory set above)
readFile = 'protein-coding_gene.txt'
writeFile1 = 'Human protein-coding genes wikitext 1.txt'
writeFile2 = 'Human protein-coding genes wikitext 2.txt'
writeFile3 = 'Human protein-coding genes wikitext 3.txt'
writeFile4 = 'Human protein-coding genes wikitext 4.txt'
# mistargetedLinkDictionary maps a gene symbol to a piped wikilink target (without
# the surrounding brackets) for every symbol whose plain page name does not lead to
# the gene article. Two groups of symbols are covered:
#   * symbols listed in the Dablinks tool, i.e. the plain title is a disambiguation page;
#   * symbols whose plain title is an article about something unrelated to the gene.
# forWP() consults this dictionary to write "[[SYMBOL (gene)|SYMBOL]]" instead of a
# plain "[[SYMBOL]]" link whenever the gene symbol is one of its keys.
# Every value has the form "SYMBOL (gene)|SYMBOL", so only the symbols are listed
# here and the dictionary is generated from them (order of the lists is preserved).

# Approved gene symbols (links to disambiguation pages):
_dablinkSymbols = """
A2M AAAS AACS AASS AATF ABO ABR ABRA ACAN ACCS ACD ACE ACR ADA ADGB ADK ADM ADNP ADO
AEN AFM AFP AGA AGK AGL AGPS AGRP AGT AHRR AIDA AIP ALB ALK ALPG AMH AMN AMT ANG
APC APCS APOE APP APRT AQR AR ARC ARNT ARSA ARSD ARX ASIP ASPA ASPM ATIC ATM ATR
AUH AVP AXL
B2M BAD BAX BBX BCOR BCR BGN BID BIK BLK BLM BMF BMP1 BMP2 BMP3 BOC BOK BPI BRAF
BSG BSN BSX BTC BTK BTRC
C2 C3 C5 C6 C7 C9
CA1 CA2 CA3 CA4 CA6 CA7
CAMP CAPS CARF CASR CAST CAT
CAV1 CAV3 CBL CBLB CBX2 CBX5 CBX7
CCK CCN1 CCNI CCR3 CCS CD5 CDA
CDH1 CDH3 CDNF CDR1 CDR2 CDV3 CDX4
CEL CER1 CFB CFD CFH CFI CFL2 CFP
CGA CGN CHAT CHGB CHKB CHM CHRD
CIC CIT CKB CKM CLC CLPP CLPS CLU
CMAS CMIP CNP COCH COIL COMP COPA COPE COQ3
CP CPD CPE CPM CPO CPOX CPQ CPS1 CPT2 CPTP CPZ
CR1 CR2 CRAT CREM CRH CRK CRP CRX
CS CSK CST3 CST6 CST7 CST8 CSTA
CTH CTNS CTRC CTRL CTSA CTSC CTSS
CUTC CYBA CYCS CYGB CYLD
DAO DAP DBH DBI DBNL DBP DBT
DCC DCD DCK DCN DCPS DCT DCX
DDC DDN DDO DDR2 DEK DENR DERA DES
DFFB DHH DIABLO DLD DMD DMPK DOLK DPT DSE DSP DUT
EBP ECD EDA EED EFS EGF EGFR EHF
ELL ELN EMB EMC2 EMD ENAH ENG
EPO EPOR EPX ERAS ERC2 ERF ERG ERH ESD
EVC EVL EVPL EXT2
F10 F11 F12 F2 F3 F5 F7 F8 F9
FAH FAP FAS FASN FAU FBL FECH FER FES FEV
FGA FGB FGG FGR FH FLG FN3K FOS FPGS FRK FST FTL FTO FUS
GAA GAK GAL GALK2 GALT GAN GART GATC
GC GCA GCG GCK GCNA GDA GEM GFAP
GGH GGN GH1 GHR GIP GK GK2 GLA GLS GML
GNAS GNE GNS GP2 GP5 GP9 GPI GPT GPT2
GRASP GRIP1 GRN GRP GSC GSN GSR GSS
HAL HBM HBZ HCCS HCST HDC HDGF HDX
HFE HGD HGF HGS HJV HLF HLX HMBS
HP HPD HPN HPR HPSE HR HRC HRG HRK HTT HUNK HYI
IAPP IDE IDS IGH IGK IGL IHH IK
IL2 IL3 IL4 IL5 IL6 IL7 IL9
IL10 IL11 IL13 IL15 IL16 IL18 IL19
IL20 IL21 IL22 IL24 IL25 IL26 IL32
IMPACT INA INS INSC INTU IPP ITK IVD IVL
JMY JRK JTB JUN JUP
KCP KDR KERA KHK KIN KIT KL KPRP KPTN KY
LAT LBH LBP LBR LCAT LCK LCT LHB
LIAS LIF LIPA LIPC LIPE
LPA LPL LPO LPP LRAT LRP4
LSR LSS LTA LTF LTK LUM
MADD MAF MAFB MAFF MAG MAK MAL MANBA MASP2 MATK MAX MAZ
MB MBP MCC MCU MDK ME1 ME2 ME3 MESD MEST MET
MFF MGA MGP MIA MIB1 MICA MIF MIP
MLN MLX MMD MME MN1 MNT MOG MOS
MPG MPI MPL MPO MR1 MRM2 MRO MSC MSRA MT3 MTRR MYB
NADK NAIP NANOG NAPA NARF NASP NBAS NBN
NCL NCS1 NDN NDP NEB NF1 NF2 NFIB
NGB NGF NIN NLN NMB NMI NMS NMT1 NMU NNT NOG
NPAT NPL NPPA NPPC NPS NQO2
NRAS NRDC NRM NSF NTM NTS NUMB NYX
OAF OAT OCM OGA OGN OGT OMD OMG OMP OPTN OS9 OSM OSTC OTC OTP OXT
PAH PAM PBK PC PCCA PDC PEMT PFAS
PGC PGD PGF PGP PGR PHB PI3 PIGS PIP PIR PISD
PKM PLAT PLG PLN PMEL PML PNN PNP
POLB POLE POR PPID PRCC PRL PRLR PROC PROZ PRX PRY
PSAP PSCA PSD PTEN PTH PTMS PTN PTS PVR PZP
RAD1 RAD50 RAMAC RAN RARA RARB REN RET
RGN RGR RHCE RHD RILP RLF RORC RP2 RPE RTCA RYK
SACS SAG SARAF SAT1 SCAI SCAP SCD SCT SCX
SDF2 SDS SDSL SET SF1 SFN SGCD
SHB SHE SHF SHH SKI SLA SLK SLN SMO
SNCA SNN SON SOS2
SP1 SP2 SP3 SP4 SP5 SP6 SP7 SP8 SP9
SPIC SPN SPP1 SPR SPX
SRC SRF SRI SRL SRM SRR SRRT
SSB SST SSX4 STAC STAM STAR STH STS SYNC
TANK TAT TAZ TBCC TBK1 TBP TCAP TDG
TEC TEF TEK TES TESC
TF TFG TG TGFA TH THPO TIMELESS TKT
TNC TNN TNR TPO TPR TPT1
TRA TRB TRD TRG TRIL TRIO TRO
TSN TSPO TSR2 TST TTK TTL TTN TTR TUB TXN TYR
UBB UBD UCN UMPS UNG UNK UST UTY
VCL VCP VCY VDR VIM VIT VWF
WAC WAS WASL WIZ WLS WRN
XDH XG XK XPC
ZAN ZYX
""".split()

# Approved gene symbols (links to non-gene pages):
_nonGeneSymbols = """
AAMP AARD ADSL AK1 AK4 AK5 AK9 ALLC APEH APOD ARSF ASL ATRIP AVEN AVIL
BATF BBC3 BIVM BMX BRF1 BRF2
CAD CBS CBSL CCN2 CCN3 CCN4 CCNC CCNY CCSAP CCT2 CCT5
CEPT1 CES3 CGAS CGB2 CGB3 CHGA CHIA CHML CHP2
CINP CIPC CKLF CLK3 CLK4 CLMP CLTA CMC2 CNN3
CPA4 CRB2 CRCP CROT CSF3 CSH2 CSN3 CST9
CTSH CTSW CTU2 CUTA CYREN
DDT DMTN DMWD DNA2 DSEL DR1 DST DSTN DTL DXO
EBF3 ELL2 EME2 EN1 EN2 ENSA EPOP EPYC ESAM ESPN ETDA ETV2 ETV3
FARSA FLNC FMOD FRY
GALP GATB GATM GBA GFY GGCT GMDS GMIP GPS2 GPX2
HECA HPCA HPX
ICOS ID3 IRGC ISX
KAT7 KAZN KDSR KEL KIZ KLB KLLN KMO KNCN KYNU
LEP LIPI LIPK LIPN LOX LTV1 LVRN LXN
MAFA MAGIX MAL2 MAVS MBIP MCAT MGMT MIB2 MIDN MIOS MLEC MLIP MOK
MPEG1 MRAP MRM1 MSLN MSN MSX2 MT4 MTR MVD MVK MVP MYNN
NACA NAGA NANP NBL1 NEBL NEMF NES NFIC NGEF NHS NKRF NNAT NPB NRK NRL NVL NXN
ODAM OSCAR OSR2 OSTN
PATJ PCP2 PCTP PDF PIFO PIGN PIM2 PLAA PMCH PNOC
POP1 POP4 PPCS PPIE PPIG PPL PREP PRTG PSD2 PSG1
QPRT
RAX RBFA RDX REST RFK RGL4 RHO RHOF RHOV
RTL4 RTL5 RTL9 RTP1 RTP2 RTP3 RTP4 RTP5
SBSN SCEL SCIMP SCLY SDHC SELL SHD SHPK SI SIAE SMS SNCB
SP100 SPARC SPRN SRMS SSX3 STYX SUCO SVOP SYK SYP
TACR2 TEPP THEMIS TIFA TSR1 TXK
UBC UFC1 USF3
VASP VHLL VIP VMAC
WAPL WDCP WTAP WTIP
""".split()

mistargetedLinkDictionary = {
    geneSymbol: geneSymbol + ' (gene)|' + geneSymbol
    for geneSymbol in _dablinkSymbols + _nonGeneSymbols
}
# Saving the 'protein-coding_gene.txt' file in the current working directory
def downloadGeneFile():
    """Download the HGNC protein-coding gene table into ``readFile``.

    Logs in anonymously to ftp.ebi.ac.uk, changes to the HGNC locus-group
    directory and retrieves 'protein-coding_gene.txt' in binary mode,
    writing it to the path named by the module-level ``readFile``.

    Exceptions raised by ftplib or by opening/writing the local file are
    not caught here and propagate to the caller.
    """
    print("Downloading \'" , readFile,"\' from the HGNC ftp server. This may take a few seconds.\n", sep="")
    # The context manager closes the control connection even if the login,
    # directory change or transfer fails; previously it was never closed.
    with ftplib.FTP('ftp.ebi.ac.uk') as ftp:
        ftp.login()
        ftp.cwd('/pub/databases/genenames/new/tsv/locus_groups')
        with io.open(readFile, 'wb') as data:
            ftp.retrbinary('RETR protein-coding_gene.txt', data.write)
# Helpers used by forWP()
def _geneLink(symbol):
    """Return the bracketed wikilink for a gene symbol.

    Symbols that are keys of mistargetedLinkDictionary use the piped target
    stored there; every other symbol links to its own name.
    """
    return "[[" + mistargetedLinkDictionary.get(symbol, symbol) + "]]"


def _writeTableHeader(wiki, currentMonthYear, tableReference):
    """Write the page preamble and the wikitable header rows to the open file *wiki*."""
    print("{{Complete list|date=" + currentMonthYear + "}}", file=wiki)
    print("{{:Wikipedia:WikiProject Molecular Biology/Molecular and Cell Biology/Human protein-coding genes}}", file=wiki)
    print("{| class=\"wikitable sortable\" style=\"margin: 1em auto;\"", file=wiki)
    print("|+ {{nowrap|[[Human protein-coding genes]] listed in the [[HGNC]] database", tableReference, "}}", sep="", file=wiki)
    print("! scope=\"col\" | index", file=wiki)
    print("! scope=\"col\" | [[Gene symbol]]", file=wiki)
    print("! scope=\"col\" | HGNC ID", file=wiki)
    print("! scope=\"col\" | [[UniProt]] ID(s)", file=wiki)
    print("|-", file=wiki)


def _writeTableFooter(wiki):
    """Close the wikitable and append the references section to the open file *wiki*."""
    print("|}", file=wiki)
    print("\n==References==\n{{Reflist}}", file=wiki)


# Writing the wikitext files
def forWP():
    """Write the wikitext of the four gene-list pages and return the navbox links.

    Reads the tab-delimited file named by ``readFile``. The first row is
    treated as the column header and discarded. Every following row whose
    status column equals 'Approved' becomes one table row (running index,
    gene-symbol link, HGNC ID, UniProt IDs). Rows 1-5000 go to
    ``writeFile1``, 5001-10000 to ``writeFile2``, 10001-15000 to
    ``writeFile3`` and the remainder to ``writeFile4``.

    Returns:
        list of str: bracketed wikilinks for the genes at indices 1, 5000,
        5001, 10000, 10001, 15000 and 15001 (those that exist), followed by
        the link for the last approved gene. The list is empty when the
        input holds no approved row.
    """
    # Generating date variable for the "Complete list" template and the citation template variable for the wikitables
    currentMonthYear = f'{startTime:%B} {startTime.year}'
    currentDate = f'{startTime.day} {startTime:%B} {startTime.year}'
    url = "https://www.genenames.org/download/statistics-and-files/"
    title = "Statistics & download files"
    publisher = "HUGO Gene Nomenclature Committee"
    tableReference = "{{safesubst:#tag:ref|{{cite web | title = " + title + " | url = " + url + " | website = www.genenames.org | publisher = " + publisher + " | accessdate = " + currentDate + " | date = " + currentDate + "}}}}"

    # Column positions in the HGNC file that this script reads
    hgncIdColumn = 0
    symbolColumn = 1
    statusColumn = 5
    uniprotColumn = 25
    # Table indices whose gene symbols appear in the navbox (first/last row of each page)
    navboxIndices = frozenset((1, 5000, 5001, 10000, 10001, 15000, 15001))

    navboxGenes = []
    # Only approved rows update this, so withdrawn entries at the end of the
    # file never end up in the navbox; None means no approved row was seen.
    lastGeneSymbol = None

    # This code block reads the HGNC protein-coding_gene.txt file and writes the source code of all four list pages to different text files
    with open(readFile, 'r', encoding='utf-8') as geneFile, \
            open(writeFile1, 'w', encoding='utf-8') as wiki1, \
            open(writeFile2, 'w', encoding='utf-8') as wiki2, \
            open(writeFile3, 'w', encoding='utf-8') as wiki3, \
            open(writeFile4, 'w', encoding='utf-8') as wiki4:
        # Index of the first row of each follow-up page -> file receiving that page
        pageStarts = {5001: wiki2, 10001: wiki3, 15001: wiki4}

        rows = csv.reader(geneFile, delimiter="\t")
        # The first row holds the column names; the table headers are written manually instead
        next(rows, None)

        # Setting the script to initially write wikitable data to the first file
        wiki = wiki1
        _writeTableHeader(wiki, currentMonthYear, tableReference)

        i = 1
        for line in rows:
            # Blank lines come back from csv.reader as empty lists; skip them.
            # NB: the 'Approved' restriction excludes the "Entry withdrawn" symbols at the end of the 4th list page.
            if not line or line[statusColumn] != 'Approved':
                continue

            geneSymbol = line[symbolColumn]
            if i in navboxIndices:
                navboxGenes.append(_geneLink(geneSymbol))

            # Index
            print("|", i, sep="", file=wiki)
            # Gene symbol
            print("|", _geneLink(geneSymbol), sep="", file=wiki)
            # HGNC ID (the first five characters of the ID field are dropped)
            print("|{{HGNC|", line[hgncIdColumn][5:], "}}", sep="", file=wiki)
            # UNIPROT IDs for proteins: "|"-separated in the file, one template each, joined by line breaks
            uniprotIDs = line[uniprotColumn].split("|")
            print("|" + "<br />".join("{{uniprot|" + uniprotID + "}}" for uniprotID in uniprotIDs), file=wiki)
            print("|-", file=wiki)

            i += 1
            lastGeneSymbol = geneSymbol

            # Setting the script to write wikitable data to the second, third, and fourth files as of entry 5001, 10001, and 15001
            if i in pageStarts:
                _writeTableFooter(wiki)
                wiki = pageStarts[i]
                _writeTableHeader(wiki, currentMonthYear, tableReference)

        # Append the last gene symbol to the list of navbox indices
        if lastGeneSymbol is not None:
            navboxGenes.append(_geneLink(lastGeneSymbol))
        # Writing out the wikitable footer and references section for the file currently being written
        _writeTableFooter(wiki)
    return navboxGenes
def runBot(navboxGenes, text=None, pauseOn=False, sandbox=False):
    """Publish the generated gene-list files to the wiki and refresh the navbox.

    The four wikitext files (writeFile1 .. writeFile4) are read and saved over
    their corresponding wiki pages.  Afterwards the "covers genes ..." ranges
    on the navbox page are rewritten from navboxGenes.  A failure while
    updating one page is printed and logged, and the remaining pages are still
    attempted.  Finally readFile and the four generated text files are deleted.

    Parameters:
        navboxGenes: sequence of at least eight wikilink strings.  Entries
            0-1, 2-3, 4-5 and 6-7 become the "first–last" range written for
            list pages 1, 2, 3 and 4 respectively.
        text: edit summary used for the four gene-list pages.  When None, a
            default summary is used; when it is neither None nor a string, a
            warning is printed and the default summary is used.
        pauseOn: when true, wait for Enter before every page save.
        sandbox: when true, write to the project-space pages (navbox title
            followed by 1-4) instead of the "List of human protein-coding
            genes N" pages.

    Raises:
        OSError: if one of the generated files cannot be opened or deleted.
        Exceptions raised while logging in are not caught here either.
    """
    # Functions for pausing and debugging errors during page writing
    def pause():
        return input("Press Enter to continue ...")

    def errorMessage():
        # Prints the failure report and returns the detail line so that the
        # caller has a real message to pass to the logger.
        excType, excValue, excTraceback = sys.exc_info()
        details = 'Error: {}. {}, line: {}'.format(excType, excValue, excTraceback.tb_lineno)
        print("Something went wrong when writing the page. =<")
        print(details)
        return details

    def savePage(wikipage, summary):
        # Optionally waits for confirmation, then saves as a non-minor edit.
        if pauseOn:
            pause()
        wikipage.save(summary=summary, minor=False)

    # Defining pages to edit
    navbox = "Wikipedia:WikiProject Molecular Biology/Molecular and Cell Biology/Human protein-coding genes"
    if sandbox:
        pagePrefix = "Wikipedia:WikiProject Molecular Biology/Molecular and Cell Biology/Human protein-coding genes"
    else:
        pagePrefix = "List of human protein-coding genes "
    pageList = [pagePrefix + str(number) for number in range(1, 5)]

    # Logging in
    site = pybot.Site('en', 'wikipedia')
    site.login()

    # Default edit summary
    comment = 'Manually running the Python script to perform an unscheduled update'
    if isinstance(text, str):
        comment = text
    elif text is not None:
        print("The input text must be a string")

    # All four files are opened before any page is touched, so a missing file
    # aborts the run before the first edit is made.
    with open(writeFile1, 'r', encoding='utf-8') as page1, \
            open(writeFile2, 'r', encoding='utf-8') as page2, \
            open(writeFile3, 'r', encoding='utf-8') as page3, \
            open(writeFile4, 'r', encoding='utf-8') as page4:
        for title, pageFile in zip(pageList, (page1, page2, page3, page4)):
            try:
                # Loading the gene list page and rewriting the page content with the current HGNC data
                wikipage = pybot.Page(site, title)
                print("Updating HGNC data in ", wikipage.title(), sep="")
                # At most 1,800,000 characters of the file are uploaded
                wikipage.text = pageFile.read(1800000)
                print(wikipage.text)
                savePage(wikipage, comment)
            except Exception:
                # "except Exception" lets Ctrl+C (e.g. at the pause prompt)
                # abort the run instead of moving on to the next page.
                pybot.logging.error(errorMessage())

    try:
        # Loading the navbox page and rewriting the page content with the current HGNC data
        wikipage = pybot.Page(site, navbox)
        print("Updating navbox page: ", wikipage.title(), sep="")
        navboxText = wikipage.text
        for k in range(4):
            # Matches the text between "N]] covers genes " and the next "<br />"
            pattern = r"(?<={}\]\] covers genes )[\S\d ]*?(?=<br \/>)".format(k + 1)
            cutoffs = navboxGenes[2 * k] + "–" + navboxGenes[2 * k + 1]
            # A function replacement inserts the links literally; a plain
            # string would be processed as a template (backslash escapes).
            navboxText = re.sub(pattern, lambda match, cutoffs=cutoffs: cutoffs, navboxText)
        wikipage.text = navboxText
        savePage(wikipage, "Updating gene list cutoffs")
    except Exception:
        pybot.logging.error(errorMessage())

    # Deleting the downloaded and auto-generated text files
    print("Deleting the text files used in this bot script:", readFile, writeFile1, writeFile2, writeFile3, writeFile4, sep="\n")
    for fileName in (readFile, writeFile1, writeFile2, writeFile3, writeFile4):
        os.remove(fileName)
    print("\nFinished writing to Wikipedia\n")
def main():
    """Run the whole update and report how long the script took.

    Calls downloadGeneFile(), feeds the list returned by forWP() to runBot()
    (live pages, no pausing), then prints the time elapsed since startTime.
    """
    downloadGeneFile()
    runBot(forWP(), text="Performing gene list update", pauseOn=False, sandbox=False)
    elapsed = datetime.now() - startTime
    print("Time to execute script:", elapsed)
# Entry point: run the update only when this file is executed as a script
if __name__ == "__main__":
    main()