Plantae: -- REPLACE serves to eliminate new-line characters causing import errors -- join to catalogue_number serves to eliminate leading unidentified whitespace from the raw data, enforcing use of processed version -- set explicit ESCAPE for control characters; ENCLOSE fields in " SET NAMES 'utf8'; SELECT o.data_provider_id AS 'data_provider_id', o.data_resource_id AS 'data_resource_id', dp.name AS 'Data_provider', dr.name AS 'Dataset', REPLACE(IFNULL(r.collector_name, ""), "\n", ", ") AS 'Collector_name', /* '' as 'Collector_number', */ /* '' as 'Field_number', */ /* '' as 'GUID', */ o.occurrence_date AS 'Date_collected', r.institution_code AS 'Institution_code', r.collection_code AS 'Collection_code', cat.code AS 'Catalogue_number', lub.br_value AS 'Basis_of_record', /* ir.url as 'Image_url', */ /* o.modified AS 'last_indexed', */ /* REPLACE(IFNULL(r.identifier_name, ""), "\n", " ") AS 'Identifier', */ /* r.identification_date AS 'Identification_date', */ r.scientific_name AS 'Scientific_name_original', /* r.author AS 'Author', */ nn.canonical AS 'Scientific_name', /* r.kingdom as 'Kingdom_original', */ kn.canonical AS 'Kingdom', /* r.phylum as 'Phylum_original ', */ pn.canonical AS 'Phylum', /* r.class as 'Class_original ', */ cn.canonical AS 'Class', /* r.order_rank as 'Order_rank_original ', */ orn.canonical AS 'Order_rank', /* r.family as 'Family_original ', * fn.canonical AS 'Family', r.genus as 'Genus_original ', gn.canonical AS 'Genus', /* r.country as 'Country_original', */ o.iso_country_code AS 'Country', /* REPLACE(IFNULL(r.locality, ""), "\n", " ") AS 'Locality', */ /* REPLACE(IFNULL(r.county, ""), "\n", " ") AS 'County', */ /* REPLACE(IFNULL(r.continent_ocean, ""), "\n", " ") AS 'Continent_or_Ocean', */ /* REPLACE(IFNULL(r.state_province, ""), "\n", " ") AS 'State_or_Province', */ /* c.region AS 'Region', */ /* dp.iso_country_code AS 'Provider_country', */ r.latitude AS 'Latitude', o.latitude AS 'Latitude_interpreted', r.longitude AS 'Longitude', o.longitude AS 'Longitude_interpreted', r.lat_long_precision AS 'Coordinate_precision', o.geospatial_issue AS 'geospatial_issue', /* o.cell_id AS 'cell_id', */ /* o.centi_cell_id AS 'centi_cell_id', */ /* r.min_depth AS 'Min_depth', */ /* r.max_depth AS 'Max_depth', */ /* r.min_altitude AS 'Min_altitude', */ /* r.max_altitude AS 'Max_altitude', */ /* r.altitude_precision AS 'Altitude_precision', */ /* CONCAT('http://data.gbif.org/occurrences/',o.id) AS 'gbif_portal_url', */ /* CONCAT('http://data.gbif.org/ws/rest/occurrence/get/',o.id) AS 'gbif_webservice_url', */ o.id AS 'occurrence_id', o.nub_concept_id AS 'nub_concept_id' INTO OUTFILE '/var/local/large/db/mysql/Plantae.txt' FIELDS TERMINATED BY '\t' ESCAPED BY '\\' ENCLOSED BY '"' LINES TERMINATED BY '\n' FROM occurrence_record o JOIN data_resource dr ON dr.id = o.data_resource_id JOIN data_provider dp ON o.data_provider_id = dp.id LEFT JOIN country c ON o.iso_country_code = c.iso_country_code LEFT JOIN lookup_basis_of_record lub ON o.basis_of_record=lub.br_key LEFT JOIN catalogue_number cat ON o.catalogue_number_id=cat.id /* left join image_record ir ON o.id = ir.occurrence_id */ INNER JOIN raw_occurrence_record r ON o.id=r.id INNER JOIN taxon_concept nc ON o.nub_concept_id=nc.id INNER JOIN taxon_name nn ON nc.taxon_name_id=nn.id LEFT JOIN taxon_concept kc ON nc.kingdom_concept_id=kc.id LEFT JOIN taxon_name kn ON kc.taxon_name_id=kn.id LEFT JOIN taxon_concept pc ON nc.phylum_concept_id=pc.id LEFT JOIN taxon_name pn ON pc.taxon_name_id=pn.id LEFT JOIN taxon_concept cc ON nc.class_concept_id=cc.id LEFT JOIN taxon_name cn ON cc.taxon_name_id=cn.id LEFT JOIN taxon_concept oc ON nc.order_concept_id=oc.id LEFT JOIN taxon_name orn ON oc.taxon_name_id=orn.id LEFT JOIN taxon_concept fc ON nc.family_concept_id=fc.id LEFT JOIN taxon_name fn ON fc.taxon_name_id=fn.id LEFT JOIN taxon_concept gc ON nc.genus_concept_id=gc.id LEFT JOIN taxon_name gn ON gc.taxon_name_id=gn.id WHERE o.kingdom_concept_id = 6 /* AND o.cell_id IS NOT NULL – if it should be georeferenced records only */ ;